ECCE @ EIC Software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
half.cpp
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file half.cpp
1 
2 //
3 // Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
4 // Digital Ltd. LLC
5 //
6 // All rights reserved.
7 //
8 // Redistribution and use in source and binary forms, with or without
9 // modification, are permitted provided that the following conditions are
10 // met:
11 // * Redistributions of source code must retain the above copyright
12 // notice, this list of conditions and the following disclaimer.
13 // * Redistributions in binary form must reproduce the above
14 // copyright notice, this list of conditions and the following disclaimer
15 // in the documentation and/or other materials provided with the
16 // distribution.
17 // * Neither the name of Industrial Light & Magic nor the names of
18 // its contributors may be used to endorse or promote products derived
19 // from this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 //
34 
35 // Primary authors:
36 // Florian Kainz <kainz@ilm.com>
37 // Rod Bogart <rgb@ilm.com>
38 
39 
40 //---------------------------------------------------------------------------
41 //
42 // class half --
43 // implementation of non-inline members
44 //
45 //---------------------------------------------------------------------------
46 
#include <assert.h>
#include <string.h>
#include "half.h"
49 
50 using namespace std;
51 
52 //-------------------------------------------------------------
53 // Lookup tables for half-to-float and float-to-half conversion
54 //-------------------------------------------------------------
55 
57 #include "toFloat.h"
58 HALF_EXPORT_CONST unsigned short half::_eLut[1 << 9] =
59 #include "eLut.h"
60 
61 
62 //-----------------------------------------------
63 // Overflow handler for float-to-half conversion;
64 // generates a hardware floating-point overflow,
65 // which may be trapped by the operating system.
66 //-----------------------------------------------
67 
68 float
70 {
71  volatile float f = 1e10;
72 
73  for (int i = 0; i < 10; i++)
74  f *= f; // this will overflow before
75  // the for­loop terminates
76  return f;
77 }
78 
79 
80 //-----------------------------------------------------
81 // Float-to-half conversion -- general case, including
82 // zeroes, denormalized numbers and exponent overflows.
83 //-----------------------------------------------------
84 
85 short
86 half::convert (int i)
87 {
88  //
89  // Our floating point number, f, is represented by the bit
90  // pattern in integer i. Disassemble that bit pattern into
91  // the sign, s, the exponent, e, and the significand, m.
92  // Shift s into the position where it will go in in the
93  // resulting half number.
94  // Adjust e, accounting for the different exponent bias
95  // of float and half (127 versus 15).
96  //
97 
98  int s = (i >> 16) & 0x00008000;
99  int e = ((i >> 23) & 0x000000ff) - (127 - 15);
100  int m = i & 0x007fffff;
101 
102  //
103  // Now reassemble s, e and m into a half:
104  //
105 
106  if (e <= 0)
107  {
108  if (e < -10)
109  {
110  //
111  // E is less than -10. The absolute value of f is
112  // less than HALF_MIN (f may be a small normalized
113  // float, a denormalized float or a zero).
114  //
115  // We convert f to a half zero with the same sign as f.
116  //
117 
118  return s;
119  }
120 
121  //
122  // E is between -10 and 0. F is a normalized float
123  // whose magnitude is less than HALF_NRM_MIN.
124  //
125  // We convert f to a denormalized half.
126  //
127 
128  //
129  // Add an explicit leading 1 to the significand.
130  //
131 
132  m = m | 0x00800000;
133 
134  //
135  // Round to m to the nearest (10+e)-bit value (with e between
136  // -10 and 0); in case of a tie, round to the nearest even value.
137  //
138  // Rounding may cause the significand to overflow and make
139  // our number normalized. Because of the way a half's bits
140  // are laid out, we don't have to treat this case separately;
141  // the code below will handle it correctly.
142  //
143 
144  int t = 14 - e;
145  int a = (1 << (t - 1)) - 1;
146  int b = (m >> t) & 1;
147 
148  m = (m + a + b) >> t;
149 
150  //
151  // Assemble the half from s, e (zero) and m.
152  //
153 
154  return s | m;
155  }
156  else if (e == 0xff - (127 - 15))
157  {
158  if (m == 0)
159  {
160  //
161  // F is an infinity; convert f to a half
162  // infinity with the same sign as f.
163  //
164 
165  return s | 0x7c00;
166  }
167  else
168  {
169  //
170  // F is a NAN; we produce a half NAN that preserves
171  // the sign bit and the 10 leftmost bits of the
172  // significand of f, with one exception: If the 10
173  // leftmost bits are all zero, the NAN would turn
174  // into an infinity, so we have to set at least one
175  // bit in the significand.
176  //
177 
178  m >>= 13;
179  return s | 0x7c00 | m | (m == 0);
180  }
181  }
182  else
183  {
184  //
185  // E is greater than zero. F is a normalized float.
186  // We try to convert f to a normalized half.
187  //
188 
189  //
190  // Round to m to the nearest 10-bit value. In case of
191  // a tie, round to the nearest even value.
192  //
193 
194  m = m + 0x00000fff + ((m >> 13) & 1);
195 
196  if (m & 0x00800000)
197  {
198  m = 0; // overflow in significand,
199  e += 1; // adjust exponent
200  }
201 
202  //
203  // Handle exponent overflow
204  //
205 
206  if (e > 30)
207  {
208  overflow (); // Cause a hardware floating point overflow;
209  return s | 0x7c00; // if this returns, the half becomes an
210  } // infinity with the same sign as f.
211 
212  //
213  // Assemble the half from s, e and m.
214  //
215 
216  return s | (e << 10) | (m >> 13);
217  }
218 }
219 
220 
221 //---------------------
222 // Stream I/O operators
223 //---------------------
224 
225 ostream &
226 operator << (ostream &os, half h)
227 {
228  os << float (h);
229  return os;
230 }
231 
232 
233 istream &
234 operator >> (istream &is, half &h)
235 {
236  float f;
237  is >> f;
238  h = half (f);
239  return is;
240 }
241 
242 
243 //---------------------------------------
244 // Functions to print the bit-layout of
245 // floats and halfs, mostly for debugging
246 //---------------------------------------
247 
248 void
249 printBits (ostream &os, half h)
250 {
251  unsigned short b = h.bits();
252 
253  for (int i = 15; i >= 0; i--)
254  {
255  os << (((b >> i) & 1)? '1': '0');
256 
257  if (i == 15 || i == 10)
258  os << ' ';
259  }
260 }
261 
262 
// Writes the 32 bits of f to os, most significant bit first, as '0'/'1'
// characters. A blank is emitted after bit 31 and after bit 23, splitting
// the output into groups of 1, 8 and 23 digits.
void
printBits (std::ostream &os, float f)
{
    //
    // Copy the object representation with memcpy rather than reading the
    // inactive member of a union, which ISO C++ does not permit.
    //

    unsigned int bits = 0;
    assert (sizeof (bits) == sizeof (f));
    memcpy (&bits, &f, sizeof (bits));

    for (int i = 31; i >= 0; i--)
    {
        os << (((bits >> i) & 1)? '1': '0');

        if (i == 31 || i == 23)
            os << ' ';
    }
}
277 
278 
279 void
280 printBits (char c[19], half h)
281 {
282  unsigned short b = h.bits();
283 
284  for (int i = 15, j = 0; i >= 0; i--, j++)
285  {
286  c[j] = (((b >> i) & 1)? '1': '0');
287 
288  if (i == 15 || i == 10)
289  c[++j] = ' ';
290  }
291 
292  c[18] = 0;
293 }
294 
295 
// Fills c with the 32 bits of f, most significant bit first, as '0'/'1'
// characters, with a blank after bit 31 and after bit 23, followed by a
// terminating zero: 34 characters plus the terminator, 35 bytes in all.
void
printBits (char c[35], float f)
{
    //
    // Copy the object representation with memcpy rather than reading the
    // inactive member of a union, which ISO C++ does not permit.
    //

    unsigned int bits = 0;
    assert (sizeof (bits) == sizeof (f));
    memcpy (&bits, &f, sizeof (bits));

    for (int i = 31, j = 0; i >= 0; i--, j++)
    {
        c[j] = (((bits >> i) & 1)? '1': '0');

        if (i == 31 || i == 23)
            c[++j] = ' ';
    }

    c[34] = 0;
}
311 }