ECCE @ EIC Software
Reference for
ECCE @ EIC
simulation and reconstruction software on GitHub
Home page
Related Pages
Modules
Namespaces
Classes
Files
External Links
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
half.h
Go to the documentation of this file.
Or view
the newest version in sPHENIX GitHub for file half.h
//
// Copyright (c) 2002, Industrial Light & Magic, a division of Lucas
// Digital Ltd. LLC
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
// *       Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// *       Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// *       Neither the name of Industrial Light & Magic nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//

// Primary authors:
//     Florian Kainz <kainz@ilm.com>
//     Rod Bogart <rgb@ilm.com>

//---------------------------------------------------------------------------
//
//      half -- a 16-bit floating point number class:
//
//      Type half can represent positive and negative numbers whose
//      magnitude is between roughly 6.1e-5 and 6.5e+4 with a relative
//      error of 9.8e-4; numbers smaller than 6.1e-5 can be represented
//      with an absolute error of 6.0e-8.  All integers from -2048 to
//      +2048 can be represented exactly.
//
//      Type half behaves (almost) like the built-in C++ floating point
//      types.  In arithmetic expressions, half, float and double can be
//      mixed freely.  Here are a few examples:
//
//          half a (3.5);
//          float b (a + sqrt (a));
//          a += b;
//          b += a;
//          b = a + 7;
//
//      Conversions from half to float are lossless; all half numbers
//      are exactly representable as floats.
//
//      Conversions from float to half may not preserve a float's value
//      exactly.  If a float is not representable as a half, then the
//      float value is rounded to the nearest representable half.  If a
//      float value is exactly in the middle between the two closest
//      representable half values, then the float value is rounded to
//      the closest half whose least significant bit is zero.
//
//      Overflows during float-to-half conversions cause arithmetic
//      exceptions.  An overflow occurs when the float value to be
//      converted is too large to be represented as a half, or if the
//      float value is an infinity or a NAN.
//
//      The implementation of type half makes the following assumptions
//      about the implementation of the built-in C++ types:
//
//          float is an IEEE 754 single-precision number
//          sizeof (float) == 4
//          sizeof (unsigned int) == sizeof (float)
//          alignof (unsigned int) == alignof (float)
//          sizeof (unsigned short) == 2
//
//---------------------------------------------------------------------------

#ifndef _HALF_H_
#define _HALF_H_

#include <iostream>

// Export decoration.  When building or consuming the Windows DLL
// (OPENEXR_DLL defined), HALF_EXPORT expands to the matching __declspec
// and the static lookup tables are declared without const; otherwise
// HALF_EXPORT is empty and the tables are const.
#if defined(OPENEXR_DLL)
    #if defined(HALF_EXPORTS)
        #define HALF_EXPORT __declspec(dllexport)
    #else
        #define HALF_EXPORT __declspec(dllimport)
    #endif
    #define HALF_EXPORT_CONST
#else
    #define HALF_EXPORT
    #define HALF_EXPORT_CONST const
#endif

//
// half -- 16-bit floating point number whose only data member is the
// raw bit pattern _h (an unsigned short).  Conversions to and from
// float go through the static tables _toFloat and _eLut.
//
class HALF_EXPORT half
{
  public:

    //-------------
    // Constructors
    //-------------

    half ();                    // no initialization
    half (float f);


    //--------------------
    // Conversion to float
    //--------------------

    operator float () const;


    //------------
    // Unary minus
    //------------

    half operator - () const;


    //-----------
    // Assignment
    //-----------

    half & operator = (half h);
    half & operator = (float f);

    half & operator += (half h);
    half & operator += (float f);

    half & operator -= (half h);
    half & operator -= (float f);

    half & operator *= (half h);
    half & operator *= (float f);

    half & operator /= (half h);
    half & operator /= (float f);


    //---------------------------------------------------------
    // Round to n-bit precision (n should be between 0 and 10).
    // After rounding, the significand's 10-n least significant
    // bits will be zero.
    //---------------------------------------------------------

    half round (unsigned int n) const;


    //--------------------------------------------------------------------
    // Classification:
    //
    //  h.isFinite()            returns true if h is a normalized number,
    //                          a denormalized number or zero
    //
    //  h.isNormalized()        returns true if h is a normalized number
    //
    //  h.isDenormalized()      returns true if h is a denormalized number
    //
    //  h.isZero()              returns true if h is zero
    //
    //  h.isNan()               returns true if h is a NAN
    //
    //  h.isInfinity()          returns true if h is a positive
    //                          or a negative infinity
    //
    //  h.isNegative()          returns true if the sign bit of h
    //                          is set (negative)
    //--------------------------------------------------------------------

    bool isFinite () const;
    bool isNormalized () const;
    bool isDenormalized () const;
    bool isZero () const;
    bool isNan () const;
    bool isInfinity () const;
    bool isNegative () const;


    //--------------------------------------------
    // Special values
    //
    //  posInf()        returns +infinity
    //
    //  negInf()        returns -infinity
    //
    //  qNan()          returns a NAN with the bit
    //                  pattern 0111111111111111
    //
    //  sNan()          returns a NAN with the bit
    //                  pattern 0111110111111111
    //--------------------------------------------

    static half posInf ();
    static half negInf ();
    static half qNan ();
    static half sNan ();


    //--------------------------------------
    // Access to the internal representation
    //--------------------------------------

    unsigned short bits () const;
    void setBits (unsigned short bits);


  public:

    // Overlay of a float and an unsigned int, used to access the bit
    // pattern of a float (see the assumptions about sizeof and alignof
    // listed at the top of this file).
    union uif
    {
        unsigned int i;
        float f;
    };

  private:

    // Out-of-line helpers; their definitions are not part of this header.
    static short convert (int i);
    static float overflow ();

    // The 16-bit pattern: 1 sign bit, 5 exponent bits, 10 significand bits.
    unsigned short _h;

    // Lookup tables: _toFloat is indexed by a half bit pattern, _eLut by
    // the 9-bit sign-and-exponent field of a float.
    static HALF_EXPORT_CONST uif _toFloat[1 << 16];
    static HALF_EXPORT_CONST unsigned short _eLut[1 << 9];
};
233
234
//-----------
235
// Stream I/O
236
//-----------
237
238
HALF_EXPORT
std::ostream &
operator <<
(std::ostream &os,
half
h
);
239
HALF_EXPORT
std::istream &
operator >>
(std::istream &is,
half
&
h
);
240
241
242
//----------
243
// Debugging
244
//----------
245
246
HALF_EXPORT
void
printBits
(std::ostream &os,
half
h
);
247
HALF_EXPORT
void
printBits
(std::ostream &os,
float
f
);
248
HALF_EXPORT
void
printBits
(
char
c
[19],
half
h
);
249
HALF_EXPORT
void
printBits
(
char
c
[35],
float
f
);
//-------------------------------------------------------------------------
// Limits
//
// Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
// constants, but at least one other compiler (gcc 2.96) produces incorrect
// results if they are.
//-------------------------------------------------------------------------

#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER

  #define HALF_MIN      5.96046448e-08f // Smallest positive half

  #define HALF_NRM_MIN  6.10351562e-05f // Smallest positive normalized half

  #define HALF_MAX      65504.0f        // Largest positive half

  #define HALF_EPSILON  0.00097656f     // Smallest positive e for which
                                        // half (1.0 + e) != half (1.0)
#else

  #define HALF_MIN      5.96046448e-08  // Smallest positive half

  #define HALF_NRM_MIN  6.10351562e-05  // Smallest positive normalized half

  #define HALF_MAX      65504.0         // Largest positive half

  #define HALF_EPSILON  0.00097656      // Smallest positive e for which
                                        // half (1.0 + e) != half (1.0)
#endif


#define HALF_MANT_DIG   11              // Number of digits in mantissa
                                        // (significand + hidden leading 1)

#define HALF_DIG        2               // Number of base 10 digits that
                                        // can be represented without change
                                        // NOTE(review): floor ((HALF_MANT_DIG
                                        // - 1) * log10 (2)) is 3; confirm
                                        // whether 2 is intentional.

#define HALF_RADIX      2               // Base of the exponent

#define HALF_MIN_EXP    -13             // Minimum negative integer such that
                                        // HALF_RADIX raised to the power of
                                        // one less than that integer is a
                                        // normalized half

#define HALF_MAX_EXP    16              // Maximum positive integer such that
                                        // HALF_RADIX raised to the power of
                                        // one less than that integer is a
                                        // normalized half

#define HALF_MIN_10_EXP -4              // Minimum negative integer such
                                        // that 10 raised to that power is
                                        // a normalized half

#define HALF_MAX_10_EXP 4               // Maximum positive integer such
                                        // that 10 raised to that power is
                                        // a normalized half


//---------------------------------------------------------------------------
//
// Implementation --
//
// Representation of a float:
//
//      We assume that a float, f, is an IEEE 754 single-precision
//      floating point number, whose bits are arranged as follows:
//
//          31 (msb)
//          |
//          | 30     23
//          | |      |
//          | |      | 22                    0 (lsb)
//          | |      | |                     |
//          X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
//
//          s e        m
//
//      S is the sign-bit, e is the exponent and m is the significand.
//
//      If e is between 1 and 254, f is a normalized number:
//
//                  s    e-127
//          f = (-1)  * 2      * 1.m
//
//      If e is 0, and m is not zero, f is a denormalized number:
//
//                  s    -126
//          f = (-1)  * 2      * 0.m
//
//      If e and m are both zero, f is zero:
//
//          f = 0.0
//
//      If e is 255, f is an "infinity" or "not a number" (NAN),
//      depending on whether m is zero or not.
//
//      Examples:
//
//          0 00000000 00000000000000000000000 = 0.0
//          0 01111110 00000000000000000000000 = 0.5
//          0 01111111 00000000000000000000000 = 1.0
//          0 10000000 00000000000000000000000 = 2.0
//          0 10000000 10000000000000000000000 = 3.0
//          1 10000101 11110000010000000000000 = -124.0625
//          0 11111111 00000000000000000000000 = +infinity
//          1 11111111 00000000000000000000000 = -infinity
//          0 11111111 10000000000000000000000 = NAN
//          1 11111111 11111111111111111111111 = NAN
//
// Representation of a half:
//
//      Here is the bit-layout for a half number, h:
//
//          15 (msb)
//          |
//          | 14  10
//          | |   |
//          | |   | 9        0 (lsb)
//          | |   | |        |
//          X XXXXX XXXXXXXXXX
//
//          s e     m
//
//      S is the sign-bit, e is the exponent and m is the significand.
//
//      If e is between 1 and 30, h is a normalized number:
//
//                  s    e-15
//          h = (-1)  * 2     * 1.m
//
//      If e is 0, and m is not zero, h is a denormalized number:
//
//                  s    -14
//          h = (-1)  * 2     * 0.m
//
//      If e and m are both zero, h is zero:
//
//          h = 0.0
//
//      If e is 31, h is an "infinity" or "not a number" (NAN),
//      depending on whether m is zero or not.
//
//      Examples:
//
//          0 00000 0000000000 = 0.0
//          0 01110 0000000000 = 0.5
//          0 01111 0000000000 = 1.0
//          0 10000 0000000000 = 2.0
//          0 10000 1000000000 = 3.0
//          1 10101 1111000001 = -124.0625
//          0 11111 0000000000 = +infinity
//          1 11111 0000000000 = -infinity
//          0 11111 1000000000 = NAN
//          1 11111 1111111111 = NAN
//
// Conversion:
//
//      Converting from a float to a half requires some non-trivial bit
//      manipulations.  In some cases, this makes conversion relatively
//      slow, but the most common case is accelerated via table lookups.
//
//      Converting back from a half to a float is easier because we don't
//      have to do any rounding.  In addition, there are only 65536
//      different half numbers; we can convert each of those numbers once
//      and store the results in a table.  Later, all conversions can be
//      done using only simple table lookups.
//
//---------------------------------------------------------------------------
420
421
422
//--------------------
423
// Simple constructors
424
//--------------------
425
426
inline
427
half::half
()
428
{
429
// no initialization
430
}
431
432
433
//----------------------------
434
// Half-from-float constructor
435
//----------------------------
436
437
inline
438
half::half
(
float
f
)
439
{
440
uif
x
;
441
442
x.
f
=
f
;
443
444
if
(f == 0)
445
{
446
//
447
// Common special case - zero.
448
// Preserve the zero's sign bit.
449
//
450
451
_h
= (x.
i
>> 16);
452
}
453
else
454
{
455
//
456
// We extract the combined sign and exponent, e, from our
457
// floating-point number, f. Then we convert e to the sign
458
// and exponent of the half number via a table lookup.
459
//
460
// For the most common case, where a normalized half is produced,
461
// the table lookup returns a non-zero value; in this case, all
462
// we have to do is round f's significand to 10 bits and combine
463
// the result with e.
464
//
465
// For all other cases (overflow, zeroes, denormalized numbers
466
// resulting from underflow, infinities and NANs), the table
467
// lookup returns zero, and we call a longer, non-inline function
468
// to do the float-to-half conversion.
469
//
470
471
int
e
= (x.
i
>> 23) & 0x000001ff;
472
473
e =
_eLut
[
e
];
474
475
if
(e)
476
{
477
//
478
// Simple case - round the significand, m, to 10
479
// bits and combine it with the sign and exponent.
480
//
481
482
int
m
= x.
i
& 0x007fffff;
483
_h
= e + ((m + 0x00000fff + ((m >> 13) & 1)) >> 13);
484
}
485
else
486
{
487
//
488
// Difficult case - call a function.
489
//
490
491
_h
=
convert
(x.
i
);
492
}
493
}
494
}
495
496
497
//------------------------------------------
498
// Half-to-float conversion via table lookup
499
//------------------------------------------
500
501
inline
502
half::operator float ()
const
503
{
504
return
_toFloat[
_h
].f;
505
}
506
507
508
//-------------------------
509
// Round to n-bit precision
510
//-------------------------
511
512
inline
half
513
half::round
(
unsigned
int
n
)
const
514
{
515
//
516
// Parameter check.
517
//
518
519
if
(n >= 10)
520
return
*
this
;
521
522
//
523
// Disassemble h into the sign, s,
524
// and the combined exponent and significand, e.
525
//
526
527
unsigned
short
s
=
_h
& 0x8000;
528
unsigned
short
e
=
_h
& 0x7fff;
529
530
//
531
// Round the exponent and significand to the nearest value
532
// where ones occur only in the (10-n) most significant bits.
533
// Note that the exponent adjusts automatically if rounding
534
// up causes the significand to overflow.
535
//
536
537
e >>= 9 -
n
;
538
e += e & 1;
539
e <<= 9 -
n
;
540
541
//
542
// Check for exponent overflow.
543
//
544
545
if
(e >= 0x7c00)
546
{
547
//
548
// Overflow occurred -- truncate instead of rounding.
549
//
550
551
e =
_h
;
552
e >>= 10 -
n
;
553
e <<= 10 -
n
;
554
}
555
556
//
557
// Put the original sign bit back.
558
//
559
560
half
h
;
561
h.
_h
= s |
e
;
562
563
return
h
;
564
}
565
566
567
//-----------------------
568
// Other inline functions
569
//-----------------------
570
571
inline
half
572
half::operator -
()
const
573
{
574
half
h
;
575
h.
_h
=
_h
^ 0x8000;
576
return
h
;
577
}
578
579
580
inline
half
&
581
half::operator =
(
half
h
)
582
{
583
_h
= h.
_h
;
584
return
*
this
;
585
}
586
587
588
inline
half
&
589
half::operator =
(
float
f
)
590
{
591
*
this
=
half
(f);
592
return
*
this
;
593
}
594
595
596
inline
half
&
597
half::operator +=
(
half
h
)
598
{
599
*
this
=
half
(
float
(*
this
) +
float
(h));
600
return
*
this
;
601
}
602
603
604
inline
half
&
605
half::operator +=
(
float
f
)
606
{
607
*
this
=
half
(
float
(*
this
) + f);
608
return
*
this
;
609
}
610
611
612
inline
half
&
613
half::operator -=
(
half
h
)
614
{
615
*
this
=
half
(
float
(*
this
) -
float
(h));
616
return
*
this
;
617
}
618
619
620
inline
half
&
621
half::operator -=
(
float
f
)
622
{
623
*
this
=
half
(
float
(*
this
) - f);
624
return
*
this
;
625
}
626
627
628
inline
half
&
629
half::operator *=
(
half
h
)
630
{
631
*
this
=
half
(
float
(*
this
) *
float
(h));
632
return
*
this
;
633
}
634
635
636
inline
half
&
637
half::operator *=
(
float
f
)
638
{
639
*
this
=
half
(
float
(*
this
) * f);
640
return
*
this
;
641
}
642
643
644
inline
half
&
645
half::operator /=
(
half
h
)
646
{
647
*
this
=
half
(
float
(*
this
) /
float
(h));
648
return
*
this
;
649
}
650
651
652
inline
half
&
653
half::operator /=
(
float
f
)
654
{
655
*
this
=
half
(
float
(*
this
) / f);
656
return
*
this
;
657
}
658
659
660
inline
bool
661
half::isFinite
()
const
662
{
663
unsigned
short
e
= (
_h
>> 10) & 0x001f;
664
return
e < 31;
665
}
666
667
668
inline
bool
669
half::isNormalized
()
const
670
{
671
unsigned
short
e
= (
_h
>> 10) & 0x001f;
672
return
e > 0 && e < 31;
673
}
674
675
676
inline
bool
677
half::isDenormalized
()
const
678
{
679
unsigned
short
e
= (
_h
>> 10) & 0x001f;
680
unsigned
short
m
=
_h
& 0x3ff;
681
return
e == 0 && m != 0;
682
}
683
684
685
inline
bool
686
half::isZero
()
const
687
{
688
return
(
_h
& 0x7fff) == 0;
689
}
690
691
692
inline
bool
693
half::isNan
()
const
694
{
695
unsigned
short
e
= (
_h
>> 10) & 0x001f;
696
unsigned
short
m
=
_h
& 0x3ff;
697
return
e == 31 && m != 0;
698
}
699
700
701
inline
bool
702
half::isInfinity
()
const
703
{
704
unsigned
short
e
= (
_h
>> 10) & 0x001f;
705
unsigned
short
m
=
_h
& 0x3ff;
706
return
e == 31 && m == 0;
707
}
708
709
710
inline
bool
711
half::isNegative
()
const
712
{
713
return
(
_h
& 0x8000) != 0;
714
}
715
716
717
inline
half
718
half::posInf
()
719
{
720
half
h
;
721
h.
_h
= 0x7c00;
722
return
h
;
723
}
724
725
726
inline
half
727
half::negInf
()
728
{
729
half
h
;
730
h.
_h
= 0xfc00;
731
return
h
;
732
}
733
734
735
inline
half
736
half::qNan
()
737
{
738
half
h
;
739
h.
_h
= 0x7fff;
740
return
h
;
741
}
742
743
744
inline
half
745
half::sNan
()
746
{
747
half
h
;
748
h.
_h
= 0x7dff;
749
return
h
;
750
}
751
752
753
inline
unsigned
short
754
half::bits
()
const
755
{
756
return
_h
;
757
}
758
759
760
inline
void
761
half::setBits
(
unsigned
short
bits)
762
{
763
_h
=
bits
;
764
}

#endif
coresoftware
blob
master
offline
packages
Half
half.h
Built by
Jin Huang
. updated:
Wed Jun 29 2022 17:24:34
using
1.8.2 with
ECCE GitHub integration