mulhf3.c
5.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
//===-- lib/mulhf3.c - Half-precision multiplication ------------*- C -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements single-precision soft-float multiplication
// with the IEEE-754 default rounding (to nearest, ties to even).
//
//===----------------------------------------------------------------------===//
#define HALF_PRECISION
#include "fp_mul_impl.inc"
COMPILER_RT_ABI fp_t __mulhf3(fp_t a, fp_t b) {
// return __mulXf3__(a, b);
const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
rep_t aSignificand = toRep(a) & significandMask;
rep_t bSignificand = toRep(b) & significandMask;
int scale = 0;
// Detect if a or b is zero, denormal, infinity, or NaN.
if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
const rep_t aAbs = toRep(a) & absMask;
const rep_t bAbs = toRep(b) & absMask;
// NaN * anything = qNaN
if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
// anything * NaN = qNaN
if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
if (aAbs == infRep) {
// infinity * non-zero = +/- infinity
if (bAbs) return fromRep(aAbs | productSign);
// infinity * zero = NaN
else return fromRep(qnanRep);
}
if (bAbs == infRep) {
//? non-zero * infinity = +/- infinity
if (aAbs) return fromRep(bAbs | productSign);
// zero * infinity = NaN
else return fromRep(qnanRep);
}
// zero * anything = +/- zero
if (!aAbs) return fromRep(productSign);
// anything * zero = +/- zero
if (!bAbs) return fromRep(productSign);
// one or both of a or b is denormal, the other (if applicable) is a
// normal number. Renormalize one or both of a and b, and set scale to
// include the necessary exponent adjustment.
if (aAbs < implicitBit) scale += normalize(&aSignificand);
if (bAbs < implicitBit) scale += normalize(&bSignificand);
}
// Or in the implicit significand bit. (If we fell through from the
// denormal path it was already set by normalize( ), but setting it twice
// won't hurt anything.)
aSignificand |= implicitBit;
bSignificand |= implicitBit;
// Get the significand of a*b. Before multiplying the significands, shift
// one of them left to left-align it in the field. Thus, the product will
// have (exponentBits + 2) integral digits, all but two of which must be
// zero. Normalizing this result is just a conditional left-shift by one
// and bumping the exponent accordingly.
rep_t productHi, productLo;
wideMultiply(aSignificand, bSignificand << exponentBits,
&productHi, &productLo);
int productExponent = aExponent + bExponent - exponentBias + scale;
// Normalize the significand, adjust exponent if needed.
if (productHi & implicitBit) productExponent++;
else wideLeftShift(&productHi, &productLo, 1);
// If we have overflowed the type, return +/- infinity.
if (productExponent >= maxExponent) return fromRep(infRep | productSign);
if (productExponent <= 0) {
// Result is denormal before rounding
//
// If the result is so small that it just underflows to zero, return
// a zero of the appropriate sign. Mathematically there is no need to
// handle this case separately, but we make it a special case to
// simplify the shift logic.
const unsigned int shift = REP_C(1) - (unsigned int)productExponent;
if (shift >= typeWidth) return fromRep(productSign);
// Otherwise, shift the significand of the result so that the round
// bit is the high bit of productLo.
// wideRightShiftWithSticky(&productHi, &productLo, shift);
if (shift < typeWidth) {
const bool sticky = productLo << (typeWidth - shift);
productLo = productHi << (typeWidth - shift) | productLo >> shift | sticky;
productHi = productHi >> shift;
}
else if (shift < 2*typeWidth) {
const bool sticky = productHi << (2*typeWidth - shift) | productLo;
productLo = productHi >> (shift - typeWidth) | sticky;
productHi = 0;
} else {
const bool sticky = productHi | productLo;
productLo = sticky;
productHi = 0;
}
}
else {
// Result is normal before rounding; insert the exponent.
productHi &= significandMask;
productHi |= (rep_t)productExponent << significandBits;
}
// Insert the sign of the result:
productHi |= productSign;
// Final rounding. The final result may overflow to infinity, or underflow
// to zero, but those are the correct results in those cases. We use the
// default IEEE-754 round-to-nearest, ties-to-even rounding mode.
if (productLo > signBit) productHi++;
if (productLo == signBit) productHi += productHi & 1;
return fromRep(productHi);
}
#if defined(__ARM_EABI__)
#if defined(COMPILER_RT_ARMHF_TARGET)
AEABI_RTABI fp_t __aeabi_fmulh(fp_t a, fp_t b) {
return __mulhf3(a, b);
}
#else
AEABI_RTABI fp_t __aeabi_fmulh(fp_t a, fp_t b) COMPILER_RT_ALIAS(__mulhf3);
#endif
#endif