1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Software IEEE754 64-bit floating point.
6 // Only referred to (and thus linked in) by softfloat targets
7 // and by tests in this directory.
// bias64 is -(2^(expbits64-1)) + 1.
14 bias64 = -1<<(expbits64-1) + 1
16 nan64 uint64 = (1<<expbits64-1)<<mantbits64 + 1<<(mantbits64-1) // quiet NaN, 0 payload
// inf64: exponent field all ones, mantissa field zero.
17 inf64 uint64 = (1<<expbits64 - 1) << mantbits64
// neg64: the single bit just above the exponent field.
18 neg64 uint64 = 1 << (expbits64 + mantbits64)
// bias32 is -(2^(expbits32-1)) + 1.
22 bias32 = -1<<(expbits32-1) + 1
24 nan32 uint32 = (1<<expbits32-1)<<mantbits32 + 1<<(mantbits32-1) // quiet NaN, 0 payload
// inf32: exponent field all ones, mantissa field zero.
25 inf32 uint32 = (1<<expbits32 - 1) << mantbits32
// neg32: the single bit just above the exponent field.
26 neg32 uint32 = 1 << (expbits32 + mantbits32)
// funpack64 splits the 64-bit encoding f into sign, mantissa and exponent,
// and reports special values through the inf and nan results.
// sign is masked out in place; it is not shifted down to bit 0.
29 func funpack64(f uint64) (sign, mant uint64, exp int, inf, nan bool) {
30 sign = f & (1 << (mantbits64 + expbits64))
// the low mantbits64 bits of f
31 mant = f & (1<<mantbits64 - 1)
// the expbits64-wide field directly above the mantissa
32 exp = int(f>>mantbits64) & (1<<expbits64 - 1)
// exponent field all ones — presumably the Inf/NaN encodings; confirm against the case body
35 case 1<<expbits64 - 1:
// runs while the implicit-bit position of mant is still clear —
// presumably normalizing a denormal; confirm against the loop body
47 for mant < 1<<mantbits64 {
54 // add implicit top bit
55 mant |= 1 << mantbits64
// funpack32 splits the 32-bit encoding f into sign, mantissa and exponent,
// and reports special values through the inf and nan results.
// sign is masked out in place; it is not shifted down to bit 0.
61 func funpack32(f uint32) (sign, mant uint32, exp int, inf, nan bool) {
62 sign = f & (1 << (mantbits32 + expbits32))
// the low mantbits32 bits of f
63 mant = f & (1<<mantbits32 - 1)
// the expbits32-wide field directly above the mantissa
64 exp = int(f>>mantbits32) & (1<<expbits32 - 1)
// exponent field all ones — presumably the Inf/NaN encodings; confirm against the case body
67 case 1<<expbits32 - 1:
// runs while the implicit-bit position of mant is still clear —
// presumably normalizing a denormal; confirm against the loop body
79 for mant < 1<<mantbits32 {
86 // add implicit top bit
87 mant |= 1 << mantbits32
// fpack64 assembles a 64-bit encoding from sign, mant and exp.
// trunc carries bits that were already shifted out of mant; the rounding
// tests below consult it.
93 func fpack64(sign, mant uint64, exp int, trunc uint64) uint64 {
// remember the inputs so the denormal path can restart from them
94 mant0, exp0, trunc0 := mant, exp, trunc
// mant is below the implicit-bit position
98 for mant < 1<<mantbits64 {
// mant is two or more bits above the implicit-bit position
102 for mant >= 4<<mantbits64 {
// mant is one bit above the implicit-bit position
107 if mant >= 2<<mantbits64 {
// low bit set and (trunc nonzero or next bit set): looks like a
// round-to-nearest-even test — NOTE(review): confirm against the body
108 if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
110 if mant >= 4<<mantbits64 {
// exp no longer fits below the all-ones exponent field — presumably overflow; confirm
118 if exp >= 1<<expbits64-1+bias64 {
122 if exp < bias64-int(mantbits64) {
125 // repeat expecting denormal
126 mant, exp, trunc = mant0, exp0, trunc0
// same rounding test as above, applied on the denormal path
132 if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
137 if mant < 1<<mantbits64 {
// sign | (exp-bias64) in the exponent field | low mantbits64 bits of mant
141 return sign | uint64(exp-bias64)<<mantbits64 | mant&(1<<mantbits64-1)
// fpack32 assembles a 32-bit encoding from sign, mant and exp.
// trunc carries bits that were already shifted out of mant; the rounding
// tests below consult it.
144 func fpack32(sign, mant uint32, exp int, trunc uint32) uint32 {
// remember the inputs so the denormal path can restart from them
145 mant0, exp0, trunc0 := mant, exp, trunc
// mant is below the implicit-bit position
149 for mant < 1<<mantbits32 {
// mant is two or more bits above the implicit-bit position
153 for mant >= 4<<mantbits32 {
// mant is one bit above the implicit-bit position
158 if mant >= 2<<mantbits32 {
// low bit set and (trunc nonzero or next bit set): looks like a
// round-to-nearest-even test — NOTE(review): confirm against the body
159 if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
161 if mant >= 4<<mantbits32 {
// exp no longer fits below the all-ones exponent field — presumably overflow; confirm
169 if exp >= 1<<expbits32-1+bias32 {
173 if exp < bias32-int(mantbits32) {
176 // repeat expecting denormal
177 mant, exp, trunc = mant0, exp0, trunc0
// same rounding test as above, applied on the denormal path
183 if mant&1 != 0 && (trunc != 0 || mant&2 != 0) {
188 if mant < 1<<mantbits32 {
// sign | (exp-bias32) in the exponent field | low mantbits32 bits of mant
192 return sign | uint32(exp-bias32)<<mantbits32 | mant&(1<<mantbits32-1)
// fadd64 adds two values given as 64-bit encodings and returns the
// encoding of the sum. Special operands are handled case by case first;
// the general path ends in fpack64.
195 func fadd64(f, g uint64) uint64 {
196 fs, fm, fe, fi, fn := funpack64(f)
197 gs, gm, ge, gi, gn := funpack64(g)
201 case fn || gn: // NaN + x or x + NaN = NaN
204 case fi && gi && fs != gs: // +Inf + -Inf or -Inf + +Inf = NaN
207 case fi: // ±Inf + g = ±Inf
210 case gi: // f + ±Inf = ±Inf
213 case fm == 0 && gm == 0 && fs != 0 && gs != 0: // -0 + -0 = -0
216 case fm == 0: // 0 + g = g but 0 + -0 = +0
222 case gm == 0: // f + 0 = f
// swap so that f is the operand with the larger exponent, or with the
// larger mantissa when the exponents are equal
227 if fe < ge || fe == ge && fm < gm {
228 f, g, fs, fm, fe, gs, gm, ge = g, f, gs, gm, ge, fs, fm, fe
// exponent gap between the operands; non-negative after the swap
231 shift := uint(fe - ge)
// the low shift bits of gm
234 trunc := gm & (1<<shift - 1)
247 return fpack64(fs, fm, fe-2, trunc)
// fsub64 computes f - g as f + (-g), using fneg64 and fadd64.
250 func fsub64(f, g uint64) uint64 {
251 return fadd64(f, fneg64(g))
// fneg64 flips bit mantbits64+expbits64 of f, the same bit funpack64
// extracts as the sign; all other bits are left untouched.
254 func fneg64(f uint64) uint64 {
255 return f ^ (1 << (mantbits64 + expbits64))
// fmul64 multiplies two values given as 64-bit encodings and returns the
// encoding of the product. Special operands are handled case by case
// first; the general path multiplies the mantissas with mullu and packs
// the result with fpack64.
258 func fmul64(f, g uint64) uint64 {
259 fs, fm, fe, fi, fn := funpack64(f)
260 gs, gm, ge, gi, gn := funpack64(g)
264 case fn || gn: // NaN * g or f * NaN = NaN
267 case fi && gi: // Inf * Inf = Inf (with sign adjusted)
270 case fi && gm == 0, fm == 0 && gi: // 0 * Inf = Inf * 0 = NaN
273 case fm == 0: // 0 * x = 0 (with sign adjusted)
276 case gm == 0: // x * 0 = 0 (with sign adjusted)
// NOTE(review): two 53-bit values with their top bit set multiply to a
// 105- or 106-bit product; the bit counts in the comment below look off — confirm.
280 // 53-bit * 53-bit = 107- or 108-bit
281 lo, hi := mullu(fm, gm)
282 shift := mantbits64 - 1
// the low shift bits of the product, handed to fpack64 as trunc
283 trunc := lo & (1<<shift - 1)
// the 128-bit product hi:lo shifted right by shift, kept in 64 bits
284 mant := hi<<(64-shift) | lo>>shift
// signs combine by XOR
285 return fpack64(fs^gs, mant, fe+ge-1, trunc)
// fdiv64 divides f by g, both given as 64-bit encodings, and returns the
// encoding of the quotient. Special operands are handled case by case
// first; the general path divides the mantissas with divlu and packs the
// result with fpack64.
288 func fdiv64(f, g uint64) uint64 {
289 fs, fm, fe, fi, fn := funpack64(f)
290 gs, gm, ge, gi, gn := funpack64(g)
294 case fn || gn: // NaN / g = f / NaN = NaN
297 case fi && gi: // ±Inf / ±Inf = NaN
300 case !fi && !gi && fm == 0 && gm == 0: // 0 / 0 = NaN
303 case fi, !gi && gm == 0: // Inf / g = f / 0 = Inf
304 return fs ^ gs ^ inf64
306 case gi, fm == 0: // f / Inf = 0 / g = 0
// blank assignment of the four flags; it has no runtime effect
309 _, _, _, _ = fi, fn, gi, gn
// NOTE(review): a 53-bit value shifted left by 54 and divided by a 53-bit
// value (both with the top bit set) gives a 54- or 55-bit quotient; the
// counts in the comment below look off — confirm.
311 // 53-bit<<54 / 53-bit = 53- or 54-bit.
312 shift := mantbits64 + 2
// dividend is fm<<shift as a 128-bit value (high word, low word);
// the remainder r is handed to fpack64 as trunc
313 q, r := divlu(fm>>(64-shift), fm<<shift, gm)
314 return fpack64(fs^gs, q, fe-ge-2, r)
// f64to32 converts a 64-bit encoding to a 32-bit encoding by unpacking
// with funpack64 and repacking with fpack32.
317 func f64to32(f uint64) uint32 {
318 fs, fm, fe, fi, fn := funpack64(f)
// move the sign bit down by 32 positions into the 32-bit word
322 fs32 := uint32(fs >> 32)
326 const d = mantbits64 - mantbits32 - 1
// the high bits of fm become the mantissa; the low d bits go in as trunc
327 return fpack32(fs32, uint32(fm>>d), fe-1, uint32(fm&(1<<d-1)))
// f32to64 converts a 32-bit encoding to a 64-bit encoding by unpacking
// with funpack32 and repacking with fpack64.
330 func f32to64(f uint32) uint64 {
// difference in mantissa width between the two formats
331 const d = mantbits64 - mantbits32
332 fs, fm, fe, fi, fn := funpack32(f)
// move the sign bit up by 32 positions into the 64-bit word
336 fs64 := uint64(fs) << 32
// widen the mantissa by d bits; nothing was truncated, so trunc is 0
340 return fpack64(fs64, uint64(fm)<<d, fe, 0)
// fcmp64 compares two 64-bit encodings. It returns an ordering value cmp
// and a flag isnan. The callers visible in this file test cmp == 0 for
// equality, cmp >= 1 for greater-than and cmp >= 0 for greater-or-equal,
// and always require isnan to be false.
343 func fcmp64(f, g uint64) (cmp int32, isnan bool) {
344 fs, fm, _, fi, fn := funpack64(f)
345 gs, gm, _, gi, gn := funpack64(g)
348 case fn, gn: // flag NaN
351 case !fi && !gi && fm == 0 && gm == 0: // ±0 == ±0
354 case fs > gs: // f < 0, g > 0
357 case fs < gs: // f > 0, g < 0
360 // Same sign, not NaN.
361 // Can compare encodings directly now.
// for negative values (fs != 0) the encoding order is reversed
363 case fs == 0 && f < g, fs != 0 && f > g:
366 case fs == 0 && f > g, fs != 0 && f < g:
// f64toint converts a 64-bit encoding to an int64. ok reports whether the
// conversion succeeded.
374 func f64toint(f uint64) (val int64, ok bool) {
375 fs, fm, fe, fi, fn := funpack64(f)
381 case fe < -1: // f < 0.5
384 case fe > 63: // f >= 2^63
385 if fs != 0 && fm == 0 { // f == -2^63
386 return -1 << 63, true
// the two loops run until fe equals mantbits64 — presumably shifting fm
// in step so that it ends up as the integer magnitude; confirm against
// the loop bodies
394 for fe > int(mantbits64) {
398 for fe < int(mantbits64) {
// fintto64 converts an int64 to a 64-bit encoding via fpack64.
409 func fintto64(val int64) (f uint64) {
// bit 63 of val, i.e. its two's-complement sign bit
410 fs := uint64(val) & (1 << 63)
// exp is mantbits64 and trunc is 0; mant is set on lines not shown here —
// presumably the magnitude of val; confirm
415 return fpack64(fs, mant, int(mantbits64), 0)
// fintto32 converts an int64 to a 32-bit encoding via fpack32.
417 func fintto32(val int64) (f uint32) {
// bit 63 of val, i.e. its two's-complement sign bit
418 fs := uint64(val) & (1 << 63)
423 // Reduce mantissa size until it fits into a uint32.
424 // Keep track of the bits we throw away, and if any are
425 // nonzero, OR them into the lowest bit.
426 exp := int(mantbits32)
// fold the low bit of mant into trunc
429 trunc |= uint32(mant) & 1
// the sign moves down 32 positions to fit the 32-bit word
434 return fpack32(uint32(fs>>32), uint32(mant), exp, trunc)
437 // 64x64 -> 128 multiply.
438 // Adapted from Hacker's Delight.
// Returns the low word first, then the high word.
439 func mullu(u, v uint64) (lo, hi uint64) {
// lo is the wrapping 64-bit product; hi is assembled from partial
// products computed on lines not shown here
453 return u * v, u1*v1 + w2 + w1>>s
456 // 128/64 -> 64 quotient, 64 remainder.
457 // Adapted from Hacker's Delight.
// u1 is the high word of the dividend, u0 the low word, v the divisor.
458 func divlu(u1, u0, v uint64) (q, r uint64) {
// all-ones quotient and remainder — presumably the overflow case;
// the guarding condition is not shown here, confirm
462 return 1<<64 - 1, 1<<64 - 1
465 // s = nlz(v); v <<= s
// low 32-bit digit of the divisor
473 vn0 := v & (1<<32 - 1)
// dividend shifted left by s, upper 64 bits
474 un32 := u1<<s | u0>>(64-s)
// low 32-bit digit of un10
477 un0 := un10 & (1<<32 - 1)
// remainder that goes with the estimated high quotient digit q1
479 rhat := un32 - q1*vn1
// q1 does not fit in one base-b digit, or the estimate is too large
482 if q1 >= b || q1*vn0 > b*rhat+un1 {
490 un21 := un32*b + un1 - q1*v
// same test for the low quotient digit q0
495 if q0 >= b || q0*vn0 > b*rhat+un0 {
// combine the two digits; shift the remainder back right by s
503 return q1*b + q0, (un21*b + un0 - q0*v) >> s
// fadd32 adds two 32-bit encodings by widening both with f32to64, adding
// with fadd64, and narrowing the result with f64to32.
506 func fadd32(x, y uint32) uint32 {
507 return f64to32(fadd64(f32to64(x), f32to64(y)))
// fmul32 multiplies two 32-bit encodings by widening both with f32to64,
// multiplying with fmul64, and narrowing the result with f64to32.
510 func fmul32(x, y uint32) uint32 {
511 return f64to32(fmul64(f32to64(x), f32to64(y)))
// fdiv32 divides two 32-bit encodings by widening both with f32to64,
// dividing with fdiv64, and narrowing the result with f64to32.
514 func fdiv32(x, y uint32) uint32 {
515 // TODO: are there double-rounding problems here? See issue 48807.
516 return f64to32(fdiv64(f32to64(x), f32to64(y)))
// feq32 reports whether fcmp64 on the widened operands returns cmp == 0
// with the NaN flag clear.
519 func feq32(x, y uint32) bool {
520 cmp, nan := fcmp64(f32to64(x), f32to64(y))
521 return cmp == 0 && !nan
// fgt32 reports whether fcmp64 on the widened operands returns cmp >= 1
// with the NaN flag clear.
524 func fgt32(x, y uint32) bool {
525 cmp, nan := fcmp64(f32to64(x), f32to64(y))
526 return cmp >= 1 && !nan
// fge32 reports whether fcmp64 on the widened operands returns cmp >= 0
// with the NaN flag clear.
529 func fge32(x, y uint32) bool {
530 cmp, nan := fcmp64(f32to64(x), f32to64(y))
531 return cmp >= 0 && !nan
// feq64 reports whether fcmp64(x, y) returns cmp == 0 with the NaN flag clear.
534 func feq64(x, y uint64) bool {
535 cmp, nan := fcmp64(x, y)
536 return cmp == 0 && !nan
// fgt64 reports whether fcmp64(x, y) returns cmp >= 1 with the NaN flag clear.
539 func fgt64(x, y uint64) bool {
540 cmp, nan := fcmp64(x, y)
541 return cmp >= 1 && !nan
// fge64 reports whether fcmp64(x, y) returns cmp >= 0 with the NaN flag clear.
544 func fge64(x, y uint64) bool {
545 cmp, nan := fcmp64(x, y)
546 return cmp >= 0 && !nan
// fint32to32 sign-extends x to int64 and converts it with fintto32.
549 func fint32to32(x int32) uint32 {
550 return fintto32(int64(x))
// fint32to64 sign-extends x to int64 and converts it with fintto64.
553 func fint32to64(x int32) uint64 {
554 return fintto64(int64(x))
// fint64to32 takes an int64 and returns a uint32.
// NOTE(review): the body is not shown here; presumably it forwards to
// fintto32 like fint32to32 does — confirm.
557 func fint64to32(x int64) uint32 {
// fint64to64 takes an int64 and returns a uint64.
// NOTE(review): the body is not shown here; presumably it forwards to
// fintto64 like fint32to64 does — confirm.
561 func fint64to64(x int64) uint64 {
// f32toint32 widens x with f32to64 and converts it with f64toint.
565 func f32toint32(x uint32) int32 {
// the ok result of f64toint is discarded
566 val, _ := f64toint(f32to64(x))
// f32toint64 widens x with f32to64 and converts it with f64toint.
570 func f32toint64(x uint32) int64 {
// the ok result of f64toint is discarded
571 val, _ := f64toint(f32to64(x))
// f64toint32 converts x with f64toint.
575 func f64toint32(x uint64) int32 {
// the ok result of f64toint is discarded
576 val, _ := f64toint(x)
// f64toint64 converts x with f64toint.
580 func f64toint64(x uint64) int64 {
// the ok result of f64toint is discarded
581 val, _ := f64toint(x)
// f64touint64 converts a 64-bit encoding to a uint64 using the signed
// conversion f64toint64.
// NOTE(review): the branch condition and the definition of y are not shown
// here; presumably values below m convert directly and larger ones are
// reduced by m first — confirm.
585 func f64touint64(x uint64) uint64 {
586 var m uint64 = 0x43e0000000000000 // float64 1<<63
588 return uint64(f64toint64(x))
591 z := uint64(f64toint64(y))
// f32touint64 converts a 32-bit encoding to a uint64 using the signed
// conversion f32toint64.
// NOTE(review): the branch condition and the definition of y are not shown
// here; presumably values below m convert directly and larger ones are
// reduced by m first — confirm.
595 func f32touint64(x uint32) uint64 {
596 var m uint32 = 0x5f000000 // float32 1<<63
598 return uint64(f32toint64(x))
601 z := uint64(f32toint64(y))
// fuint64to64 converts a uint64 to a 64-bit encoding using the signed
// conversion fint64to64.
// NOTE(review): the branch condition and the definition of z are not shown
// here; presumably x converts directly when it fits in an int64 — confirm.
605 func fuint64to64(x uint64) uint64 {
607 return fint64to64(int64(x))
609 // See ../cmd/compile/internal/ssagen/ssa.go:uint64Tofloat
613 r := fint64to64(int64(z))
// fuint64to32 converts a uint64 to a 32-bit encoding using the signed
// conversion fint64to32.
// NOTE(review): the branch condition and the definition of z are not shown
// here; presumably x converts directly when it fits in an int64 — confirm.
617 func fuint64to32(x uint64) uint32 {
619 return fint64to32(int64(x))
621 // See ../cmd/compile/internal/ssagen/ssa.go:uint64Tofloat
625 r := fint64to32(int64(z))