strconv: Implement Ryū algorithm for ftoa shortest mode

author Rémy Oudompheng <remyoudompheng@gmail.com>

Wed, 27 Mar 2019 05:55:52 +0000 (06:55 +0100)

committer Emmanuel Odeke <emmanuel@orijtech.com>

Thu, 15 Apr 2021 09:18:03 +0000 (09:18 +0000)
author Rémy Oudompheng <remyoudompheng@gmail.com>
Wed, 27 Mar 2019 05:55:52 +0000 (06:55 +0100)
committer Emmanuel Odeke <emmanuel@orijtech.com>
Thu, 15 Apr 2021 09:18:03 +0000 (09:18 +0000)
diff --git a/src/strconv/ftoa.go b/src/strconv/ftoa.go

index a3bff52bc8333b2a7f9daed2286e1ef03dfb7ff2..eca04b851c3ad0e21c295077d8f20f40e833a0b0 100644 (file)
--- a/src/strconv/ftoa.go
+++ b/src/strconv/ftoa.go
@@ -113,15 +113,11 @@ func genericFtoa(dst []byte, val float64, fmt byte, prec, bitSize int) []byte {
         // Negative precision means "only as much as needed to be exact."
         shortest := prec < 0
         if shortest {
-               // Try Grisu3 algorithm.
-               f := new(extFloat)
-               lower, upper := f.AssignComputeBounds(mant, exp, neg, flt)
+               // Use Ryu algorithm.
                 var buf [32]byte
                 digs.d = buf[:]
-               ok = f.ShortestDecimal(&digs, &lower, &upper)
-               if !ok {
-                       return bigFtoa(dst, prec, fmt, neg, mant, exp, flt)
-               }
+               ryuFtoaShortest(&digs, mant, exp-int(flt.mantbits), flt)
+               ok = true
                 // Precision for shortest representation mode.
                 switch fmt {
                 case 'e', 'E':
diff --git a/src/strconv/ftoa_test.go b/src/strconv/ftoa_test.go

index b1685aeb2000b6c80626a01f9e38f22be97be4f0..73008b1c6219132b231e220b8d12a0bc8ebe3668 100644 (file)
--- a/src/strconv/ftoa_test.go
+++ b/src/strconv/ftoa_test.go
@@ -40,6 +40,7 @@ var ftoatests = []ftoaTest{
         {200000, 'x', -1, "0x1.86ap+17"},
         {200000, 'X', -1, "0X1.86AP+17"},
         {2000000, 'g', -1, "2e+06"},
+       {1e10, 'g', -1, "1e+10"},
  
         // g conversion and zero suppression
         {400, 'g', 2, "4e+02"},
@@ -84,6 +85,7 @@ var ftoatests = []ftoaTest{
         {1.2355, 'f', 3, "1.236"},
         {1234567890123456.5, 'e', 15, "1.234567890123456e+15"},
         {1234567890123457.5, 'e', 15, "1.234567890123458e+15"},
+       {108678236358137.625, 'g', -1, "1.0867823635813762e+14"},
  
         {1e23, 'e', 17, "9.99999999999999916e+22"},
         {1e23, 'f', 17, "99999999999999991611392.00000000000000000"},
@@ -191,6 +193,25 @@ func TestFtoa(t *testing.T) {
         }
  }
  
+func TestFtoaPowersOfTwo(t *testing.T) {
+       for exp := -2048; exp <= 2048; exp++ {
+               f := math.Ldexp(1, exp)
+               if !math.IsInf(f, 0) {
+                       s := FormatFloat(f, 'e', -1, 64)
+                       if x, _ := ParseFloat(s, 64); x != f {
+                               t.Errorf("failed roundtrip %v => %s => %v", f, s, x)
+                       }
+               }
+               f32 := float32(f)
+               if !math.IsInf(float64(f32), 0) {
+                       s := FormatFloat(float64(f32), 'e', -1, 32)
+                       if x, _ := ParseFloat(s, 32); float32(x) != f32 {
+                               t.Errorf("failed roundtrip %v => %s => %v", f32, s, float32(x))
+                       }
+               }
+       }
+}
+
  func TestFtoaRandom(t *testing.T) {
         N := int(1e4)
         if testing.Short() {
@@ -240,6 +261,7 @@ var ftoaBenches = []struct {
         {"Float", 339.7784, 'g', -1, 64},
         {"Exp", -5.09e75, 'g', -1, 64},
         {"NegExp", -5.11e-95, 'g', -1, 64},
+       {"LongExp", 1.234567890123456e-78, 'g', -1, 64},
  
         {"Big", 123456789123456789123456789, 'g', -1, 64},
         {"BinaryExp", -1, 'b', -1, 64},
@@ -249,6 +271,7 @@ var ftoaBenches = []struct {
         {"32Point", 339.7784, 'g', -1, 32},
         {"32Exp", -5.09e25, 'g', -1, 32},
         {"32NegExp", -5.11e-25, 'g', -1, 32},
+       {"32Shortest", 1.234567e-8, 'g', -1, 32},
         {"32Fixed8Hard", math.Ldexp(15961084, -125), 'e', 8, 32},
         {"32Fixed9Hard", math.Ldexp(14855922, -83), 'e', 9, 32},
  
@@ -264,7 +287,14 @@ var ftoaBenches = []struct {
         {"64Fixed18Hard", math.Ldexp(6994187472632449, 690), 'e', 18, 64},
  
         // Trigger slow path (see issue #15672).
-       {"Slowpath64", 622666234635.3213e-320, 'e', -1, 64},
+       // The shortest is: 8.034137530808823e+43
+       {"Slowpath64", 8.03413753080882349e+43, 'e', -1, 64},
+       // This denormal is pathological because the lower/upper
+       // halfways to neighboring floats are:
+       // 622666234635.321003e-320 ~= 622666234635.321e-320
+       // 622666234635.321497e-320 ~= 622666234635.3215e-320
+       // making it hard to find the 3rd digit
+       {"SlowpathDenormal64", 622666234635.3213e-320, 'e', -1, 64},
  }
  
  func BenchmarkFormatFloat(b *testing.B) {
diff --git a/src/strconv/ftoaryu.go b/src/strconv/ftoaryu.go

index 44a55b1da9577604b88b53b9ee6d80a70719e9d4..e53de756677d92a6886f0a87e0d740f28b92b3c8 100644 (file)
--- a/src/strconv/ftoaryu.go
+++ b/src/strconv/ftoaryu.go
@@ -218,6 +218,109 @@ func formatDecimal(d *decimalSlice, m uint64, trunc bool, roundUp bool, prec int
         d.dp = d.nd + trimmed
  }
  
+// ryuFtoaShortest formats mant*2^exp with prec decimal digits.
+func ryuFtoaShortest(d *decimalSlice, mant uint64, exp int, flt *floatInfo) {
+       if mant == 0 {
+               d.nd, d.dp = 0, 0
+               return
+       }
+       // If input is an exact integer with fewer bits than the mantissa,
+       // the previous and next integer are not admissible representations.
+       if exp <= 0 && bits.TrailingZeros64(mant) >= -exp {
+               mant >>= uint(-exp)
+               ryuDigits(d, mant, mant, mant, true, false)
+               return
+       }
+       ml, mc, mu, e2 := computeBounds(mant, exp, flt)
+       if e2 == 0 {
+               ryuDigits(d, ml, mc, mu, true, false)
+               return
+       }
+       // Find 10^q *larger* than 2^-e2
+       q := mulByLog2Log10(-e2) + 1
+
+       // We are going to multiply by 10^q using 128-bit arithmetic.
+       // The exponent is the same for all 3 numbers.
+       var dl, dc, du uint64
+       var dl0, dc0, du0 bool
+       if flt == &float32info {
+               var dl32, dc32, du32 uint32
+               dl32, _, dl0 = mult64bitPow10(uint32(ml), e2, q)
+               dc32, _, dc0 = mult64bitPow10(uint32(mc), e2, q)
+               du32, e2, du0 = mult64bitPow10(uint32(mu), e2, q)
+               dl, dc, du = uint64(dl32), uint64(dc32), uint64(du32)
+       } else {
+               dl, _, dl0 = mult128bitPow10(ml, e2, q)
+               dc, _, dc0 = mult128bitPow10(mc, e2, q)
+               du, e2, du0 = mult128bitPow10(mu, e2, q)
+       }
+       if e2 >= 0 {
+               panic("not enough significant bits after mult128bitPow10")
+       }
+       // Is it an exact computation?
+       if q > 55 {
+               // Large positive powers of ten are not exact
+               dl0, dc0, du0 = false, false, false
+       }
+       if q < 0 && q >= -24 {
+               // Division by a power of ten may be exact.
+               // (note that 5^25 is a 59-bit number so division by 5^25 is never exact).
+               if divisibleByPower5(ml, -q) {
+                       dl0 = true
+               }
+               if divisibleByPower5(mc, -q) {
+                       dc0 = true
+               }
+               if divisibleByPower5(mu, -q) {
+                       du0 = true
+               }
+       }
+       // Express the results (dl, dc, du)*2^e2 as integers.
+       // Extra bits must be removed and rounding hints computed.
+       extra := uint(-e2)
+       extraMask := uint64(1<<extra - 1)
+       // Now compute the floored, integral base 10 mantissas.
+       dl, fracl := dl>>extra, dl&extraMask
+       dc, fracc := dc>>extra, dc&extraMask
+       du, fracu := du>>extra, du&extraMask
+       // Is it allowed to use 'du' as a result?
+       // It is always allowed when it is truncated, but also
+       // if it is exact and the original binary mantissa is even
+       // When disallowed, we can substract 1.
+       uok := !du0 || fracu > 0
+       if du0 && fracu == 0 {
+               uok = mant&1 == 0
+       }
+       if !uok {
+               du--
+       }
+       // Is 'dc' the correctly rounded base 10 mantissa?
+       // The correct rounding might be dc+1
+       cup := false // don't round up.
+       if dc0 {
+               // If we computed an exact product, the half integer
+               // should round to next (even) integer if 'dc' is odd.
+               cup = fracc > 1<<(extra-1) ||
+                       (fracc == 1<<(extra-1) && dc&1 == 1)
+       } else {
+               // otherwise, the result is a lower truncation of the ideal
+               // result.
+               cup = fracc>>(extra-1) == 1
+       }
+       // Is 'dl' an allowed representation?
+       // Only if it is an exact value, and if the original binary mantissa
+       // was even.
+       lok := dl0 && fracl == 0 && (mant&1 == 0)
+       if !lok {
+               dl++
+       }
+       // We need to remember whether the trimmed digits of 'dc' are zero.
+       c0 := dc0 && fracc == 0
+       // render digits
+       ryuDigits(d, dl, dc, du, c0, cup)
+       d.dp -= q
+}
+
  // mulByLog2Log10 returns math.Floor(x * log(2)/log(10)) for an integer x in
  // the range -1600 <= x && x <= +1600.
  //
@@ -238,6 +341,140 @@ func mulByLog10Log2(x int) int {
         return (x * 108853) >> 15
  }
  
+// computeBounds returns a floating-point vector (l, c, u)×2^e2
+// where the mantissas are 55-bit (or 26-bit) integers, describing the interval
+// represented by the input float64 or float32.
+func computeBounds(mant uint64, exp int, flt *floatInfo) (lower, central, upper uint64, e2 int) {
+       if mant != 1<<flt.mantbits || exp == flt.bias+1-int(flt.mantbits) {
+               // regular case (or denormals)
+               lower, central, upper = 2*mant-1, 2*mant, 2*mant+1
+               e2 = exp - 1
+               return
+       } else {
+               // border of an exponent
+               lower, central, upper = 4*mant-1, 4*mant, 4*mant+2
+               e2 = exp - 2
+               return
+       }
+}
+
+func ryuDigits(d *decimalSlice, lower, central, upper uint64,
+       c0, cup bool) {
+       lhi, llo := divmod1e9(lower)
+       chi, clo := divmod1e9(central)
+       uhi, ulo := divmod1e9(upper)
+       if uhi == 0 {
+               // only low digits (for denormals)
+               ryuDigits32(d, llo, clo, ulo, c0, cup, 8)
+       } else if lhi < uhi {
+               // truncate 9 digits at once.
+               if llo != 0 {
+                       lhi++
+               }
+               c0 = c0 && clo == 0
+               cup = (clo > 5e8) || (clo == 5e8 && cup)
+               ryuDigits32(d, lhi, chi, uhi, c0, cup, 8)
+               d.dp += 9
+       } else {
+               d.nd = 0
+               // emit high part
+               n := uint(9)
+               for v := chi; v > 0; {
+                       v1, v2 := v/10, v%10
+                       v = v1
+                       n--
+                       d.d[n] = byte(v2 + '0')
+               }
+               d.d = d.d[n:]
+               d.nd = int(9 - n)
+               // emit low part
+               ryuDigits32(d, llo, clo, ulo,
+                       c0, cup, d.nd+8)
+       }
+       // trim trailing zeros
+       for d.nd > 0 && d.d[d.nd-1] == '0' {
+               d.nd--
+       }
+       // trim initial zeros
+       for d.nd > 0 && d.d[0] == '0' {
+               d.nd--
+               d.dp--
+               d.d = d.d[1:]
+       }
+}
+
+// ryuDigits32 emits decimal digits for a number less than 1e9.
+func ryuDigits32(d *decimalSlice, lower, central, upper uint32,
+       c0, cup bool, endindex int) {
+       if upper == 0 {
+               d.dp = endindex + 1
+               return
+       }
+       trimmed := 0
+       // Remember last trimmed digit to check for round-up.
+       // c0 will be used to remember zeroness of following digits.
+       cNextDigit := 0
+       for upper > 0 {
+               // Repeatedly compute:
+               // l = Ceil(lower / 10^k)
+               // c = Round(central / 10^k)
+               // u = Floor(upper / 10^k)
+               // and stop when c goes out of the (l, u) interval.
+               l := (lower + 9) / 10
+               c, cdigit := central/10, central%10
+               u := upper / 10
+               if l > u {
+                       // don't trim the last digit as it is forbidden to go below l
+                       // other, trim and exit now.
+                       break
+               }
+               // Check that we didn't cross the lower boundary.
+               // The case where l < u but c == l-1 is essentially impossible,
+               // but may happen if:
+               //    lower   = ..11
+               //    central = ..19
+               //    upper   = ..31
+               // and means that 'central' is very close but less than
+               // an integer ending with many zeros, and usually
+               // the "round-up" logic hides the problem.
+               if l == c+1 && c < u {
+                       c++
+                       cdigit = 0
+                       cup = false
+               }
+               trimmed++
+               // Remember trimmed digits of c
+               c0 = c0 && cNextDigit == 0
+               cNextDigit = int(cdigit)
+               lower, central, upper = l, c, u
+       }
+       // should we round up?
+       if trimmed > 0 {
+               cup = cNextDigit > 5 ||
+                       (cNextDigit == 5 && !c0) ||
+                       (cNextDigit == 5 && c0 && central&1 == 1)
+       }
+       if central < upper && cup {
+               central++
+       }
+       // We know where the number ends, fill directly
+       endindex -= trimmed
+       v := central
+       n := endindex
+       for n > d.nd {
+               v1, v2 := v/100, v%100
+               d.d[n] = smallsString[2*v2+1]
+               d.d[n-1] = smallsString[2*v2+0]
+               n -= 2
+               v = v1
+       }
+       if n == d.nd {
+               d.d[n] = byte(v + '0')
+       }
+       d.nd = endindex + 1
+       d.dp = d.nd + trimmed
+}
+
  // mult64bitPow10 takes a floating-point input with a 25-bit
  // mantissa and multiplies it with 10^q. The resulting mantissa
  // is m*P >> 57 where P is a 64-bit element of the detailedPowersOfTen tables.
@@ -249,7 +486,8 @@ func mulByLog10Log2(x int) int {
  //     exact = ε == 0
  func mult64bitPow10(m uint32, e2, q int) (resM uint32, resE int, exact bool) {
         if q == 0 {
-               return m << 7, e2 - 7, true
+               // P == 1<<63
+               return m << 6, e2 - 6, true
         }
         if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
                 // This never happens due to the range of float32/float64 exponent
@@ -276,7 +514,8 @@ func mult64bitPow10(m uint32, e2, q int) (resM uint32, resE int, exact bool) {
  //     exact = ε == 0
  func mult128bitPow10(m uint64, e2, q int) (resM uint64, resE int, exact bool) {
         if q == 0 {
-               return m << 9, e2 - 9, true
+               // P == 1<<127
+               return m << 8, e2 - 8, true
         }
         if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
                 // This never happens due to the range of float32/float64 exponent
@@ -309,3 +548,15 @@ func divisibleByPower5(m uint64, k int) bool {
         }
         return true
  }
+
+// divmod1e9 computes quotient and remainder of division by 1e9,
+// avoiding runtime uint64 division on 32-bit platforms.
+func divmod1e9(x uint64) (uint32, uint32) {
+       if !host32bit {
+               return uint32(x / 1e9), uint32(x % 1e9)
+       }
+       // Use the same sequence of operations as the amd64 compiler.
+       hi, _ := bits.Mul64(x>>1, 0x89705f4136b4a598) // binary digits of 1e-9
+       q := hi >> 28
+       return uint32(q), uint32(x - q*1e9)
+}
author	Rémy Oudompheng <remyoudompheng@gmail.com>
	Wed, 27 Mar 2019 05:55:52 +0000 (06:55 +0100)
committer	Emmanuel Odeke <emmanuel@orijtech.com>
	Thu, 15 Apr 2021 09:18:03 +0000 (09:18 +0000)
src/strconv/ftoa.go		patch \| blob \| history
src/strconv/ftoa_test.go		patch \| blob \| history
src/strconv/ftoaryu.go		patch \| blob \| history