test/float_lit2.go

   1 // run
   2
   3 // Check conversion of constant to float32/float64 near min/max boundaries.
   4
   5 // Copyright 2014 The Go Authors. All rights reserved.
   6 // Use of this source code is governed by a BSD-style
   7 // license that can be found in the LICENSE file.
   8
   9 package main
  10
  11 import (
  12         "fmt"
  13         "math"
  14 )
  15
// The largest exact float32 is f₁ = (1+1-1/2²³)×2¹²⁷ = (2-2⁻²³)×2¹²⁷ = 2¹²⁸ - 2¹⁰⁴.
// The next float32 would be f₂ = (1+1)×2¹²⁷ = 1×2¹²⁸, except that exponent is out of range.
// Float32 conversion rounds to the nearest float32, rounding ties to the even mantissa:
// between f₁ and f₂, values closer to f₁ round to f₁ and values closer to f₂ are rejected as out of range.
// f₁ has an odd mantissa, so the halfway point (f₁+f₂)/2 rounds to f₂ and is rejected.
// The halfway point is (f₁+f₂)/2 = 2¹²⁸ - 2¹⁰³.
//
// The same is true of float64, with different constants: the mantissa has 53 bits instead of 24
// and the out-of-range bound is 2¹⁰²⁴ instead of 2¹²⁸, so f₁ = 2¹⁰²⁴ - 2⁹⁷¹ and the
// halfway point is 2¹⁰²⁴ - 2⁹⁷⁰.
  24
  25 const (
  26         two24   = 1.0 * (1 << 24)
  27         two53   = 1.0 * (1 << 53)
  28         two64   = 1.0 * (1 << 64)
  29         two128  = two64 * two64
  30         two256  = two128 * two128
  31         two512  = two256 * two256
  32         two768  = two512 * two256
  33         two1024 = two512 * two512
  34
  35         ulp32 = two128 / two24
  36         max32 = two128 - ulp32
  37
  38         ulp64 = two1024 / two53
  39         max64 = two1024 - ulp64
  40 )
  41
  42 var cvt = []struct {
  43         bits   uint64 // keep us honest
  44         exact  interface{}
  45         approx interface{}
  46         text   string
  47 }{
  48         // 0
  49         {0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 - ulp32/2), "max32 - ulp32 - ulp32/2"},
  50         {0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32), "max32 - ulp32"},
  51         {0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32/2), "max32 - ulp32/2"},
  52         {0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 + ulp32/2), "max32 - ulp32 + ulp32/2"},
  53         {0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + ulp32/two64), "max32 - ulp32 + ulp32/2 + ulp32/two64"},
  54         {0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + ulp32/two64), "max32 - ulp32/2 + ulp32/two64"},
  55         {0x7f7fffff, float32(max32), float32(max32), "max32"},
  56         {0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - ulp32/two64), "max32 + ulp32/2 - ulp32/two64"},
  57
  58         {0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 - ulp32/2)), "-(max32 - ulp32 - ulp32/2)"},
  59         {0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32)), "-(max32 - ulp32)"},
  60         {0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32/2)), "-(max32 - ulp32/2)"},
  61         {0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 + ulp32/2)), "-(max32 - ulp32 + ulp32/2)"},
  62         {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + ulp32/two64)), "-(max32 - ulp32 + ulp32/2 + ulp32/two64)"},
  63         {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + ulp32/two64)), "-(max32 - ulp32/2 + ulp32/two64)"},
  64         {0xff7fffff, float32(-(max32)), float32(-(max32)), "-(max32)"},
  65         {0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - ulp32/two64)), "-(max32 + ulp32/2 - ulp32/two64)"},
  66
  67         // These are required to work: according to the Go spec, the internal float mantissa must be at least 256 bits,
  68         // and these expressions can be represented exactly with a 256-bit mantissa.
  69         {0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1), "max32 - ulp32 + ulp32/2 + 1"},
  70         {0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1), "max32 - ulp32/2 + 1"},
  71         {0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1), "max32 + ulp32/2 - 1"},
  72         {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1)), "-(max32 - ulp32 + ulp32/2 + 1)"},
  73         {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1)), "-(max32 - ulp32/2 + 1)"},
  74         {0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1)), "-(max32 + ulp32/2 - 1)"},
  75
  76         {0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1/two128), "max32 - ulp32 + ulp32/2 + 1/two128"},
  77         {0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1/two128), "max32 - ulp32/2 + 1/two128"},
  78         {0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1/two128), "max32 + ulp32/2 - 1/two128"},
  79         {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1/two128)), "-(max32 - ulp32 + ulp32/2 + 1/two128)"},
  80         {0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1/two128)), "-(max32 - ulp32/2 + 1/two128)"},
  81         {0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1/two128)), "-(max32 + ulp32/2 - 1/two128)"},
  82
  83         {0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 - ulp64/2), "max64 - ulp64 - ulp64/2"},
  84         {0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64), "max64 - ulp64"},
  85         {0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64/2), "max64 - ulp64/2"},
  86         {0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 + ulp64/2), "max64 - ulp64 + ulp64/2"},
  87         {0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + ulp64/two64), "max64 - ulp64 + ulp64/2 + ulp64/two64"},
  88         {0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + ulp64/two64), "max64 - ulp64/2 + ulp64/two64"},
  89         {0x7fefffffffffffff, float64(max64), float64(max64), "max64"},
  90         {0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - ulp64/two64), "max64 + ulp64/2 - ulp64/two64"},
  91
  92         {0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 - ulp64/2)), "-(max64 - ulp64 - ulp64/2)"},
  93         {0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64)), "-(max64 - ulp64)"},
  94         {0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64/2)), "-(max64 - ulp64/2)"},
  95         {0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 + ulp64/2)), "-(max64 - ulp64 + ulp64/2)"},
  96         {0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + ulp64/two64)), "-(max64 - ulp64 + ulp64/2 + ulp64/two64)"},
  97         {0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + ulp64/two64)), "-(max64 - ulp64/2 + ulp64/two64)"},
  98         {0xffefffffffffffff, float64(-(max64)), float64(-(max64)), "-(max64)"},
  99         {0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - ulp64/two64)), "-(max64 + ulp64/2 - ulp64/two64)"},
 100
 101         // These are required to work.
 102         // The mantissas are exactly 256 bits.
 103         // max64 is just below 2¹⁰²⁴ so the bottom bit we can use is 2⁷⁶⁸.
 104         {0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + two768), "max64 - ulp64 + ulp64/2 + two768"},
 105         {0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + two768), "max64 - ulp64/2 + two768"},
 106         {0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - two768), "max64 + ulp64/2 - two768"},
 107         {0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + two768)), "-(max64 - ulp64 + ulp64/2 + two768)"},
 108         {0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + two768)), "-(max64 - ulp64/2 + two768)"},
 109         {0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - two768)), "-(max64 + ulp64/2 - two768)"},
 110 }
 111
 112 var bugged = false
 113
 114 func bug() {
 115         if !bugged {
 116                 bugged = true
 117                 fmt.Println("BUG")
 118         }
 119 }
 120
 121 func main() {
 122         u64 := math.Float64frombits(0x7fefffffffffffff) - math.Float64frombits(0x7feffffffffffffe)
 123         if ulp64 != u64 {
 124                 bug()
 125                 fmt.Printf("ulp64=%g, want %g", ulp64, u64)
 126         }
 127
 128         u32 := math.Float32frombits(0x7f7fffff) - math.Float32frombits(0x7f7ffffe)
 129         if ulp32 != u32 {
 130                 bug()
 131                 fmt.Printf("ulp32=%g, want %g", ulp32, u32)
 132         }
 133
 134         for _, c := range cvt {
 135                 if bits(c.exact) != c.bits {
 136                         bug()
 137                         fmt.Printf("%s: inconsistent table: bits=%#x (%g) but exact=%g (%#x)\n", c.text, c.bits, fromBits(c.bits, c.exact), c.exact, bits(c.exact))
 138                 }
 139                 if c.approx != c.exact || bits(c.approx) != c.bits {
 140                         bug()
 141                         fmt.Printf("%s: have %g (%#x) want %g (%#x)\n", c.text, c.approx, bits(c.approx), c.exact, c.bits)
 142                 }
 143         }
 144 }
 145
 146 func bits(x interface{}) interface{} {
 147         switch x := x.(type) {
 148         case float32:
 149                 return uint64(math.Float32bits(x))
 150         case float64:
 151                 return math.Float64bits(x)
 152         }
 153         return 0
 154 }
 155
 156 func fromBits(b uint64, x interface{}) interface{} {
 157         switch x.(type) {
 158         case float32:
 159                 return math.Float32frombits(uint32(b))
 160         case float64:
 161                 return math.Float64frombits(b)
 162         }
 163         return "?"
 164 }