]> Cypherpunks.ru repositories - gostls13.git/commitdiff
math/rand/v2: remove bias in ExpFloat64 and NormFloat64
authorBranden Brown <zephyrtronium@gmail.com>
Sat, 5 Aug 2023 13:24:57 +0000 (09:24 -0400)
committerGopher Robot <gobot@golang.org>
Mon, 30 Oct 2023 17:08:47 +0000 (17:08 +0000)
The original implementation of the ziggurat algorithm was designed for
32-bit random integer inputs. This necessitated reusing some low-order
bits for the slice selection and the random coordinate, which introduces
statistical bias. The result is that PractRand consistently fails the
math/rand normal and exponential sequences (transformed to uniform)
within 2 GB of variates.

This change adjusts the ziggurat procedures to use 63-bit random inputs,
so that there is no need to reuse bits between the slice and coordinate.
This is sufficient for the normal sequence to survive to 256 GB of
PractRand testing.

An alternative technique is to recalculate the ziggurats to use 1024
rather than 128 or 256 slices to make full use of 64-bit inputs. This
improves the survival of the normal sequence to far beyond 256 GB and
additionally provides a 6% performance improvement due to the improved
rejection procedure efficiency. However, doing so increases the total
size of the ziggurat tables from 4.5 kB to 48 kB.

goos: linux
goarch: amd64
pkg: math/rand/v2
cpu: AMD Ryzen 9 7950X 16-Core Processor
                        │ 2703446c2e.amd64 │           e1bbe739fb.amd64           │
                        │      sec/op      │    sec/op     vs base                │
SourceUint64-32                1.337n ± 1%    1.316n ± 2%        ~ (p=0.024 n=20)
GlobalInt64-32                 2.225n ± 2%    2.048n ± 1%   -7.93% (p=0.000 n=20)
GlobalInt64Parallel-32        0.1043n ± 2%   0.1037n ± 1%        ~ (p=0.587 n=20)
GlobalUint64-32                2.058n ± 1%    2.039n ± 2%        ~ (p=0.030 n=20)
GlobalUint64Parallel-32       0.1009n ± 1%   0.1013n ± 1%        ~ (p=0.984 n=20)
Int64-32                       1.719n ± 2%    1.692n ± 2%        ~ (p=0.085 n=20)
Uint64-32                      1.669n ± 1%    1.643n ± 2%        ~ (p=0.049 n=20)
GlobalIntN1000-32              3.321n ± 2%    3.287n ± 1%        ~ (p=0.298 n=20)
IntN1000-32                    2.479n ± 1%    2.678n ± 2%   +8.01% (p=0.000 n=20)
Int64N1000-32                  2.477n ± 1%    2.684n ± 2%   +8.38% (p=0.000 n=20)
Int64N1e8-32                   2.490n ± 1%    2.663n ± 2%   +6.99% (p=0.000 n=20)
Int64N1e9-32                   2.458n ± 1%    2.633n ± 1%   +7.12% (p=0.000 n=20)
Int64N2e9-32                   2.486n ± 2%    2.657n ± 1%   +6.90% (p=0.000 n=20)
Int64N1e18-32                  3.215n ± 2%    3.125n ± 2%   -2.78% (p=0.000 n=20)
Int64N2e18-32                  3.588n ± 2%    3.476n ± 1%   -3.15% (p=0.000 n=20)
Int64N4e18-32                  4.938n ± 2%    4.795n ± 1%   -2.91% (p=0.000 n=20)
Int32N1000-32                  2.673n ± 2%    2.485n ± 2%   -7.02% (p=0.000 n=20)
Int32N1e8-32                   2.631n ± 2%    2.457n ± 1%   -6.63% (p=0.000 n=20)
Int32N1e9-32                   2.628n ± 2%    2.452n ± 1%   -6.70% (p=0.000 n=20)
Int32N2e9-32                   2.684n ± 2%    2.453n ± 1%   -8.61% (p=0.000 n=20)
Float32-32                     2.240n ± 2%    2.254n ± 1%        ~ (p=0.878 n=20)
Float64-32                     2.253n ± 1%    2.262n ± 1%        ~ (p=0.963 n=20)
ExpFloat64-32                  3.677n ± 1%    3.777n ± 2%   +2.71% (p=0.004 n=20)
NormFloat64-32                 3.761n ± 1%    3.606n ± 1%   -4.15% (p=0.000 n=20)
Perm3-32                       33.55n ± 2%    33.12n ± 2%        ~ (p=0.402 n=20)
Perm30-32                      173.2n ± 1%    176.1n ± 1%   +1.67% (p=0.000 n=20)
Perm30ViaShuffle-32            115.9n ± 1%    109.3n ± 1%   -5.69% (p=0.000 n=20)
ShuffleOverhead-32             101.9n ± 1%    112.5n ± 1%  +10.35% (p=0.000 n=20)
Concurrent-32                  2.107n ± 6%    2.099n ± 0%        ~ (p=0.051 n=20)

goos: darwin
goarch: arm64
pkg: math/rand/v2
cpu: Apple M1
                       │ 2703446c2e.arm64 │          e1bbe739fb.arm64           │
                       │      sec/op      │    sec/op     vs base               │
SourceUint64-8                2.275n ± 0%    2.290n ± 1%       ~ (p=0.044 n=20)
GlobalInt64-8                 2.154n ± 1%    2.180n ± 1%       ~ (p=0.068 n=20)
GlobalInt64Parallel-8        0.4298n ± 0%   0.4294n ± 0%       ~ (p=0.079 n=20)
GlobalUint64-8                2.160n ± 1%    2.170n ± 1%       ~ (p=0.129 n=20)
GlobalUint64Parallel-8       0.4286n ± 0%   0.4283n ± 0%       ~ (p=0.350 n=20)
Int64-8                       2.491n ± 1%    2.481n ± 1%       ~ (p=0.330 n=20)
Uint64-8                      2.458n ± 0%    2.464n ± 1%       ~ (p=0.351 n=20)
GlobalIntN1000-8              2.814n ± 2%    2.814n ± 0%       ~ (p=0.325 n=20)
IntN1000-8                    2.933n ± 0%    2.934n ± 2%       ~ (p=0.079 n=20)
Int64N1000-8                  2.962n ± 1%    2.957n ± 1%       ~ (p=0.259 n=20)
Int64N1e8-8                   2.960n ± 1%    2.935n ± 2%       ~ (p=0.276 n=20)
Int64N1e9-8                   2.935n ± 2%    2.935n ± 2%       ~ (p=0.984 n=20)
Int64N2e9-8                   2.934n ± 0%    2.933n ± 4%       ~ (p=0.463 n=20)
Int64N1e18-8                  3.777n ± 1%    3.781n ± 1%       ~ (p=0.516 n=20)
Int64N2e18-8                  4.359n ± 1%    4.362n ± 0%       ~ (p=0.256 n=20)
Int64N4e18-8                  6.536n ± 1%    6.576n ± 1%       ~ (p=0.224 n=20)
Int32N1000-8                  2.937n ± 0%    2.942n ± 2%       ~ (p=0.312 n=20)
Int32N1e8-8                   2.937n ± 1%    2.941n ± 1%       ~ (p=0.463 n=20)
Int32N1e9-8                   2.936n ± 0%    2.938n ± 2%       ~ (p=0.044 n=20)
Int32N2e9-8                   2.938n ± 2%    2.982n ± 2%       ~ (p=0.174 n=20)
Float32-8                     3.441n ± 0%    3.441n ± 0%       ~ (p=0.064 n=20)
Float64-8                     3.441n ± 0%    3.441n ± 0%       ~ (p=0.826 n=20)
ExpFloat64-8                  4.486n ± 0%    4.472n ± 0%  -0.31% (p=0.000 n=20)
NormFloat64-8                 4.721n ± 0%    4.716n ± 0%       ~ (p=0.051 n=20)
Perm3-8                       26.65n ± 0%    26.66n ± 0%       ~ (p=0.080 n=20)
Perm30-8                      143.2n ± 0%    143.3n ± 0%  +0.10% (p=0.000 n=20)
Perm30ViaShuffle-8            143.0n ± 0%    142.9n ± 0%       ~ (p=0.642 n=20)
ShuffleOverhead-8             120.6n ± 1%    121.1n ± 1%  +0.41% (p=0.010 n=20)
Concurrent-8                  2.399n ± 5%    2.379n ± 2%       ~ (p=0.365 n=20)

goos: linux
goarch: 386
pkg: math/rand/v2
cpu: AMD Ryzen 9 7950X 16-Core Processor
                        │ 2703446c2e.386 │           e1bbe739fb.386            │
                        │     sec/op     │    sec/op     vs base               │
SourceUint64-32             2.072n ±  2%    2.087n ± 1%       ~ (p=0.440 n=20)
GlobalInt64-32              3.546n ± 27%    3.538n ± 2%       ~ (p=0.101 n=20)
GlobalInt64Parallel-32     0.3211n ±  0%   0.3207n ± 1%       ~ (p=0.753 n=20)
GlobalUint64-32             3.522n ±  2%    3.543n ± 1%       ~ (p=0.071 n=20)
GlobalUint64Parallel-32    0.3172n ±  0%   0.3170n ± 0%       ~ (p=0.507 n=20)
Int64-32                    2.520n ±  2%    2.548n ± 1%       ~ (p=0.267 n=20)
Uint64-32                   2.581n ±  1%    2.565n ± 2%       ~ (p=0.143 n=20)
GlobalIntN1000-32           6.171n ±  1%    6.300n ± 1%       ~ (p=0.037 n=20)
IntN1000-32                 4.752n ±  2%    4.750n ± 0%       ~ (p=0.984 n=20)
Int64N1000-32               5.429n ±  1%    5.515n ± 2%       ~ (p=0.292 n=20)
Int64N1e8-32                5.469n ±  2%    5.527n ± 0%       ~ (p=0.013 n=20)
Int64N1e9-32                5.489n ±  2%    5.531n ± 2%       ~ (p=0.256 n=20)
Int64N2e9-32                5.492n ±  2%    5.514n ± 2%       ~ (p=0.606 n=20)
Int64N1e18-32               8.927n ±  1%    9.059n ± 1%       ~ (p=0.229 n=20)
Int64N2e18-32               9.622n ±  1%    9.594n ± 1%       ~ (p=0.703 n=20)
Int64N4e18-32               12.03n ±  1%    12.05n ± 2%       ~ (p=0.733 n=20)
Int32N1000-32               4.817n ±  1%    4.840n ± 2%       ~ (p=0.941 n=20)
Int32N1e8-32                4.801n ±  1%    4.832n ± 2%       ~ (p=0.228 n=20)
Int32N1e9-32                4.798n ±  1%    4.815n ± 2%       ~ (p=0.560 n=20)
Int32N2e9-32                4.840n ±  1%    4.813n ± 1%       ~ (p=0.015 n=20)
Float32-32                  10.51n ±  4%    10.90n ± 2%  +3.71% (p=0.007 n=20)
Float64-32                  20.33n ±  3%    20.32n ± 4%       ~ (p=0.566 n=20)
ExpFloat64-32               12.59n ±  2%    12.95n ± 3%  +2.86% (p=0.002 n=20)
NormFloat64-32              7.350n ±  2%    7.570n ± 1%  +2.99% (p=0.007 n=20)
Perm3-32                    39.29n ±  2%    37.80n ± 2%  -3.79% (p=0.000 n=20)
Perm30-32                   219.1n ±  2%    214.0n ± 1%  -2.33% (p=0.002 n=20)
Perm30ViaShuffle-32         189.8n ±  2%    188.7n ± 2%       ~ (p=0.147 n=20)
ShuffleOverhead-32          158.9n ±  2%    160.8n ± 1%       ~ (p=0.176 n=20)
Concurrent-32               3.306n ±  3%    3.288n ± 0%  -0.54% (p=0.005 n=20)

For #61716.

Change-Id: I4c5fe710b310dc075ae21c97d1805bcc20db5050
Reviewed-on: https://go-review.googlesource.com/c/go/+/516275
Auto-Submit: Russ Cox <rsc@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Reviewed-by: Rob Pike <r@golang.org>
src/math/rand/v2/example_test.go
src/math/rand/v2/exp.go
src/math/rand/v2/normal.go
src/math/rand/v2/regress_test.go

index 590e01d1bb87eb139bd71113c5825097ed51581e..55892097a8d65bf1c094b93acb565954f84f310b 100644 (file)
@@ -83,17 +83,17 @@ func Example_rand() {
        // Perm generates a random permutation of the numbers [0, n).
        show("Perm", r.Perm(5), r.Perm(5), r.Perm(5))
        // Output:
-       // Float32     0.73793465          0.38461488          0.9940225
-       // Float64     0.6919607852308565  0.29140004584133117 0.2262092163027547
-       // ExpFloat64  0.10400903165715357 0.28855743344575835 0.20489656480442942
-       // NormFloat64 -0.5602299711828513 -0.9211692958208376 -1.4262061075859056
-       // Int32       1817075958          91420417            1486590581
-       // Int64       5724354148158589552 5239846799706671610 5927547564735367388
-       // Uint32      2295813601          961197529           3493134579
-       // IntN(10)    4                   5                   1
-       // Int32N(10)  8                   5                   4
-       // Int64N(10)  2                   6                   3
-       // Perm        [3 4 2 1 0]         [4 1 2 0 3]         [0 2 1 3 4]
+       // Float32     0.73793465           0.38461488          0.9940225
+       // Float64     0.6919607852308565   0.29140004584133117 0.2262092163027547
+       // ExpFloat64  0.27263589649304043  1.3214739789908194  2.223639057715668
+       // NormFloat64 -0.09361151905162404 -1.3531915625472757 0.03212053591352371
+       // Int32       1824388269           1817075958          91420417
+       // Int64       3546343826724305832  5724354148158589552 5239846799706671610
+       // Uint32      1380114714           2295813601          961197529
+       // IntN(10)    8                    4                   5
+       // Int32N(10)  1                    8                   5
+       // Int64N(10)  4                    2                   6
+       // Perm        [0 4 1 3 2]          [4 0 1 3 2]         [4 1 3 0 2]
 }
 
 func ExamplePerm() {
index c1162c19b68809520907ac1d0e49155a124c2179..ed7f7277bc591ee1977a50fe4f6d62339e9c64a4 100644 (file)
@@ -29,8 +29,9 @@ const (
 //     sample = ExpFloat64() / desiredRateParameter
 func (r *Rand) ExpFloat64() float64 {
        for {
-               j := r.Uint32()
-               i := j & 0xFF
+               u := r.Uint64()
+               j := uint32(u)
+               i := uint8(u >> 32)
                x := float64(j) * float64(we[i])
                if j < ke[i] {
                        return x
index 6654479a0052cbebff06ca2108147c5f01f49a74..ea1ae409b4cbdf5f5489add24d53f632816f7e9f 100644 (file)
@@ -36,8 +36,9 @@ func absInt32(i int32) uint32 {
 //     sample = NormFloat64() * desiredStdDev + desiredMean
 func (r *Rand) NormFloat64() float64 {
        for {
-               j := int32(r.Uint32()) // Possibly negative
-               i := j & 0x7F
+               u := r.Uint64()
+               j := int32(u) // Possibly negative
+               i := u >> 32 & 0x7F
                x := float64(j) * float64(wn[i])
                if absInt32(j) < kn[i] {
                        // This case should be hit better than 99% of the time.
index 3b886041f7a14db50e403e2dfa33ef854b1b2818..0b9df9b37987a95a361f5f3e48f01e85e5a4e4c0 100644 (file)
@@ -225,26 +225,26 @@ func replace(t *testing.T, file string, new []byte) {
 }
 
 var regressGolden = []any{
-       float64(0.1835616265352068),  // ExpFloat64()
-       float64(0.1747899228736829),  // ExpFloat64()
-       float64(2.369801563222863),   // ExpFloat64()
-       float64(1.8580757676846802),  // ExpFloat64()
-       float64(0.35731123690292155), // ExpFloat64()
-       float64(0.5998175837039783),  // ExpFloat64()
-       float64(0.466149534807967),   // ExpFloat64()
-       float64(1.333748223451787),   // ExpFloat64()
-       float64(0.05019983258513916), // ExpFloat64()
-       float64(1.4143832256421573),  // ExpFloat64()
-       float64(0.7274094466687158),  // ExpFloat64()
-       float64(0.9595398235158843),  // ExpFloat64()
-       float64(1.3010086894917756),  // ExpFloat64()
-       float64(0.8678483737499929),  // ExpFloat64()
-       float64(0.7958895614497015),  // ExpFloat64()
-       float64(0.12235329704897674), // ExpFloat64()
-       float64(1.1625413819613253),  // ExpFloat64()
-       float64(1.2603945934386542),  // ExpFloat64()
-       float64(0.22199446394172706), // ExpFloat64()
-       float64(2.248962105270165),   // ExpFloat64()
+       float64(0.018945741402288857), // ExpFloat64()
+       float64(0.13829043737893842),  // ExpFloat64()
+       float64(1.1409883497761604),   // ExpFloat64()
+       float64(1.2449542292186253),   // ExpFloat64()
+       float64(0.4849966704675476),   // ExpFloat64()
+       float64(0.08948056191408837),  // ExpFloat64()
+       float64(0.41380878045769276),  // ExpFloat64()
+       float64(0.31325729628567145),  // ExpFloat64()
+       float64(0.23118058048615886),  // ExpFloat64()
+       float64(0.2090943007446),      // ExpFloat64()
+       float64(2.6861652769471456),   // ExpFloat64()
+       float64(1.3811947596783387),   // ExpFloat64()
+       float64(1.5595976199841015),   // ExpFloat64()
+       float64(2.3469708688771744),   // ExpFloat64()
+       float64(0.5882760784580738),   // ExpFloat64()
+       float64(0.33463787922271115),  // ExpFloat64()
+       float64(0.8799304551478242),   // ExpFloat64()
+       float64(1.616532211418378),    // ExpFloat64()
+       float64(0.09548420514080316),  // ExpFloat64()
+       float64(2.448910012295588),    // ExpFloat64()
 
        float32(0.39651686),  // Float32()
        float32(0.38516325),  // Float32()
@@ -414,26 +414,26 @@ var regressGolden = []any{
        int64(339542337),           // IntN(1000000000)
        int64(701992307),           // IntN(1073741824)
 
-       float64(0.6694336828657225),  // NormFloat64()
-       float64(0.7506128421991493),  // NormFloat64()
-       float64(-0.5466367925077582), // NormFloat64()
-       float64(-0.8240444698703802), // NormFloat64()
-       float64(0.11563765115029284), // NormFloat64()
-       float64(-1.3442355710948637), // NormFloat64()
-       float64(-1.0654999977586854), // NormFloat64()
-       float64(0.15938628997241455), // NormFloat64()
-       float64(-0.8046314635002316), // NormFloat64()
-       float64(0.8323920113630076),  // NormFloat64()
-       float64(1.0611019472659846),  // NormFloat64()
-       float64(-0.8814992544664111), // NormFloat64()
-       float64(0.9236344788106081),  // NormFloat64()
-       float64(-1.2854378982224413), // NormFloat64()
-       float64(0.4683572952232405),  // NormFloat64()
-       float64(-0.5065217527091702), // NormFloat64()
-       float64(-0.6460803205194869), // NormFloat64()
-       float64(0.7913615856789362),  // NormFloat64()
-       float64(-1.6119549224461807), // NormFloat64()
-       float64(0.16216183438701695), // NormFloat64()
+       float64(0.06909351197715208),  // NormFloat64()
+       float64(0.5938704963270934),   // NormFloat64()
+       float64(1.306028863617345),    // NormFloat64()
+       float64(1.4117443127537266),   // NormFloat64()
+       float64(0.15696085092285333),  // NormFloat64()
+       float64(1.360954184661658),    // NormFloat64()
+       float64(0.34312984093649135),  // NormFloat64()
+       float64(0.7340067314938814),   // NormFloat64()
+       float64(0.22135434353553696),  // NormFloat64()
+       float64(-0.15741313389982836), // NormFloat64()
+       float64(-1.080896970111088),   // NormFloat64()
+       float64(-0.6107370548788273),  // NormFloat64()
+       float64(-2.3550050260853643),  // NormFloat64()
+       float64(1.8363976597396832),   // NormFloat64()
+       float64(-0.7167650947520989),  // NormFloat64()
+       float64(0.6860847654927735),   // NormFloat64()
+       float64(0.3403802538398155),   // NormFloat64()
+       float64(-1.3884780626234523),  // NormFloat64()
+       float64(0.14097321427512907),  // NormFloat64()
+       float64(-1.032800550788109),   // NormFloat64()
 
        []int{},                             // Perm(0)
        []int{0},                            // Perm(1)