hash/maphash: add purego implementation

author Cuong Manh Le <cuong.manhle.vn@gmail.com>

Thu, 16 Feb 2023 07:35:25 +0000 (14:35 +0700)

committer Gopher Robot <gobot@golang.org>

Tue, 28 Feb 2023 17:00:47 +0000 (17:00 +0000)
author Cuong Manh Le <cuong.manhle.vn@gmail.com>
Thu, 16 Feb 2023 07:35:25 +0000 (14:35 +0700)
committer Gopher Robot <gobot@golang.org>
Tue, 28 Feb 2023 17:00:47 +0000 (17:00 +0000)
diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go

index a906c0dbdbcfdeadaed12e1a9b77c1a91234545f..a52457efccdc677eef69069095faba39e30e5dd1 100644 (file)
--- a/src/cmd/dist/test.go
+++ b/src/cmd/dist/test.go
@@ -655,6 +655,12 @@ func (t *tester) registerTests() {
                                 tags:    []string{"osusergo"},
                                 pkg:     "os/user",
                         })
+               t.registerTest("purego:hash/maphash", "hash/maphash purego implementation",
+                       &goTest{
+                               timeout: 300 * time.Second,
+                               tags:    []string{"purego"},
+                               pkg:     "hash/maphash",
+                       })
         }
  
         // Test ios/amd64 for the iOS simulator.
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go

index 4ac208685767e7a16cc0933268957338be6cfb7a..36091d5a94c8bf4d748319d95e5493ecde079f2d 100644 (file)
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -217,7 +217,7 @@ var depsRules = `
         # hashes
         io
         < hash
-       < hash/adler32, hash/crc32, hash/crc64, hash/fnv, hash/maphash;
+       < hash/adler32, hash/crc32, hash/crc64, hash/fnv;
  
         # math/big
         FMT, encoding/binary, math/rand
@@ -472,6 +472,9 @@ var depsRules = `
         crypto/tls
         < net/smtp;
  
+       crypto/rand
+       < hash/maphash; # for purego implementation
+
         # HTTP, King of Dependencies.
  
         FMT
diff --git a/src/hash/maphash/maphash.go b/src/hash/maphash/maphash.go

index 690068a70a29a8b1b32f0ad5b4d6d560c4000563..c2e9e400b9662fb7f7968b6a18a3197f7d46461a 100644 (file)
--- a/src/hash/maphash/maphash.go
+++ b/src/hash/maphash/maphash.go
@@ -12,10 +12,6 @@
  // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
  package maphash
  
-import (
-       "unsafe"
-)
-
  // A Seed is a random value that selects the specific hash function
  // computed by a Hash. If two Hashes use the same Seeds, they
  // will compute the same hash values for any given input.
@@ -44,17 +40,15 @@ func Bytes(seed Seed, b []byte) uint64 {
         if state == 0 {
                 panic("maphash: use of uninitialized Seed")
         }
-       if len(b) == 0 {
-               return rthash(nil, 0, state) // avoid &b[0] index panic below
-       }
+
         if len(b) > bufSize {
                 b = b[:len(b):len(b)] // merge len and cap calculations when reslicing
                 for len(b) > bufSize {
-                       state = rthash(&b[0], bufSize, state)
+                       state = rthash(b[:bufSize], state)
                         b = b[bufSize:]
                 }
         }
-       return rthash(&b[0], len(b), state)
+       return rthash(b, state)
  }
  
  // String returns the hash of s with the given seed.
@@ -71,12 +65,10 @@ func String(seed Seed, s string) uint64 {
                 panic("maphash: use of uninitialized Seed")
         }
         for len(s) > bufSize {
-               p := (*byte)(unsafe.StringData(s))
-               state = rthash(p, bufSize, state)
+               state = rthashString(s[:bufSize], state)
                 s = s[bufSize:]
         }
-       p := (*byte)(unsafe.StringData(s))
-       return rthash(p, len(s), state)
+       return rthashString(s, state)
  }
  
  // A Hash computes a seeded hash of a byte sequence.
@@ -162,7 +154,7 @@ func (h *Hash) Write(b []byte) (int, error) {
         if len(b) > bufSize {
                 h.initSeed()
                 for len(b) > bufSize {
-                       h.state.s = rthash(&b[0], bufSize, h.state.s)
+                       h.state.s = rthash(b[:bufSize], h.state.s)
                         b = b[bufSize:]
                 }
         }
@@ -189,8 +181,7 @@ func (h *Hash) WriteString(s string) (int, error) {
         if len(s) > bufSize {
                 h.initSeed()
                 for len(s) > bufSize {
-                       ptr := (*byte)(unsafe.StringData(s))
-                       h.state.s = rthash(ptr, bufSize, h.state.s)
+                       h.state.s = rthashString(s[:bufSize], h.state.s)
                         s = s[bufSize:]
                 }
         }
@@ -233,7 +224,7 @@ func (h *Hash) flush() {
                 panic("maphash: flush of partially full buffer")
         }
         h.initSeed()
-       h.state.s = rthash(&h.buf[0], h.n, h.state.s)
+       h.state.s = rthash(h.buf[:h.n], h.state.s)
         h.n = 0
  }
  
@@ -246,14 +237,14 @@ func (h *Hash) flush() {
  // by using bit masking, shifting, or modular arithmetic.
  func (h *Hash) Sum64() uint64 {
         h.initSeed()
-       return rthash(&h.buf[0], h.n, h.state.s)
+       return rthash(h.buf[:h.n], h.state.s)
  }
  
  // MakeSeed returns a new random seed.
  func MakeSeed() Seed {
         var s uint64
         for {
-               s = runtime_fastrand64()
+               s = randUint64()
                 // We use seed 0 to indicate an uninitialized seed/hash,
                 // so keep trying until we get a non-zero seed.
                 if s != 0 {
@@ -263,28 +254,6 @@ func MakeSeed() Seed {
         return Seed{s: s}
  }
  
-//go:linkname runtime_fastrand64 runtime.fastrand64
-func runtime_fastrand64() uint64
-
-func rthash(ptr *byte, len int, seed uint64) uint64 {
-       if len == 0 {
-               return seed
-       }
-       // The runtime hasher only works on uintptr. For 64-bit
-       // architectures, we use the hasher directly. Otherwise,
-       // we use two parallel hashers on the lower and upper 32 bits.
-       if unsafe.Sizeof(uintptr(0)) == 8 {
-               return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)))
-       }
-       lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))
-       hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len))
-       return uint64(hi)<<32 | uint64(lo)
-}
-
-//go:linkname runtime_memhash runtime.memhash
-//go:noescape
-func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
-
  // Sum appends the hash's current 64-bit value to b.
  // It exists for implementing hash.Hash.
  // For direct calls, it is more efficient to use Sum64.
diff --git a/src/hash/maphash/maphash_purego.go b/src/hash/maphash/maphash_purego.go

new file mode 100644 (file)

index 0000000..d49a44a
--- /dev/null
+++ b/src/hash/maphash/maphash_purego.go
@@ -0,0 +1,104 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build purego
+
+package maphash
+
+import (
+       "crypto/rand"
+       "math/bits"
+)
+
+func rthash(buf []byte, seed uint64) uint64 {
+       if len(buf) == 0 {
+               return seed
+       }
+       return wyhash(buf, seed, uint64(len(buf)))
+}
+
+func rthashString(s string, state uint64) uint64 {
+       return rthash([]byte(s), state)
+}
+
+func randUint64() uint64 {
+       buf := make([]byte, 8)
+       _, _ = rand.Read(buf)
+       return leUint64(buf)
+}
+
+// This is a port of wyhash implementation in runtime/hash64.go,
+// without using unsafe for purego.
+
+const (
+       m1 = 0xa0761d6478bd642f
+       m2 = 0xe7037ed1a0b428db
+       m3 = 0x8ebc6af09c88c6e3
+       m4 = 0x589965cc75374cc3
+       m5 = 0x1d8e4e27c47d124f
+)
+
+func wyhash(key []byte, seed, len uint64) uint64 {
+       p := key
+       i := len
+       var a, b uint64
+       seed ^= m1
+
+       if i > 16 {
+               if i > 48 {
+                       seed1 := seed
+                       seed2 := seed
+                       for ; i > 48; i -= 48 {
+                               seed = mix(r8(p)^m2, r8(p[8:])^seed)
+                               seed1 = mix(r8(p[16:])^m3, r8(p[24:])^seed1)
+                               seed2 = mix(r8(p[32:])^m4, r8(p[40:])^seed2)
+                               p = p[48:]
+                       }
+                       seed ^= seed1 ^ seed2
+               }
+               for ; i > 16; i -= 16 {
+                       seed = mix(r8(p)^m2, r8(p[8:])^seed)
+                       p = p[16:]
+               }
+       }
+       switch {
+       case i == 0:
+               return seed
+       case i < 4:
+               a = r3(p, i)
+       default:
+               n := (i >> 3) << 2
+               a = r4(p)<<32 | r4(p[n:])
+               b = r4(p[i-4:])<<32 | r4(p[i-4-n:])
+       }
+       return mix(m5^len, mix(a^m2, b^seed))
+}
+
+func r3(p []byte, k uint64) uint64 {
+       return (uint64(p[0]) << 16) | (uint64(p[k>>1]) << 8) | uint64(p[k-1])
+}
+
+func r4(p []byte) uint64 {
+       return uint64(leUint32(p))
+}
+
+func r8(p []byte) uint64 {
+       return leUint64(p)
+}
+
+func mix(a, b uint64) uint64 {
+       hi, lo := bits.Mul64(a, b)
+       return hi ^ lo
+}
+
+func leUint32(b []byte) uint32 {
+       _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
+       return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+}
+
+func leUint64(b []byte) uint64 {
+       _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
+       return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
+               uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+}
diff --git a/src/hash/maphash/maphash_runtime.go b/src/hash/maphash/maphash_runtime.go

new file mode 100644 (file)

index 0000000..98097ff
--- /dev/null
+++ b/src/hash/maphash/maphash_runtime.go
@@ -0,0 +1,43 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !purego
+
+package maphash
+
+import (
+       "unsafe"
+)
+
+//go:linkname runtime_fastrand64 runtime.fastrand64
+func runtime_fastrand64() uint64
+
+//go:linkname runtime_memhash runtime.memhash
+//go:noescape
+func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr
+
+func rthash(buf []byte, seed uint64) uint64 {
+       if len(buf) == 0 {
+               return seed
+       }
+       len := len(buf)
+       // The runtime hasher only works on uintptr. For 64-bit
+       // architectures, we use the hasher directly. Otherwise,
+       // we use two parallel hashers on the lower and upper 32 bits.
+       if unsafe.Sizeof(uintptr(0)) == 8 {
+               return uint64(runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed), uintptr(len)))
+       }
+       lo := runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed), uintptr(len))
+       hi := runtime_memhash(unsafe.Pointer(&buf[0]), uintptr(seed>>32), uintptr(len))
+       return uint64(hi)<<32 | uint64(lo)
+}
+
+func rthashString(s string, state uint64) uint64 {
+       buf := unsafe.Slice(unsafe.StringData(s), len(s))
+       return rthash(buf, state)
+}
+
+func randUint64() uint64 {
+       return runtime_fastrand64()
+}
author	Cuong Manh Le <cuong.manhle.vn@gmail.com>
	Thu, 16 Feb 2023 07:35:25 +0000 (14:35 +0700)
committer	Gopher Robot <gobot@golang.org>
	Tue, 28 Feb 2023 17:00:47 +0000 (17:00 +0000)
src/cmd/dist/test.go		patch \| blob \| history
src/go/build/deps_test.go		patch \| blob \| history
src/hash/maphash/maphash.go		patch \| blob \| history
src/hash/maphash/maphash_purego.go	[new file with mode: 0644]	patch \| blob
src/hash/maphash/maphash_runtime.go	[new file with mode: 0644]	patch \| blob