runtime: implement experiment to replace heap bitmap with alloc headers

[gostls13.git] / src / runtime / string.go
diff --git a/src/runtime/string.go b/src/runtime/string.go

index 5dc7e0295af770a108df6534d0b0846850866a76..e01b7fc74448e5c41ed9c146f70e1d86e42cef26 100644 (file)
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -5,7 +5,9 @@
  package runtime
  
  import (
-       "runtime/internal/atomic"
+       "internal/abi"
+       "internal/bytealg"
+       "internal/goarch"
         "unsafe"
  )
  
@@ -47,58 +49,76 @@ func concatstrings(buf *tmpBuf, a []string) string {
                 return a[idx]
         }
         s, b := rawstringtmp(buf, l)
-       l = 0
         for _, x := range a {
-               copy(b[l:], x)
-               l += len(x)
+               copy(b, x)
+               b = b[len(x):]
         }
         return s
  }
  
-func concatstring2(buf *tmpBuf, a [2]string) string {
-       return concatstrings(buf, a[:])
+func concatstring2(buf *tmpBuf, a0, a1 string) string {
+       return concatstrings(buf, []string{a0, a1})
  }
  
-func concatstring3(buf *tmpBuf, a [3]string) string {
-       return concatstrings(buf, a[:])
+func concatstring3(buf *tmpBuf, a0, a1, a2 string) string {
+       return concatstrings(buf, []string{a0, a1, a2})
  }
  
-func concatstring4(buf *tmpBuf, a [4]string) string {
-       return concatstrings(buf, a[:])
+func concatstring4(buf *tmpBuf, a0, a1, a2, a3 string) string {
+       return concatstrings(buf, []string{a0, a1, a2, a3})
  }
  
-func concatstring5(buf *tmpBuf, a [5]string) string {
-       return concatstrings(buf, a[:])
+func concatstring5(buf *tmpBuf, a0, a1, a2, a3, a4 string) string {
+       return concatstrings(buf, []string{a0, a1, a2, a3, a4})
  }
  
+// slicebytetostring converts a byte slice to a string.
+// It is inserted by the compiler into generated code.
+// ptr is a pointer to the first element of the slice;
+// n is the length of the slice.
  // Buf is a fixed-size buffer for the result,
  // it is not nil if the result does not escape.
-func slicebytetostring(buf *tmpBuf, b []byte) string {
-       l := len(b)
-       if l == 0 {
+func slicebytetostring(buf *tmpBuf, ptr *byte, n int) string {
+       if n == 0 {
                 // Turns out to be a relatively common case.
                 // Consider that you want to parse out data between parens in "foo()bar",
                 // you find the indices and convert the subslice to string.
                 return ""
         }
-       if raceenabled && l > 0 {
-               racereadrangepc(unsafe.Pointer(&b[0]),
-                       uintptr(l),
-                       getcallerpc(unsafe.Pointer(&buf)),
-                       funcPC(slicebytetostring))
+       if raceenabled {
+               racereadrangepc(unsafe.Pointer(ptr),
+                       uintptr(n),
+                       getcallerpc(),
+                       abi.FuncPCABIInternal(slicebytetostring))
         }
-       if msanenabled && l > 0 {
-               msanread(unsafe.Pointer(&b[0]), uintptr(l))
+       if msanenabled {
+               msanread(unsafe.Pointer(ptr), uintptr(n))
         }
-       s, c := rawstringtmp(buf, l)
-       copy(c, b)
-       return s
+       if asanenabled {
+               asanread(unsafe.Pointer(ptr), uintptr(n))
+       }
+       if n == 1 {
+               p := unsafe.Pointer(&staticuint64s[*ptr])
+               if goarch.BigEndian {
+                       p = add(p, 7)
+               }
+               return unsafe.String((*byte)(p), 1)
+       }
+
+       var p unsafe.Pointer
+       if buf != nil && n <= len(buf) {
+               p = unsafe.Pointer(buf)
+       } else {
+               p = mallocgc(uintptr(n), nil, false)
+       }
+       memmove(p, unsafe.Pointer(ptr), uintptr(n))
+       return unsafe.String((*byte)(p), n)
  }
  
  // stringDataOnStack reports whether the string's data is
  // stored on the current goroutine's stack.
  func stringDataOnStack(s string) bool {
-       ptr := uintptr(stringStructOf(&s).str)
+       ptr := uintptr(unsafe.Pointer(unsafe.StringData(s)))
         stk := getg().stack
         return stk.lo <= ptr && ptr < stk.hi
  }
@@ -106,40 +126,48 @@ func stringDataOnStack(s string) bool {
  func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) {
         if buf != nil && l <= len(buf) {
                 b = buf[:l]
-               s = slicebytetostringtmp(b)
+               s = slicebytetostringtmp(&b[0], len(b))
         } else {
                 s, b = rawstring(l)
         }
         return
  }
  
-func slicebytetostringtmp(b []byte) string {
-       // Return a "string" referring to the actual []byte bytes.
-       // This is only for use by internal compiler optimizations
-       // that know that the string form will be discarded before
-       // the calling goroutine could possibly modify the original
-       // slice or synchronize with another goroutine.
-       // First such case is a m[string(k)] lookup where
-       // m is a string-keyed map and k is a []byte.
-       // Second such case is "<"+string(b)+">" concatenation where b is []byte.
-       // Third such case is string(b)=="foo" comparison where b is []byte.
-
-       if raceenabled && len(b) > 0 {
-               racereadrangepc(unsafe.Pointer(&b[0]),
-                       uintptr(len(b)),
-                       getcallerpc(unsafe.Pointer(&b)),
-                       funcPC(slicebytetostringtmp))
-       }
-       if msanenabled && len(b) > 0 {
-               msanread(unsafe.Pointer(&b[0]), uintptr(len(b)))
-       }
-       return *(*string)(unsafe.Pointer(&b))
+// slicebytetostringtmp returns a "string" referring to the actual []byte bytes.
+//
+// Callers need to ensure that the returned string will not be used after
+// the calling goroutine modifies the original slice or synchronizes with
+// another goroutine.
+//
+// The function is only called when instrumenting
+// and otherwise intrinsified by the compiler.
+//
+// Some internal compiler optimizations use this function.
+//   - Used for m[T1{... Tn{..., string(k), ...} ...}] and m[string(k)]
+//     where k is []byte, T1 to Tn is a nesting of struct and array literals.
+//   - Used for "<"+string(b)+">" concatenation where b is []byte.
+//   - Used for string(b)=="foo" comparison where b is []byte.
+func slicebytetostringtmp(ptr *byte, n int) string {
+       if raceenabled && n > 0 {
+               racereadrangepc(unsafe.Pointer(ptr),
+                       uintptr(n),
+                       getcallerpc(),
+                       abi.FuncPCABIInternal(slicebytetostringtmp))
+       }
+       if msanenabled && n > 0 {
+               msanread(unsafe.Pointer(ptr), uintptr(n))
+       }
+       if asanenabled && n > 0 {
+               asanread(unsafe.Pointer(ptr), uintptr(n))
+       }
+       return unsafe.String(ptr, n)
  }
  
  func stringtoslicebyte(buf *tmpBuf, s string) []byte {
         var b []byte
         if buf != nil && len(s) <= len(buf) {
-               b = buf[:len(s):len(s)]
+               *buf = tmpBuf{}
+               b = buf[:len(s)]
         } else {
                 b = rawbyteslice(len(s))
         }
@@ -147,38 +175,24 @@ func stringtoslicebyte(buf *tmpBuf, s string) []byte {
         return b
  }
  
-func stringtoslicebytetmp(s string) []byte {
-       // Return a slice referring to the actual string bytes.
-       // This is only for use by internal compiler optimizations
-       // that know that the slice won't be mutated.
-       // The only such case today is:
-       // for i, c := range []byte(str)
-
-       str := stringStructOf(&s)
-       ret := slice{array: unsafe.Pointer(str.str), len: str.len, cap: str.len}
-       return *(*[]byte)(unsafe.Pointer(&ret))
-}
-
  func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune {
         // two passes.
         // unlike slicerunetostring, no race because strings are immutable.
         n := 0
-       t := s
-       for len(s) > 0 {
-               _, k := charntorune(s)
-               s = s[k:]
+       for range s {
                 n++
         }
+
         var a []rune
         if buf != nil && n <= len(buf) {
-               a = buf[:n:n]
+               *buf = [tmpStringBufSize]rune{}
+               a = buf[:n]
         } else {
                 a = rawruneslice(n)
         }
+
         n = 0
-       for len(t) > 0 {
-               r, k := charntorune(t)
-               t = t[k:]
+       for _, r := range s {
                 a[n] = r
                 n++
         }
@@ -189,16 +203,19 @@ func slicerunetostring(buf *tmpBuf, a []rune) string {
         if raceenabled && len(a) > 0 {
                 racereadrangepc(unsafe.Pointer(&a[0]),
                         uintptr(len(a))*unsafe.Sizeof(a[0]),
-                       getcallerpc(unsafe.Pointer(&buf)),
-                       funcPC(slicerunetostring))
+                       getcallerpc(),
+                       abi.FuncPCABIInternal(slicerunetostring))
         }
         if msanenabled && len(a) > 0 {
                 msanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
         }
+       if asanenabled && len(a) > 0 {
+               asanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
+       }
         var dum [4]byte
         size1 := 0
         for _, r := range a {
-               size1 += runetochar(dum[:], r)
+               size1 += encoderune(dum[:], r)
         }
         s, b := rawstringtmp(buf, size1+3)
         size2 := 0
@@ -207,7 +224,7 @@ func slicerunetostring(buf *tmpBuf, a []rune) string {
                 if size2 >= size1 {
                         break
                 }
-               size2 += runetochar(b[size2:], r)
+               size2 += encoderune(b[size2:], r)
         }
         return s[:size2]
  }
@@ -227,53 +244,19 @@ func stringStructOf(sp *string) *stringStruct {
         return (*stringStruct)(unsafe.Pointer(sp))
  }
  
-func intstring(buf *[4]byte, v int64) string {
-       var s string
+func intstring(buf *[4]byte, v int64) (s string) {
         var b []byte
         if buf != nil {
                 b = buf[:]
-               s = slicebytetostringtmp(b)
+               s = slicebytetostringtmp(&b[0], len(b))
         } else {
                 s, b = rawstring(4)
         }
-       n := runetochar(b, rune(v))
-       return s[:n]
-}
-
-// stringiter returns the index of the next
-// rune after the rune that starts at s[k].
-func stringiter(s string, k int) int {
-       if k >= len(s) {
-               // 0 is end of iteration
-               return 0
-       }
-
-       c := s[k]
-       if c < runeself {
-               return k + 1
+       if int64(rune(v)) != v {
+               v = runeError
         }
-
-       // multi-char rune
-       _, n := charntorune(s[k:])
-       return k + n
-}
-
-// stringiter2 returns the rune that starts at s[k]
-// and the index where the next rune starts.
-func stringiter2(s string, k int) (int, rune) {
-       if k >= len(s) {
-               // 0 is end of iteration
-               return 0, 0
-       }
-
-       c := s[k]
-       if c < runeself {
-               return k + 1, rune(c)
-       }
-
-       // multi-char rune
-       r, n := charntorune(s[k:])
-       return k + n, r
+       n := encoderune(b, rune(v))
+       return s[:n]
  }
  
  // rawstring allocates storage for a new string. The returned
@@ -281,27 +264,16 @@ func stringiter2(s string, k int) (int, rune) {
  // The storage is not zeroed. Callers should use
  // b to set the string contents and then drop b.
  func rawstring(size int) (s string, b []byte) {
-       p := mallocgc(uintptr(size), nil, flagNoScan|flagNoZero)
-
-       stringStructOf(&s).str = p
-       stringStructOf(&s).len = size
-
-       *(*slice)(unsafe.Pointer(&b)) = slice{p, size, size}
-
-       for {
-               ms := maxstring
-               if uintptr(size) <= uintptr(ms) || atomic.Casuintptr((*uintptr)(unsafe.Pointer(&maxstring)), uintptr(ms), uintptr(size)) {
-                       return
-               }
-       }
+       p := mallocgc(uintptr(size), nil, false)
+       return unsafe.String((*byte)(p), size), unsafe.Slice((*byte)(p), size)
  }
  
  // rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
  func rawbyteslice(size int) (b []byte) {
-       cap := roundupsize(uintptr(size))
-       p := mallocgc(cap, nil, flagNoScan|flagNoZero)
+       cap := roundupsize(uintptr(size), true)
+       p := mallocgc(cap, nil, false)
         if cap != uintptr(size) {
-               memclr(add(p, uintptr(size)), cap-uintptr(size))
+               memclrNoHeapPointers(add(p, uintptr(size)), cap-uintptr(size))
         }
  
         *(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(cap)}
@@ -310,13 +282,13 @@ func rawbyteslice(size int) (b []byte) {
  
  // rawruneslice allocates a new rune slice. The rune slice is not zeroed.
  func rawruneslice(size int) (b []rune) {
-       if uintptr(size) > _MaxMem/4 {
+       if uintptr(size) > maxAlloc/4 {
                 throw("out of memory")
         }
-       mem := roundupsize(uintptr(size) * 4)
-       p := mallocgc(mem, nil, flagNoScan|flagNoZero)
+       mem := roundupsize(uintptr(size)*4, true)
+       p := mallocgc(mem, nil, false)
         if mem != uintptr(size)*4 {
-               memclr(add(p, uintptr(size)*4), mem-uintptr(size)*4)
+               memclrNoHeapPointers(add(p, uintptr(size)*4), mem-uintptr(size)*4)
         }
  
         *(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(mem / 4)}
@@ -324,15 +296,25 @@ func rawruneslice(size int) (b []rune) {
  }
  
  // used by cmd/cgo
-func gobytes(p *byte, n int) []byte {
+func gobytes(p *byte, n int) (b []byte) {
         if n == 0 {
                 return make([]byte, 0)
         }
-       x := make([]byte, n)
-       memmove(unsafe.Pointer(&x[0]), unsafe.Pointer(p), uintptr(n))
-       return x
+
+       if n < 0 || uintptr(n) > maxAlloc {
+               panic(errorString("gobytes: length out of range"))
+       }
+
+       bp := mallocgc(uintptr(n), nil, false)
+       memmove(bp, unsafe.Pointer(p), uintptr(n))
+
+       *(*slice)(unsafe.Pointer(&b)) = slice{bp, n, n}
+       return
  }
  
+// This is exported via linkname to assembly in syscall (for Plan9).
+//
+//go:linkname gostring
  func gostring(p *byte) string {
         l := findnull(p)
         if l == 0 {
@@ -343,6 +325,13 @@ func gostring(p *byte) string {
         return s
  }
  
+// internal_syscall_gostring is a version of gostring for internal/syscall/unix.
+//
+//go:linkname internal_syscall_gostring internal/syscall/unix.gostring
+func internal_syscall_gostring(p *byte) string {
+       return gostring(p)
+}
+
  func gostringn(p *byte, l int) string {
         if l == 0 {
                 return ""
@@ -352,33 +341,167 @@ func gostringn(p *byte, l int) string {
         return s
  }
  
-func index(s, t string) int {
-       if len(t) == 0 {
-               return 0
+func hasPrefix(s, prefix string) bool {
+       return len(s) >= len(prefix) && s[:len(prefix)] == prefix
+}
+
+func hasSuffix(s, suffix string) bool {
+       return len(s) >= len(suffix) && s[len(s)-len(suffix):] == suffix
+}
+
+const (
+       maxUint64 = ^uint64(0)
+       maxInt64  = int64(maxUint64 >> 1)
+)
+
+// atoi64 parses an int64 from a string s.
+// The bool result reports whether s is a number
+// representable by a value of type int64.
+func atoi64(s string) (int64, bool) {
+       if s == "" {
+               return 0, false
+       }
+
+       neg := false
+       if s[0] == '-' {
+               neg = true
+               s = s[1:]
         }
+
+       un := uint64(0)
         for i := 0; i < len(s); i++ {
-               if s[i] == t[0] && hasprefix(s[i:], t) {
-                       return i
+               c := s[i]
+               if c < '0' || c > '9' {
+                       return 0, false
+               }
+               if un > maxUint64/10 {
+                       // overflow
+                       return 0, false
                 }
+               un *= 10
+               un1 := un + uint64(c) - '0'
+               if un1 < un {
+                       // overflow
+                       return 0, false
+               }
+               un = un1
+       }
+
+       if !neg && un > uint64(maxInt64) {
+               return 0, false
         }
-       return -1
+       if neg && un > uint64(maxInt64)+1 {
+               return 0, false
+       }
+
+       n := int64(un)
+       if neg {
+               n = -n
+       }
+
+       return n, true
  }
  
-func contains(s, t string) bool {
-       return index(s, t) >= 0
+// atoi is like atoi64 but for integers
+// that fit into an int.
+func atoi(s string) (int, bool) {
+       if n, ok := atoi64(s); n == int64(int(n)) {
+               return int(n), ok
+       }
+       return 0, false
  }
  
-func hasprefix(s, t string) bool {
-       return len(s) >= len(t) && s[:len(t)] == t
+// atoi32 is like atoi but for integers
+// that fit into an int32.
+func atoi32(s string) (int32, bool) {
+       if n, ok := atoi64(s); n == int64(int32(n)) {
+               return int32(n), ok
+       }
+       return 0, false
  }
  
-func atoi(s string) int {
-       n := 0
-       for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
-               n = n*10 + int(s[0]) - '0'
-               s = s[1:]
+// parseByteCount parses a string that represents a count of bytes.
+//
+// s must match the following regular expression:
+//
+//     ^[0-9]+(([KMGT]i)?B)?$
+//
+// In other words, an integer byte count with an optional unit
+// suffix. Acceptable suffixes include one of
+// - KiB, MiB, GiB, TiB which represent binary IEC/ISO 80000 units, or
+// - B, which just represents bytes.
+//
+// Returns an int64 because that's what its callers want and receive,
+// but the result is always non-negative.
+func parseByteCount(s string) (int64, bool) {
+       // The empty string is not valid.
+       if s == "" {
+               return 0, false
         }
-       return n
+       // Handle the easy non-suffix case.
+       last := s[len(s)-1]
+       if last >= '0' && last <= '9' {
+               n, ok := atoi64(s)
+               if !ok || n < 0 {
+                       return 0, false
+               }
+               return n, ok
+       }
+       // Failing a trailing digit, this must always end in 'B'.
+       // Also at this point there must be at least one digit before
+       // that B.
+       if last != 'B' || len(s) < 2 {
+               return 0, false
+       }
+       // The one before that must always be a digit or 'i'.
+       if c := s[len(s)-2]; c >= '0' && c <= '9' {
+               // Trivial 'B' suffix.
+               n, ok := atoi64(s[:len(s)-1])
+               if !ok || n < 0 {
+                       return 0, false
+               }
+               return n, ok
+       } else if c != 'i' {
+               return 0, false
+       }
+       // Finally, we need at least 4 characters now, for the unit
+       // prefix and at least one digit.
+       if len(s) < 4 {
+               return 0, false
+       }
+       power := 0
+       switch s[len(s)-3] {
+       case 'K':
+               power = 1
+       case 'M':
+               power = 2
+       case 'G':
+               power = 3
+       case 'T':
+               power = 4
+       default:
+               // Invalid suffix.
+               return 0, false
+       }
+       m := uint64(1)
+       for i := 0; i < power; i++ {
+               m *= 1024
+       }
+       n, ok := atoi64(s[:len(s)-3])
+       if !ok || n < 0 {
+               return 0, false
+       }
+       un := uint64(n)
+       if un > maxUint64/m {
+               // Overflow.
+               return 0, false
+       }
+       un *= m
+       if un > uint64(maxInt64) {
+               // Overflow.
+               return 0, false
+       }
+       return int64(un), true
  }
  
  //go:nosplit
@@ -386,19 +509,50 @@ func findnull(s *byte) int {
         if s == nil {
                 return 0
         }
-       p := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s))
-       l := 0
-       for p[l] != 0 {
-               l++
+
+       // Avoid IndexByteString on Plan 9 because it uses SSE instructions
+       // on x86 machines, and those are classified as floating point instructions,
+       // which are illegal in a note handler.
+       if GOOS == "plan9" {
+               p := (*[maxAlloc/2 - 1]byte)(unsafe.Pointer(s))
+               l := 0
+               for p[l] != 0 {
+                       l++
+               }
+               return l
+       }
+
+       // pageSize is the unit we scan at a time looking for NULL.
+       // It must be the minimum page size for any architecture Go
+       // runs on. It's okay (just a minor performance loss) if the
+       // actual system page size is larger than this value.
+       const pageSize = 4096
+
+       offset := 0
+       ptr := unsafe.Pointer(s)
+       // IndexByteString uses wide reads, so we need to be careful
+       // with page boundaries. Call IndexByteString on
+       // [ptr, endOfPage) interval.
+       safeLen := int(pageSize - uintptr(ptr)%pageSize)
+
+       for {
+               t := *(*string)(unsafe.Pointer(&stringStruct{ptr, safeLen}))
+               // Check one page at a time.
+               if i := bytealg.IndexByteString(t, 0); i != -1 {
+                       return offset + i
+               }
+               // Move to next page
+               ptr = unsafe.Pointer(uintptr(ptr) + uintptr(safeLen))
+               offset += safeLen
+               safeLen = pageSize
         }
-       return l
  }
  
  func findnullw(s *uint16) int {
         if s == nil {
                 return 0
         }
-       p := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(s))
+       p := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(s))
         l := 0
         for p[l] != 0 {
                 l++
@@ -406,27 +560,19 @@ func findnullw(s *uint16) int {
         return l
  }
  
-var maxstring uintptr = 256 // a hint for print
-
  //go:nosplit
  func gostringnocopy(str *byte) string {
         ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)}
         s := *(*string)(unsafe.Pointer(&ss))
-       for {
-               ms := maxstring
-               if uintptr(len(s)) <= ms || atomic.Casuintptr(&maxstring, ms, uintptr(len(s))) {
-                       break
-               }
-       }
         return s
  }
  
  func gostringw(strw *uint16) string {
         var buf [8]byte
-       str := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(strw))
+       str := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(strw))
         n1 := 0
         for i := 0; str[i] != 0; i++ {
-               n1 += runetochar(buf[:], rune(str[i]))
+               n1 += encoderune(buf[:], rune(str[i]))
         }
         s, b := rawstring(n1 + 4)
         n2 := 0
@@ -435,7 +581,7 @@ func gostringw(strw *uint16) string {
                 if n2 >= n1 {
                         break
                 }
-               n2 += runetochar(b[n2:], rune(str[i]))
+               n2 += encoderune(b[n2:], rune(str[i]))
         }
         b[n2] = 0 // for luck
         return s[:n2]