src/runtime/string.go

   1 // Copyright 2014 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package runtime
   6
   7 import (
   8         "internal/bytealg"
   9         "runtime/internal/sys"
  10         "unsafe"
  11 )
  12
// tmpStringBufSize is the number of elements in the temporary buffers
// that callers may pass to the conversion and concatenation routines
// in this file (see tmpBuf and stringtoslicerune).
// The constant is known to the compiler.
// There is no fundamental theory behind this number.
const tmpStringBufSize = 32

// tmpBuf is a fixed-size scratch buffer of tmpStringBufSize bytes.
// Routines in this file that accept a *tmpBuf store their result in it
// when the pointer is non-nil and the result is small enough to fit.
type tmpBuf [tmpStringBufSize]byte
  18
  19 // concatstrings implements a Go string concatenation x+y+z+...
  20 // The operands are passed in the slice a.
  21 // If buf != nil, the compiler has determined that the result does not
  22 // escape the calling function, so the string data can be stored in buf
  23 // if small enough.
  24 func concatstrings(buf *tmpBuf, a []string) string {
  25         idx := 0
  26         l := 0
  27         count := 0
  28         for i, x := range a {
  29                 n := len(x)
  30                 if n == 0 {
  31                         continue
  32                 }
  33                 if l+n < l {
  34                         throw("string concatenation too long")
  35                 }
  36                 l += n
  37                 count++
  38                 idx = i
  39         }
  40         if count == 0 {
  41                 return ""
  42         }
  43
  44         // If there is just one string and either it is not on the stack
  45         // or our result does not escape the calling frame (buf != nil),
  46         // then we can return that string directly.
  47         if count == 1 && (buf != nil || !stringDataOnStack(a[idx])) {
  48                 return a[idx]
  49         }
  50         s, b := rawstringtmp(buf, l)
  51         for _, x := range a {
  52                 copy(b, x)
  53                 b = b[len(x):]
  54         }
  55         return s
  56 }
  57
  58 func concatstring2(buf *tmpBuf, a [2]string) string {
  59         return concatstrings(buf, a[:])
  60 }
  61
  62 func concatstring3(buf *tmpBuf, a [3]string) string {
  63         return concatstrings(buf, a[:])
  64 }
  65
  66 func concatstring4(buf *tmpBuf, a [4]string) string {
  67         return concatstrings(buf, a[:])
  68 }
  69
  70 func concatstring5(buf *tmpBuf, a [5]string) string {
  71         return concatstrings(buf, a[:])
  72 }
  73
  74 // Buf is a fixed-size buffer for the result,
  75 // it is not nil if the result does not escape.
  76 func slicebytetostring(buf *tmpBuf, b []byte) (str string) {
  77         l := len(b)
  78         if l == 0 {
  79                 // Turns out to be a relatively common case.
  80                 // Consider that you want to parse out data between parens in "foo()bar",
  81                 // you find the indices and convert the subslice to string.
  82                 return ""
  83         }
  84         if raceenabled {
  85                 racereadrangepc(unsafe.Pointer(&b[0]),
  86                         uintptr(l),
  87                         getcallerpc(),
  88                         funcPC(slicebytetostring))
  89         }
  90         if msanenabled {
  91                 msanread(unsafe.Pointer(&b[0]), uintptr(l))
  92         }
  93         if l == 1 {
  94                 p := unsafe.Pointer(&staticuint64s[b[0]])
  95                 if sys.BigEndian {
  96                         p = add(p, 7)
  97                 }
  98                 stringStructOf(&str).str = p
  99                 stringStructOf(&str).len = 1
 100                 return
 101         }
 102
 103         var p unsafe.Pointer
 104         if buf != nil && len(b) <= len(buf) {
 105                 p = unsafe.Pointer(buf)
 106         } else {
 107                 p = mallocgc(uintptr(len(b)), nil, false)
 108         }
 109         stringStructOf(&str).str = p
 110         stringStructOf(&str).len = len(b)
 111         memmove(p, (*(*slice)(unsafe.Pointer(&b))).array, uintptr(len(b)))
 112         return
 113 }
 114
 115 // stringDataOnStack reports whether the string's data is
 116 // stored on the current goroutine's stack.
 117 func stringDataOnStack(s string) bool {
 118         ptr := uintptr(stringStructOf(&s).str)
 119         stk := getg().stack
 120         return stk.lo <= ptr && ptr < stk.hi
 121 }
 122
 123 func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) {
 124         if buf != nil && l <= len(buf) {
 125                 b = buf[:l]
 126                 s = slicebytetostringtmp(b)
 127         } else {
 128                 s, b = rawstring(l)
 129         }
 130         return
 131 }
 132
 133 // slicebytetostringtmp returns a "string" referring to the actual []byte bytes.
 134 //
 135 // Callers need to ensure that the returned string will not be used after
 136 // the calling goroutine modifies the original slice or synchronizes with
 137 // another goroutine.
 138 //
 139 // The function is only called when instrumenting
 140 // and otherwise intrinsified by the compiler.
 141 //
 142 // Some internal compiler optimizations use this function.
 143 // - Used for m[T1{... Tn{..., string(k), ...} ...}] and m[string(k)]
 144 //   where k is []byte, T1 to Tn is a nesting of struct and array literals.
 145 // - Used for "<"+string(b)+">" concatenation where b is []byte.
 146 // - Used for string(b)=="foo" comparison where b is []byte.
 147 func slicebytetostringtmp(b []byte) string {
 148         if raceenabled && len(b) > 0 {
 149                 racereadrangepc(unsafe.Pointer(&b[0]),
 150                         uintptr(len(b)),
 151                         getcallerpc(),
 152                         funcPC(slicebytetostringtmp))
 153         }
 154         if msanenabled && len(b) > 0 {
 155                 msanread(unsafe.Pointer(&b[0]), uintptr(len(b)))
 156         }
 157         return *(*string)(unsafe.Pointer(&b))
 158 }
 159
 160 func stringtoslicebyte(buf *tmpBuf, s string) []byte {
 161         var b []byte
 162         if buf != nil && len(s) <= len(buf) {
 163                 *buf = tmpBuf{}
 164                 b = buf[:len(s)]
 165         } else {
 166                 b = rawbyteslice(len(s))
 167         }
 168         copy(b, s)
 169         return b
 170 }
 171
 172 func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune {
 173         // two passes.
 174         // unlike slicerunetostring, no race because strings are immutable.
 175         n := 0
 176         for range s {
 177                 n++
 178         }
 179
 180         var a []rune
 181         if buf != nil && n <= len(buf) {
 182                 *buf = [tmpStringBufSize]rune{}
 183                 a = buf[:n]
 184         } else {
 185                 a = rawruneslice(n)
 186         }
 187
 188         n = 0
 189         for _, r := range s {
 190                 a[n] = r
 191                 n++
 192         }
 193         return a
 194 }
 195
// slicerunetostring converts the rune slice a to a string by encoding
// each rune with encoderune. The result is stored in buf when buf is
// non-nil and the encoding (plus slack) fits, otherwise in newly
// allocated memory (see rawstringtmp).
func slicerunetostring(buf *tmpBuf, a []rune) string {
	if raceenabled && len(a) > 0 {
		racereadrangepc(unsafe.Pointer(&a[0]),
			uintptr(len(a))*unsafe.Sizeof(a[0]),
			getcallerpc(),
			funcPC(slicerunetostring))
	}
	if msanenabled && len(a) > 0 {
		msanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
	}
	// First pass: measure the encoded size using a scratch buffer.
	var dum [4]byte
	size1 := 0
	for _, r := range a {
		size1 += encoderune(dum[:], r)
	}
	// Request 3 bytes beyond the measured size.
	// NOTE(review): presumably slack so that a final encoderune call in
	// the second pass always has room for a full 4-byte encoding even
	// if a changed since the first pass; confirm.
	s, b := rawstringtmp(buf, size1+3)
	// Second pass: encode into b.
	size2 := 0
	for _, r := range a {
		// check for race: stop once the size measured in the first
		// pass has been reached, in case a was modified in between.
		if size2 >= size1 {
			break
		}
		size2 += encoderune(b[size2:], r)
	}
	// Trim the result to the bytes actually written.
	return s[:size2]
}
 222
// stringStruct gives field-level access to a string value: a data
// pointer and a length. stringStructOf reinterprets a *string as a
// *stringStruct so that code in this file can fill in both fields.
type stringStruct struct {
	str unsafe.Pointer // pointer to the string's bytes
	len int            // length of the string in bytes
}
 227
// stringStructDWARF is a variant of stringStruct with a *byte pointer
// type for DWARF debugging. It has the same two fields as stringStruct;
// only the type of str differs.
type stringStructDWARF struct {
	str *byte // pointer to the string's bytes
	len int   // length of the string in bytes
}
 233
 234 func stringStructOf(sp *string) *stringStruct {
 235         return (*stringStruct)(unsafe.Pointer(sp))
 236 }
 237
 238 func intstring(buf *[4]byte, v int64) (s string) {
 239         var b []byte
 240         if buf != nil {
 241                 b = buf[:]
 242                 s = slicebytetostringtmp(b)
 243         } else {
 244                 s, b = rawstring(4)
 245         }
 246         if int64(rune(v)) != v {
 247                 v = runeError
 248         }
 249         n := encoderune(b, rune(v))
 250         return s[:n]
 251 }
 252
 253 // rawstring allocates storage for a new string. The returned
 254 // string and byte slice both refer to the same storage.
 255 // The storage is not zeroed. Callers should use
 256 // b to set the string contents and then drop b.
 257 func rawstring(size int) (s string, b []byte) {
 258         p := mallocgc(uintptr(size), nil, false)
 259
 260         stringStructOf(&s).str = p
 261         stringStructOf(&s).len = size
 262
 263         *(*slice)(unsafe.Pointer(&b)) = slice{p, size, size}
 264
 265         return
 266 }
 267
 268 // rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
 269 func rawbyteslice(size int) (b []byte) {
 270         cap := roundupsize(uintptr(size))
 271         p := mallocgc(cap, nil, false)
 272         if cap != uintptr(size) {
 273                 memclrNoHeapPointers(add(p, uintptr(size)), cap-uintptr(size))
 274         }
 275
 276         *(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(cap)}
 277         return
 278 }
 279
 280 // rawruneslice allocates a new rune slice. The rune slice is not zeroed.
 281 func rawruneslice(size int) (b []rune) {
 282         if uintptr(size) > maxAlloc/4 {
 283                 throw("out of memory")
 284         }
 285         mem := roundupsize(uintptr(size) * 4)
 286         p := mallocgc(mem, nil, false)
 287         if mem != uintptr(size)*4 {
 288                 memclrNoHeapPointers(add(p, uintptr(size)*4), mem-uintptr(size)*4)
 289         }
 290
 291         *(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(mem / 4)}
 292         return
 293 }
 294
 295 // used by cmd/cgo
 296 func gobytes(p *byte, n int) (b []byte) {
 297         if n == 0 {
 298                 return make([]byte, 0)
 299         }
 300
 301         if n < 0 || uintptr(n) > maxAlloc {
 302                 panic(errorString("gobytes: length out of range"))
 303         }
 304
 305         bp := mallocgc(uintptr(n), nil, false)
 306         memmove(bp, unsafe.Pointer(p), uintptr(n))
 307
 308         *(*slice)(unsafe.Pointer(&b)) = slice{bp, n, n}
 309         return
 310 }
 311
 312 // This is exported via linkname to assembly in syscall (for Plan9).
 313 //go:linkname gostring
 314 func gostring(p *byte) string {
 315         l := findnull(p)
 316         if l == 0 {
 317                 return ""
 318         }
 319         s, b := rawstring(l)
 320         memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
 321         return s
 322 }
 323
 324 func gostringn(p *byte, l int) string {
 325         if l == 0 {
 326                 return ""
 327         }
 328         s, b := rawstring(l)
 329         memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
 330         return s
 331 }
 332
 333 func index(s, t string) int {
 334         if len(t) == 0 {
 335                 return 0
 336         }
 337         for i := 0; i < len(s); i++ {
 338                 if s[i] == t[0] && hasPrefix(s[i:], t) {
 339                         return i
 340                 }
 341         }
 342         return -1
 343 }
 344
 345 func contains(s, t string) bool {
 346         return index(s, t) >= 0
 347 }
 348
 349 func hasPrefix(s, prefix string) bool {
 350         return len(s) >= len(prefix) && s[:len(prefix)] == prefix
 351 }
 352
 353 const (
 354         maxUint = ^uint(0)
 355         maxInt  = int(maxUint >> 1)
 356 )
 357
 358 // atoi parses an int from a string s.
 359 // The bool result reports whether s is a number
 360 // representable by a value of type int.
 361 func atoi(s string) (int, bool) {
 362         if s == "" {
 363                 return 0, false
 364         }
 365
 366         neg := false
 367         if s[0] == '-' {
 368                 neg = true
 369                 s = s[1:]
 370         }
 371
 372         un := uint(0)
 373         for i := 0; i < len(s); i++ {
 374                 c := s[i]
 375                 if c < '0' || c > '9' {
 376                         return 0, false
 377                 }
 378                 if un > maxUint/10 {
 379                         // overflow
 380                         return 0, false
 381                 }
 382                 un *= 10
 383                 un1 := un + uint(c) - '0'
 384                 if un1 < un {
 385                         // overflow
 386                         return 0, false
 387                 }
 388                 un = un1
 389         }
 390
 391         if !neg && un > uint(maxInt) {
 392                 return 0, false
 393         }
 394         if neg && un > uint(maxInt)+1 {
 395                 return 0, false
 396         }
 397
 398         n := int(un)
 399         if neg {
 400                 n = -n
 401         }
 402
 403         return n, true
 404 }
 405
 406 // atoi32 is like atoi but for integers
 407 // that fit into an int32.
 408 func atoi32(s string) (int32, bool) {
 409         if n, ok := atoi(s); n == int(int32(n)) {
 410                 return int32(n), ok
 411         }
 412         return 0, false
 413 }
 414
// findnull returns the number of bytes that precede the first zero byte
// in the zero-terminated byte sequence starting at s.
// It returns 0 if s is nil.
//
//go:nosplit
func findnull(s *byte) int {
	if s == nil {
		return 0
	}

	// Avoid IndexByteString on Plan 9 because it uses SSE instructions
	// on x86 machines, and those are classified as floating point instructions,
	// which are illegal in a note handler.
	if GOOS == "plan9" {
		// Plain byte-at-a-time scan through an array view of the memory.
		p := (*[maxAlloc/2 - 1]byte)(unsafe.Pointer(s))
		l := 0
		for p[l] != 0 {
			l++
		}
		return l
	}

	// pageSize is the unit we scan at a time looking for the zero byte.
	// It must be the minimum page size for any architecture Go
	// runs on. It's okay (just a minor performance loss) if the
	// actual system page size is larger than this value.
	const pageSize = 4096

	// offset counts the bytes already scanned without finding a zero.
	offset := 0
	ptr := unsafe.Pointer(s)
	// IndexByteString uses wide reads, so we need to be careful
	// with page boundaries. Call IndexByteString on
	// [ptr, endOfPage) interval.
	// The first chunk runs from ptr to the end of its page; every later
	// chunk is one whole page.
	safeLen := int(pageSize - uintptr(ptr)%pageSize)

	for {
		// Build a temporary string header over [ptr, ptr+safeLen).
		t := *(*string)(unsafe.Pointer(&stringStruct{ptr, safeLen}))
		// Check one page at a time.
		if i := bytealg.IndexByteString(t, 0); i != -1 {
			return offset + i
		}
		// Move to next page
		ptr = unsafe.Pointer(uintptr(ptr) + uintptr(safeLen))
		offset += safeLen
		safeLen = pageSize
	}
}
 458
 459 func findnullw(s *uint16) int {
 460         if s == nil {
 461                 return 0
 462         }
 463         p := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(s))
 464         l := 0
 465         for p[l] != 0 {
 466                 l++
 467         }
 468         return l
 469 }
 470
 471 //go:nosplit
 472 func gostringnocopy(str *byte) string {
 473         ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)}
 474         s := *(*string)(unsafe.Pointer(&ss))
 475         return s
 476 }
 477
// gostringw converts the zero-terminated uint16 sequence at strw into a
// newly allocated Go string. Each uint16 element is converted to a rune
// on its own and encoded with encoderune.
// Unlike findnullw, there is no nil check here: strw is dereferenced
// unconditionally.
func gostringw(strw *uint16) string {
	// buf is scratch space used only to measure encoded sizes.
	var buf [8]byte
	str := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(strw))
	// First pass: compute the total encoded length n1.
	n1 := 0
	for i := 0; str[i] != 0; i++ {
		n1 += encoderune(buf[:], rune(str[i]))
	}
	// Allocate 4 bytes beyond the measured length; this leaves room
	// for the zero byte stored after the loop below.
	s, b := rawstring(n1 + 4)
	// Second pass: encode into b.
	n2 := 0
	for i := 0; str[i] != 0; i++ {
		// check for race: stop once the length measured in the first
		// pass has been reached, in case the input changed in between.
		if n2 >= n1 {
			break
		}
		n2 += encoderune(b[n2:], rune(str[i]))
	}
	b[n2] = 0 // for luck
	// The result excludes the trailing zero byte and any unused slack.
	return s[:n2]
}
 497
 498 // parseRelease parses a dot-separated version number. It follows the
 499 // semver syntax, but allows the minor and patch versions to be
 500 // elided.
 501 func parseRelease(rel string) (major, minor, patch int, ok bool) {
 502         // Strip anything after a dash or plus.
 503         for i := 0; i < len(rel); i++ {
 504                 if rel[i] == '-' || rel[i] == '+' {
 505                         rel = rel[:i]
 506                         break
 507                 }
 508         }
 509
 510         next := func() (int, bool) {
 511                 for i := 0; i < len(rel); i++ {
 512                         if rel[i] == '.' {
 513                                 ver, ok := atoi(rel[:i])
 514                                 rel = rel[i+1:]
 515                                 return ver, ok
 516                         }
 517                 }
 518                 ver, ok := atoi(rel)
 519                 rel = ""
 520                 return ver, ok
 521         }
 522         if major, ok = next(); !ok || rel == "" {
 523                 return
 524         }
 525         if minor, ok = next(); !ok || rel == "" {
 526                 return
 527         }
 528         patch, ok = next()
 529         return
 530 }