src/cmd/go/internal/modindex/build_read.go

   1 // Copyright 2012 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // This file is a lightly modified copy go/build/read.go with unused parts
   6 // removed.
   7
   8 package modindex
   9
  10 import (
  11         "bufio"
  12         "bytes"
  13         "errors"
  14         "fmt"
  15         "go/ast"
  16         "go/build"
  17         "go/parser"
  18         "go/token"
  19         "io"
  20         "strconv"
  21         "strings"
  22         "unicode"
  23         "unicode/utf8"
  24 )
  25
// importReader is a byte-at-a-time scanner over Go source text. It keeps the
// bytes consumed so far, a one-byte lookahead, recorded error/EOF state, and a
// running source position.
type importReader struct {
	b    *bufio.Reader  // underlying input
	buf  []byte         // bytes appended by readByte; consumed from the front by readByteNoBuf
	peek byte           // byte cached by peekByte; 0 means nothing is cached
	err  error          // recorded error; the read methods and syntaxError only set it while it is nil
	eof  bool           // set once a read from b has returned io.EOF
	nerr int            // count of peekByte calls made while err was set; peekByte panics past 10000
	pos  token.Position // position advanced by readByteNoBuf
}
  35
// bom is the UTF-8 encoding of the byte order mark (U+FEFF) that
// newImportReader strips from the start of the input.
var bom = []byte{0xef, 0xbb, 0xbf}
  37
  38 func newImportReader(name string, r io.Reader) *importReader {
  39         b := bufio.NewReader(r)
  40         // Remove leading UTF-8 BOM.
  41         // Per https://golang.org/ref/spec#Source_code_representation:
  42         // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
  43         // if it is the first Unicode code point in the source text.
  44         if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
  45                 b.Discard(3)
  46         }
  47         return &importReader{
  48                 b: b,
  49                 pos: token.Position{
  50                         Filename: name,
  51                         Line:     1,
  52                         Column:   1,
  53                 },
  54         }
  55 }
  56
  57 func isIdent(c byte) bool {
  58         return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' || c == '_' || c >= utf8.RuneSelf
  59 }
  60
// Sentinel errors stored in importReader.err.
// errSyntax is recorded by syntaxError; errNUL is recorded by readByte and
// readByteNoBuf when the underlying reader yields a NUL byte.
var (
	errSyntax = errors.New("syntax error")
	errNUL    = errors.New("unexpected NUL in input")
)
  65
  66 // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
  67 func (r *importReader) syntaxError() {
  68         if r.err == nil {
  69                 r.err = errSyntax
  70         }
  71 }
  72
  73 // readByte reads the next byte from the input, saves it in buf, and returns it.
  74 // If an error occurs, readByte records the error in r.err and returns 0.
  75 func (r *importReader) readByte() byte {
  76         c, err := r.b.ReadByte()
  77         if err == nil {
  78                 r.buf = append(r.buf, c)
  79                 if c == 0 {
  80                         err = errNUL
  81                 }
  82         }
  83         if err != nil {
  84                 if err == io.EOF {
  85                         r.eof = true
  86                 } else if r.err == nil {
  87                         r.err = err
  88                 }
  89                 c = 0
  90         }
  91         return c
  92 }
  93
  94 // readByteNoBuf is like readByte but doesn't buffer the byte.
  95 // It exhausts r.buf before reading from r.b.
  96 func (r *importReader) readByteNoBuf() byte {
  97         var c byte
  98         var err error
  99         if len(r.buf) > 0 {
 100                 c = r.buf[0]
 101                 r.buf = r.buf[1:]
 102         } else {
 103                 c, err = r.b.ReadByte()
 104                 if err == nil && c == 0 {
 105                         err = errNUL
 106                 }
 107         }
 108
 109         if err != nil {
 110                 if err == io.EOF {
 111                         r.eof = true
 112                 } else if r.err == nil {
 113                         r.err = err
 114                 }
 115                 return 0
 116         }
 117         r.pos.Offset++
 118         if c == '\n' {
 119                 r.pos.Line++
 120                 r.pos.Column = 1
 121         } else {
 122                 r.pos.Column++
 123         }
 124         return c
 125 }
 126
 127 // peekByte returns the next byte from the input reader but does not advance beyond it.
 128 // If skipSpace is set, peekByte skips leading spaces and comments.
 129 func (r *importReader) peekByte(skipSpace bool) byte {
 130         if r.err != nil {
 131                 if r.nerr++; r.nerr > 10000 {
 132                         panic("go/build: import reader looping")
 133                 }
 134                 return 0
 135         }
 136
 137         // Use r.peek as first input byte.
 138         // Don't just return r.peek here: it might have been left by peekByte(false)
 139         // and this might be peekByte(true).
 140         c := r.peek
 141         if c == 0 {
 142                 c = r.readByte()
 143         }
 144         for r.err == nil && !r.eof {
 145                 if skipSpace {
 146                         // For the purposes of this reader, semicolons are never necessary to
 147                         // understand the input and are treated as spaces.
 148                         switch c {
 149                         case ' ', '\f', '\t', '\r', '\n', ';':
 150                                 c = r.readByte()
 151                                 continue
 152
 153                         case '/':
 154                                 c = r.readByte()
 155                                 if c == '/' {
 156                                         for c != '\n' && r.err == nil && !r.eof {
 157                                                 c = r.readByte()
 158                                         }
 159                                 } else if c == '*' {
 160                                         var c1 byte
 161                                         for (c != '*' || c1 != '/') && r.err == nil {
 162                                                 if r.eof {
 163                                                         r.syntaxError()
 164                                                 }
 165                                                 c, c1 = c1, r.readByte()
 166                                         }
 167                                 } else {
 168                                         r.syntaxError()
 169                                 }
 170                                 c = r.readByte()
 171                                 continue
 172                         }
 173                 }
 174                 break
 175         }
 176         r.peek = c
 177         return r.peek
 178 }
 179
 180 // nextByte is like peekByte but advances beyond the returned byte.
 181 func (r *importReader) nextByte(skipSpace bool) byte {
 182         c := r.peekByte(skipSpace)
 183         r.peek = 0
 184         return c
 185 }
 186
// goEmbed is the directive text that findEmbed matches immediately after "//".
var goEmbed = []byte("go:embed")
 188
// findEmbed advances the input reader to the next //go:embed comment.
// It reports whether it found a comment.
// (Otherwise it found an error or EOF.)
//
// A comment is only recognized when "//" is the first thing on a line other
// than spaces and tabs, and "go:embed" is followed by a space or tab. On a
// true return the reader has consumed everything up to and including that
// space or tab. String, raw string and rune literals and /* */ comments are
// skipped so that their contents are never taken for a directive.
// first should be true on the first call and false afterwards.
func (r *importReader) findEmbed(first bool) bool {
	// The import block scan stopped after a non-space character,
	// so the reader is not at the start of a line on the first call.
	// After that, each //go:embed extraction leaves the reader
	// at the end of a line.
	startLine := !first
	var c byte
	for r.err == nil && !r.eof {
		c = r.readByteNoBuf()
		// Reswitch re-dispatches on a byte that has already been read, so
		// the byte following a literal or a lone '/' is not skipped.
	Reswitch:
		switch c {
		default:
			startLine = false

		case '\n':
			startLine = true

		case ' ', '\t':
			// leave startLine alone

		case '"':
			// Interpreted string literal: skip to the closing quote,
			// stepping over backslash escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					// Skip the escaped byte so that \" does not end the literal.
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '"' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}
			goto Reswitch

		case '`':
			// Raw string literal: skip to the closing back quote; no escapes.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '`' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '\'':
			// Rune literal: handled like an interpreted string, with ' as the delimiter.
			startLine = false
			for r.err == nil {
				if r.eof {
					r.syntaxError()
				}
				c = r.readByteNoBuf()
				if c == '\\' {
					r.readByteNoBuf()
					if r.err != nil {
						r.syntaxError()
						return false
					}
					continue
				}
				if c == '\'' {
					c = r.readByteNoBuf()
					goto Reswitch
				}
			}

		case '/':
			c = r.readByteNoBuf()
			switch c {
			default:
				// Not a comment; re-dispatch on the byte after the '/'.
				startLine = false
				goto Reswitch

			case '*':
				// Block comment: skip to the closing "*/"; EOF first is a syntax error.
				var c1 byte
				for (c != '*' || c1 != '/') && r.err == nil {
					if r.eof {
						r.syntaxError()
					}
					c, c1 = c1, r.readByteNoBuf()
				}
				startLine = false

			case '/':
				if startLine {
					// Try to read this as a //go:embed comment.
					for i := range goEmbed {
						c = r.readByteNoBuf()
						if c != goEmbed[i] {
							goto SkipSlashSlash
						}
					}
					c = r.readByteNoBuf()
					if c == ' ' || c == '\t' {
						// Found one!
						return true
					}
				}
				// Any other line comment: discard the rest of the line.
			SkipSlashSlash:
				for c != '\n' && r.err == nil && !r.eof {
					c = r.readByteNoBuf()
				}
				startLine = true
			}
		}
	}
	return false
}
 310
 311 // readKeyword reads the given keyword from the input.
 312 // If the keyword is not present, readKeyword records a syntax error.
 313 func (r *importReader) readKeyword(kw string) {
 314         r.peekByte(true)
 315         for i := 0; i < len(kw); i++ {
 316                 if r.nextByte(false) != kw[i] {
 317                         r.syntaxError()
 318                         return
 319                 }
 320         }
 321         if isIdent(r.peekByte(false)) {
 322                 r.syntaxError()
 323         }
 324 }
 325
 326 // readIdent reads an identifier from the input.
 327 // If an identifier is not present, readIdent records a syntax error.
 328 func (r *importReader) readIdent() {
 329         c := r.peekByte(true)
 330         if !isIdent(c) {
 331                 r.syntaxError()
 332                 return
 333         }
 334         for isIdent(r.peekByte(false)) {
 335                 r.peek = 0
 336         }
 337 }
 338
 339 // readString reads a quoted string literal from the input.
 340 // If an identifier is not present, readString records a syntax error.
 341 func (r *importReader) readString() {
 342         switch r.nextByte(true) {
 343         case '`':
 344                 for r.err == nil {
 345                         if r.nextByte(false) == '`' {
 346                                 break
 347                         }
 348                         if r.eof {
 349                                 r.syntaxError()
 350                         }
 351                 }
 352         case '"':
 353                 for r.err == nil {
 354                         c := r.nextByte(false)
 355                         if c == '"' {
 356                                 break
 357                         }
 358                         if r.eof || c == '\n' {
 359                                 r.syntaxError()
 360                         }
 361                         if c == '\\' {
 362                                 r.nextByte(false)
 363                         }
 364                 }
 365         default:
 366                 r.syntaxError()
 367         }
 368 }
 369
 370 // readImport reads an import clause - optional identifier followed by quoted string -
 371 // from the input.
 372 func (r *importReader) readImport() {
 373         c := r.peekByte(true)
 374         if c == '.' {
 375                 r.peek = 0
 376         } else if isIdent(c) {
 377                 r.readIdent()
 378         }
 379         r.readString()
 380 }
 381
 382 // readComments is like io.ReadAll, except that it only reads the leading
 383 // block of comments in the file.
 384 func readComments(f io.Reader) ([]byte, error) {
 385         r := newImportReader("", f)
 386         r.peekByte(true)
 387         if r.err == nil && !r.eof {
 388                 // Didn't reach EOF, so must have found a non-space byte. Remove it.
 389                 r.buf = r.buf[:len(r.buf)-1]
 390         }
 391         return r.buf, r.err
 392 }
 393
// readGoInfo expects a Go file as input and reads the file up to and including the import section.
// It records what it learned in *info.
// If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
// info.imports, info.directives and info.embeds.
//
// It only returns an error if there are problems reading the file,
// not for syntax errors in the file itself. (The one other error return is
// for an import path from the parser that strconv.Unquote rejects.)
func readGoInfo(f io.Reader, info *fileInfo) error {
	r := newImportReader(info.name, f)

	// Scan the package clause and every import declaration that follows it.
	// The read* helpers record failures in r.err instead of returning them.
	r.readKeyword("package")
	r.readIdent()
	for r.peekByte(true) == 'i' {
		r.readKeyword("import")
		if r.peekByte(true) == '(' {
			// Parenthesized group: consume import specs up to the closing paren.
			r.nextByte(false)
			for r.peekByte(true) != ')' && r.err == nil {
				r.readImport()
			}
			r.nextByte(false)
		} else {
			r.readImport()
		}
	}

	info.header = r.buf

	// If we stopped successfully before EOF, we read a byte that told us we were done.
	// Return all but that last byte, which would cause a syntax error if we let it through.
	if r.err == nil && !r.eof {
		info.header = r.buf[:len(r.buf)-1]
	}

	// If we stopped for a syntax error, consume the whole file so that
	// we are sure we don't change the errors that go/parser returns.
	if r.err == errSyntax {
		r.err = nil
		for r.err == nil && !r.eof {
			r.readByte()
		}
		info.header = r.buf
	}
	// Anything still recorded here is a read error (or errNUL), not a syntax error.
	if r.err != nil {
		return r.err
	}

	// Without a file set the caller only wanted info.header.
	if info.fset == nil {
		return nil
	}

	// Parse file header & record imports.
	info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
	if info.parseErr != nil {
		// The parse error is reported through info.parseErr, not the return value.
		return nil
	}

	hasEmbed := false
	for _, decl := range info.parsed.Decls {
		d, ok := decl.(*ast.GenDecl)
		if !ok {
			continue
		}
		for _, dspec := range d.Specs {
			spec, ok := dspec.(*ast.ImportSpec)
			if !ok {
				continue
			}
			quoted := spec.Path.Value
			path, err := strconv.Unquote(quoted)
			if err != nil {
				return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
			}
			if path == "embed" {
				hasEmbed = true
			}

			// Use the spec's own doc comment; for a declaration with a single
			// spec, fall back to the doc comment on the declaration.
			doc := spec.Doc
			if doc == nil && len(d.Specs) == 1 {
				doc = d.Doc
			}
			info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
		}
	}

	// Extract directives.
	// Only comment groups that start before the package clause are examined,
	// and only comments beginning with "//go:" are recorded.
	for _, group := range info.parsed.Comments {
		if group.Pos() >= info.parsed.Package {
			break
		}
		for _, c := range group.List {
			if strings.HasPrefix(c.Text, "//go:") {
				info.directives = append(info.directives, build.Directive{Text: c.Text, Pos: info.fset.Position(c.Slash)})
			}
		}
	}

	// If the file imports "embed",
	// we have to look for //go:embed comments
	// in the remainder of the file.
	// The compiler will enforce the mapping of comments to
	// declared variables. We just need to know the patterns.
	// If there were //go:embed comments earlier in the file
	// (near the package statement or imports), the compiler
	// will reject them. They can be (and have already been) ignored.
	if hasEmbed {
		var line []byte
		for first := true; r.findEmbed(first); first = false {
			// findEmbed stopped just past "//go:embed" and the space or tab
			// after it; collect the rest of that line as the directive arguments.
			line = line[:0]
			pos := r.pos
			for {
				c := r.readByteNoBuf()
				if c == '\n' || r.err != nil || r.eof {
					break
				}
				line = append(line, c)
			}
			// Add args if line is well-formed.
			// Ignore badly-formed lines - the compiler will report them when it finds them,
			// and we can pretend they are not there to help go list succeed with what it knows.
			embs, err := parseGoEmbed(string(line), pos)
			if err == nil {
				info.embeds = append(info.embeds, embs...)
			}
		}
	}

	return nil
}
 522
 523 // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
 524 // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
 525 // This is based on a similar function in cmd/compile/internal/gc/noder.go;
 526 // this version calculates position information as well.
 527 func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
 528         trimBytes := func(n int) {
 529                 pos.Offset += n
 530                 pos.Column += utf8.RuneCountInString(args[:n])
 531                 args = args[n:]
 532         }
 533         trimSpace := func() {
 534                 trim := strings.TrimLeftFunc(args, unicode.IsSpace)
 535                 trimBytes(len(args) - len(trim))
 536         }
 537
 538         var list []fileEmbed
 539         for trimSpace(); args != ""; trimSpace() {
 540                 var path string
 541                 pathPos := pos
 542         Switch:
 543                 switch args[0] {
 544                 default:
 545                         i := len(args)
 546                         for j, c := range args {
 547                                 if unicode.IsSpace(c) {
 548                                         i = j
 549                                         break
 550                                 }
 551                         }
 552                         path = args[:i]
 553                         trimBytes(i)
 554
 555                 case '`':
 556                         var ok bool
 557                         path, _, ok = strings.Cut(args[1:], "`")
 558                         if !ok {
 559                                 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
 560                         }
 561                         trimBytes(1 + len(path) + 1)
 562
 563                 case '"':
 564                         i := 1
 565                         for ; i < len(args); i++ {
 566                                 if args[i] == '\\' {
 567                                         i++
 568                                         continue
 569                                 }
 570                                 if args[i] == '"' {
 571                                         q, err := strconv.Unquote(args[:i+1])
 572                                         if err != nil {
 573                                                 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
 574                                         }
 575                                         path = q
 576                                         trimBytes(i + 1)
 577                                         break Switch
 578                                 }
 579                         }
 580                         if i >= len(args) {
 581                                 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
 582                         }
 583                 }
 584
 585                 if args != "" {
 586                         r, _ := utf8.DecodeRuneInString(args)
 587                         if !unicode.IsSpace(r) {
 588                                 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
 589                         }
 590                 }
 591                 list = append(list, fileEmbed{path, pathPos})
 592         }
 593         return list, nil
 594 }