]> Cypherpunks.ru repositories - gostls13.git/blob - src/cmd/go/internal/modindex/build_read.go
cmd/go, go/build: parse directives in file headers
[gostls13.git] / src / cmd / go / internal / modindex / build_read.go
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // This file is a lightly modified copy of go/build/read.go with unused parts
6 // removed.
7
8 package modindex
9
10 import (
11         "bufio"
12         "bytes"
13         "errors"
14         "fmt"
15         "go/ast"
16         "go/build"
17         "go/parser"
18         "go/token"
19         "io"
20         "strconv"
21         "strings"
22         "unicode"
23         "unicode/utf8"
24 )
25
// importReader is a byte-at-a-time scanner over a Go source file.
// Bytes consumed through readByte are accumulated in buf; bytes consumed
// through readByteNoBuf advance pos instead.
type importReader struct {
	b    *bufio.Reader  // buffered input
	buf  []byte         // bytes appended by readByte and drained from the front by readByteNoBuf
	peek byte           // byte held by peekByte; 0 means nothing is pending
	err  error          // first error recorded (syntax, NUL, or non-EOF I/O error)
	eof  bool           // set once the underlying reader has returned io.EOF
	nerr int            // number of peekByte calls made while err was already set
	pos  token.Position // position advanced by readByteNoBuf
}
35
// bom is the three-byte UTF-8 encoding of the byte order mark (U+FEFF).
var bom = []byte("\xef\xbb\xbf")
37
38 func newImportReader(name string, r io.Reader) *importReader {
39         b := bufio.NewReader(r)
40         // Remove leading UTF-8 BOM.
41         // Per https://golang.org/ref/spec#Source_code_representation:
42         // a compiler may ignore a UTF-8-encoded byte order mark (U+FEFF)
43         // if it is the first Unicode code point in the source text.
44         if leadingBytes, err := b.Peek(3); err == nil && bytes.Equal(leadingBytes, bom) {
45                 b.Discard(3)
46         }
47         return &importReader{
48                 b: b,
49                 pos: token.Position{
50                         Filename: name,
51                         Line:     1,
52                         Column:   1,
53                 },
54         }
55 }
56
// isIdent reports whether c can be part of an identifier as far as this
// reader is concerned: an ASCII letter or digit, an underscore, or any
// non-ASCII byte (c >= utf8.RuneSelf).
func isIdent(c byte) bool {
	switch {
	case c >= utf8.RuneSelf:
		return true
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	case '0' <= c && c <= '9':
		return true
	}
	return c == '_'
}
60
// errSyntax is the error recorded by syntaxError.
var errSyntax = errors.New("syntax error")

// errNUL is the error recorded when a NUL byte is read from the input.
var errNUL = errors.New("unexpected NUL in input")
65
66 // syntaxError records a syntax error, but only if an I/O error has not already been recorded.
67 func (r *importReader) syntaxError() {
68         if r.err == nil {
69                 r.err = errSyntax
70         }
71 }
72
73 // readByte reads the next byte from the input, saves it in buf, and returns it.
74 // If an error occurs, readByte records the error in r.err and returns 0.
75 func (r *importReader) readByte() byte {
76         c, err := r.b.ReadByte()
77         if err == nil {
78                 r.buf = append(r.buf, c)
79                 if c == 0 {
80                         err = errNUL
81                 }
82         }
83         if err != nil {
84                 if err == io.EOF {
85                         r.eof = true
86                 } else if r.err == nil {
87                         r.err = err
88                 }
89                 c = 0
90         }
91         return c
92 }
93
94 // readByteNoBuf is like readByte but doesn't buffer the byte.
95 // It exhausts r.buf before reading from r.b.
96 func (r *importReader) readByteNoBuf() byte {
97         var c byte
98         var err error
99         if len(r.buf) > 0 {
100                 c = r.buf[0]
101                 r.buf = r.buf[1:]
102         } else {
103                 c, err = r.b.ReadByte()
104                 if err == nil && c == 0 {
105                         err = errNUL
106                 }
107         }
108
109         if err != nil {
110                 if err == io.EOF {
111                         r.eof = true
112                 } else if r.err == nil {
113                         r.err = err
114                 }
115                 return 0
116         }
117         r.pos.Offset++
118         if c == '\n' {
119                 r.pos.Line++
120                 r.pos.Column = 1
121         } else {
122                 r.pos.Column++
123         }
124         return c
125 }
126
127 // peekByte returns the next byte from the input reader but does not advance beyond it.
128 // If skipSpace is set, peekByte skips leading spaces and comments.
129 func (r *importReader) peekByte(skipSpace bool) byte {
130         if r.err != nil {
131                 if r.nerr++; r.nerr > 10000 {
132                         panic("go/build: import reader looping")
133                 }
134                 return 0
135         }
136
137         // Use r.peek as first input byte.
138         // Don't just return r.peek here: it might have been left by peekByte(false)
139         // and this might be peekByte(true).
140         c := r.peek
141         if c == 0 {
142                 c = r.readByte()
143         }
144         for r.err == nil && !r.eof {
145                 if skipSpace {
146                         // For the purposes of this reader, semicolons are never necessary to
147                         // understand the input and are treated as spaces.
148                         switch c {
149                         case ' ', '\f', '\t', '\r', '\n', ';':
150                                 c = r.readByte()
151                                 continue
152
153                         case '/':
154                                 c = r.readByte()
155                                 if c == '/' {
156                                         for c != '\n' && r.err == nil && !r.eof {
157                                                 c = r.readByte()
158                                         }
159                                 } else if c == '*' {
160                                         var c1 byte
161                                         for (c != '*' || c1 != '/') && r.err == nil {
162                                                 if r.eof {
163                                                         r.syntaxError()
164                                                 }
165                                                 c, c1 = c1, r.readByte()
166                                         }
167                                 } else {
168                                         r.syntaxError()
169                                 }
170                                 c = r.readByte()
171                                 continue
172                         }
173                 }
174                 break
175         }
176         r.peek = c
177         return r.peek
178 }
179
180 // nextByte is like peekByte but advances beyond the returned byte.
181 func (r *importReader) nextByte(skipSpace bool) byte {
182         c := r.peekByte(skipSpace)
183         r.peek = 0
184         return c
185 }
186
// goEmbed is the text findEmbed expects right after the "//" of a
// //go:embed comment.
var goEmbed = []byte("go:embed")
188
189 // findEmbed advances the input reader to the next //go:embed comment.
190 // It reports whether it found a comment.
191 // (Otherwise it found an error or EOF.)
192 func (r *importReader) findEmbed(first bool) bool {
193         // The import block scan stopped after a non-space character,
194         // so the reader is not at the start of a line on the first call.
195         // After that, each //go:embed extraction leaves the reader
196         // at the end of a line.
197         startLine := !first
198         var c byte
199         for r.err == nil && !r.eof {
200                 c = r.readByteNoBuf()
201         Reswitch:
202                 switch c {
203                 default:
204                         startLine = false
205
206                 case '\n':
207                         startLine = true
208
209                 case ' ', '\t':
210                         // leave startLine alone
211
212                 case '"':
213                         startLine = false
214                         for r.err == nil {
215                                 if r.eof {
216                                         r.syntaxError()
217                                 }
218                                 c = r.readByteNoBuf()
219                                 if c == '\\' {
220                                         r.readByteNoBuf()
221                                         if r.err != nil {
222                                                 r.syntaxError()
223                                                 return false
224                                         }
225                                         continue
226                                 }
227                                 if c == '"' {
228                                         c = r.readByteNoBuf()
229                                         goto Reswitch
230                                 }
231                         }
232                         goto Reswitch
233
234                 case '`':
235                         startLine = false
236                         for r.err == nil {
237                                 if r.eof {
238                                         r.syntaxError()
239                                 }
240                                 c = r.readByteNoBuf()
241                                 if c == '`' {
242                                         c = r.readByteNoBuf()
243                                         goto Reswitch
244                                 }
245                         }
246
247                 case '\'':
248                         startLine = false
249                         for r.err == nil {
250                                 if r.eof {
251                                         r.syntaxError()
252                                 }
253                                 c = r.readByteNoBuf()
254                                 if c == '\\' {
255                                         r.readByteNoBuf()
256                                         if r.err != nil {
257                                                 r.syntaxError()
258                                                 return false
259                                         }
260                                         continue
261                                 }
262                                 if c == '\'' {
263                                         c = r.readByteNoBuf()
264                                         goto Reswitch
265                                 }
266                         }
267
268                 case '/':
269                         c = r.readByteNoBuf()
270                         switch c {
271                         default:
272                                 startLine = false
273                                 goto Reswitch
274
275                         case '*':
276                                 var c1 byte
277                                 for (c != '*' || c1 != '/') && r.err == nil {
278                                         if r.eof {
279                                                 r.syntaxError()
280                                         }
281                                         c, c1 = c1, r.readByteNoBuf()
282                                 }
283                                 startLine = false
284
285                         case '/':
286                                 if startLine {
287                                         // Try to read this as a //go:embed comment.
288                                         for i := range goEmbed {
289                                                 c = r.readByteNoBuf()
290                                                 if c != goEmbed[i] {
291                                                         goto SkipSlashSlash
292                                                 }
293                                         }
294                                         c = r.readByteNoBuf()
295                                         if c == ' ' || c == '\t' {
296                                                 // Found one!
297                                                 return true
298                                         }
299                                 }
300                         SkipSlashSlash:
301                                 for c != '\n' && r.err == nil && !r.eof {
302                                         c = r.readByteNoBuf()
303                                 }
304                                 startLine = true
305                         }
306                 }
307         }
308         return false
309 }
310
311 // readKeyword reads the given keyword from the input.
312 // If the keyword is not present, readKeyword records a syntax error.
313 func (r *importReader) readKeyword(kw string) {
314         r.peekByte(true)
315         for i := 0; i < len(kw); i++ {
316                 if r.nextByte(false) != kw[i] {
317                         r.syntaxError()
318                         return
319                 }
320         }
321         if isIdent(r.peekByte(false)) {
322                 r.syntaxError()
323         }
324 }
325
326 // readIdent reads an identifier from the input.
327 // If an identifier is not present, readIdent records a syntax error.
328 func (r *importReader) readIdent() {
329         c := r.peekByte(true)
330         if !isIdent(c) {
331                 r.syntaxError()
332                 return
333         }
334         for isIdent(r.peekByte(false)) {
335                 r.peek = 0
336         }
337 }
338
339 // readString reads a quoted string literal from the input.
340 // If an identifier is not present, readString records a syntax error.
341 func (r *importReader) readString() {
342         switch r.nextByte(true) {
343         case '`':
344                 for r.err == nil {
345                         if r.nextByte(false) == '`' {
346                                 break
347                         }
348                         if r.eof {
349                                 r.syntaxError()
350                         }
351                 }
352         case '"':
353                 for r.err == nil {
354                         c := r.nextByte(false)
355                         if c == '"' {
356                                 break
357                         }
358                         if r.eof || c == '\n' {
359                                 r.syntaxError()
360                         }
361                         if c == '\\' {
362                                 r.nextByte(false)
363                         }
364                 }
365         default:
366                 r.syntaxError()
367         }
368 }
369
370 // readImport reads an import clause - optional identifier followed by quoted string -
371 // from the input.
372 func (r *importReader) readImport() {
373         c := r.peekByte(true)
374         if c == '.' {
375                 r.peek = 0
376         } else if isIdent(c) {
377                 r.readIdent()
378         }
379         r.readString()
380 }
381
382 // readComments is like io.ReadAll, except that it only reads the leading
383 // block of comments in the file.
384 func readComments(f io.Reader) ([]byte, error) {
385         r := newImportReader("", f)
386         r.peekByte(true)
387         if r.err == nil && !r.eof {
388                 // Didn't reach EOF, so must have found a non-space byte. Remove it.
389                 r.buf = r.buf[:len(r.buf)-1]
390         }
391         return r.buf, r.err
392 }
393
394 // readGoInfo expects a Go file as input and reads the file up to and including the import section.
395 // It records what it learned in *info.
396 // If info.fset is non-nil, readGoInfo parses the file and sets info.parsed, info.parseErr,
397 // info.imports and info.embeds.
398 //
399 // It only returns an error if there are problems reading the file,
400 // not for syntax errors in the file itself.
401 func readGoInfo(f io.Reader, info *fileInfo) error {
402         r := newImportReader(info.name, f)
403
404         r.readKeyword("package")
405         r.readIdent()
406         for r.peekByte(true) == 'i' {
407                 r.readKeyword("import")
408                 if r.peekByte(true) == '(' {
409                         r.nextByte(false)
410                         for r.peekByte(true) != ')' && r.err == nil {
411                                 r.readImport()
412                         }
413                         r.nextByte(false)
414                 } else {
415                         r.readImport()
416                 }
417         }
418
419         info.header = r.buf
420
421         // If we stopped successfully before EOF, we read a byte that told us we were done.
422         // Return all but that last byte, which would cause a syntax error if we let it through.
423         if r.err == nil && !r.eof {
424                 info.header = r.buf[:len(r.buf)-1]
425         }
426
427         // If we stopped for a syntax error, consume the whole file so that
428         // we are sure we don't change the errors that go/parser returns.
429         if r.err == errSyntax {
430                 r.err = nil
431                 for r.err == nil && !r.eof {
432                         r.readByte()
433                 }
434                 info.header = r.buf
435         }
436         if r.err != nil {
437                 return r.err
438         }
439
440         if info.fset == nil {
441                 return nil
442         }
443
444         // Parse file header & record imports.
445         info.parsed, info.parseErr = parser.ParseFile(info.fset, info.name, info.header, parser.ImportsOnly|parser.ParseComments)
446         if info.parseErr != nil {
447                 return nil
448         }
449
450         hasEmbed := false
451         for _, decl := range info.parsed.Decls {
452                 d, ok := decl.(*ast.GenDecl)
453                 if !ok {
454                         continue
455                 }
456                 for _, dspec := range d.Specs {
457                         spec, ok := dspec.(*ast.ImportSpec)
458                         if !ok {
459                                 continue
460                         }
461                         quoted := spec.Path.Value
462                         path, err := strconv.Unquote(quoted)
463                         if err != nil {
464                                 return fmt.Errorf("parser returned invalid quoted string: <%s>", quoted)
465                         }
466                         if path == "embed" {
467                                 hasEmbed = true
468                         }
469
470                         doc := spec.Doc
471                         if doc == nil && len(d.Specs) == 1 {
472                                 doc = d.Doc
473                         }
474                         info.imports = append(info.imports, fileImport{path, spec.Pos(), doc})
475                 }
476         }
477
478         // Extract directives.
479         for _, group := range info.parsed.Comments {
480                 if group.Pos() >= info.parsed.Package {
481                         break
482                 }
483                 for _, c := range group.List {
484                         if strings.HasPrefix(c.Text, "//go:") {
485                                 info.directives = append(info.directives, build.Directive{Text: c.Text, Pos: info.fset.Position(c.Slash)})
486                         }
487                 }
488         }
489
490         // If the file imports "embed",
491         // we have to look for //go:embed comments
492         // in the remainder of the file.
493         // The compiler will enforce the mapping of comments to
494         // declared variables. We just need to know the patterns.
495         // If there were //go:embed comments earlier in the file
496         // (near the package statement or imports), the compiler
497         // will reject them. They can be (and have already been) ignored.
498         if hasEmbed {
499                 var line []byte
500                 for first := true; r.findEmbed(first); first = false {
501                         line = line[:0]
502                         pos := r.pos
503                         for {
504                                 c := r.readByteNoBuf()
505                                 if c == '\n' || r.err != nil || r.eof {
506                                         break
507                                 }
508                                 line = append(line, c)
509                         }
510                         // Add args if line is well-formed.
511                         // Ignore badly-formed lines - the compiler will report them when it finds them,
512                         // and we can pretend they are not there to help go list succeed with what it knows.
513                         embs, err := parseGoEmbed(string(line), pos)
514                         if err == nil {
515                                 info.embeds = append(info.embeds, embs...)
516                         }
517                 }
518         }
519
520         return nil
521 }
522
523 // parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
524 // It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
525 // This is based on a similar function in cmd/compile/internal/gc/noder.go;
526 // this version calculates position information as well.
527 func parseGoEmbed(args string, pos token.Position) ([]fileEmbed, error) {
528         trimBytes := func(n int) {
529                 pos.Offset += n
530                 pos.Column += utf8.RuneCountInString(args[:n])
531                 args = args[n:]
532         }
533         trimSpace := func() {
534                 trim := strings.TrimLeftFunc(args, unicode.IsSpace)
535                 trimBytes(len(args) - len(trim))
536         }
537
538         var list []fileEmbed
539         for trimSpace(); args != ""; trimSpace() {
540                 var path string
541                 pathPos := pos
542         Switch:
543                 switch args[0] {
544                 default:
545                         i := len(args)
546                         for j, c := range args {
547                                 if unicode.IsSpace(c) {
548                                         i = j
549                                         break
550                                 }
551                         }
552                         path = args[:i]
553                         trimBytes(i)
554
555                 case '`':
556                         var ok bool
557                         path, _, ok = strings.Cut(args[1:], "`")
558                         if !ok {
559                                 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
560                         }
561                         trimBytes(1 + len(path) + 1)
562
563                 case '"':
564                         i := 1
565                         for ; i < len(args); i++ {
566                                 if args[i] == '\\' {
567                                         i++
568                                         continue
569                                 }
570                                 if args[i] == '"' {
571                                         q, err := strconv.Unquote(args[:i+1])
572                                         if err != nil {
573                                                 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
574                                         }
575                                         path = q
576                                         trimBytes(i + 1)
577                                         break Switch
578                                 }
579                         }
580                         if i >= len(args) {
581                                 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
582                         }
583                 }
584
585                 if args != "" {
586                         r, _ := utf8.DecodeRuneInString(args)
587                         if !unicode.IsSpace(r) {
588                                 return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
589                         }
590                 }
591                 list = append(list, fileEmbed{path, pathPos})
592         }
593         return list, nil
594 }