// lexer holds the state of the scanner.
type lexer struct {
- name string // the name of the input; used only for error reports
- input string // the string being scanned
- leftDelim string // start of action
- rightDelim string // end of action
- emitComment bool // emit itemComment tokens.
- pos Pos // current position in the input
- start Pos // start position of this item
- atEOF bool // we have hit the end of input and returned eof
- items chan item // channel of scanned items
- parenDepth int // nesting depth of ( ) exprs
- line int // 1+number of newlines seen
- startLine int // start line of this item
- breakOK bool // break keyword allowed
- continueOK bool // continue keyword allowed
+ name string // the name of the input; used only for error reports
+ input string // the string being scanned
+ leftDelim string // start of action marker
+ rightDelim string // end of action marker
+ pos Pos // current position in the input
+ start Pos // start position of this item
+ atEOF bool // we have hit the end of input and returned eof
+ parenDepth int // nesting depth of ( ) exprs
+ line int // 1+number of newlines seen
+ startLine int // start line of this item
+ item item // item to return to parser
+ insideAction bool // are we inside an action?
+ options lexOptions
+}
+
+// lexOptions control behavior of the lexer. All default to false.
+type lexOptions struct {
+ emitComment bool // emit itemComment tokens.
+ breakOK bool // break keyword allowed
+ continueOK bool // continue keyword allowed
}
// next returns the next rune in the input.
}
}
-// emit passes an item back to the client.
-func (l *lexer) emit(t itemType) {
- l.items <- item{t, l.start, l.input[l.start:l.pos], l.startLine}
+// thisItem returns the item at the current input point with the specified type
+// and advances the input.
+func (l *lexer) thisItem(t itemType) item {
+ i := item{t, l.start, l.input[l.start:l.pos], l.startLine}
l.start = l.pos
l.startLine = l.line
+ return i
+}
+
+// emit passes the trailing text as an item back to the parser.
+func (l *lexer) emit(t itemType) stateFn {
+ return l.emitItem(l.thisItem(t))
+}
+
+// emitItem passes the specified item to the parser.
+func (l *lexer) emitItem(i item) stateFn {
+ l.item = i
+ return nil
}
// ignore skips over the pending input before this point.
+// It tracks newlines in the ignored text, so use it only
+// for text that is skipped without calling l.next.
func (l *lexer) ignore() {
l.line += strings.Count(l.input[l.start:l.pos], "\n")
l.start = l.pos
// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.nextItem.
func (l *lexer) errorf(format string, args ...any) stateFn {
- l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
+ l.item = item{itemError, l.start, fmt.Sprintf(format, args...), l.startLine}
+ l.start = 0
+ l.pos = 0
+ l.input = l.input[:0]
return nil
}
// nextItem returns the next item from the input.
// Called by the parser; it runs the state machine inline
// (there is no longer a separate lexing goroutine).
func (l *lexer) nextItem() item {
- return <-l.items
-}
-
-// drain drains the output so the lexing goroutine will exit.
-// Called by the parser, not in the lexing goroutine.
-func (l *lexer) drain() {
- for range l.items {
+ l.item = item{itemEOF, l.pos, "EOF", l.startLine}
+ state := lexText
+ if l.insideAction {
+ state = lexInsideAction
+ }
+ for {
+ state = state(l)
+ if state == nil {
+ return l.item
+ }
}
}
// lex creates a new scanner for the input string.
-func lex(name, input, left, right string, emitComment, breakOK, continueOK bool) *lexer {
+func lex(name, input, left, right string) *lexer {
if left == "" {
left = leftDelim
}
right = rightDelim
}
l := &lexer{
- name: name,
- input: input,
- leftDelim: left,
- rightDelim: right,
- emitComment: emitComment,
- breakOK: breakOK,
- continueOK: continueOK,
- items: make(chan item),
- line: 1,
- startLine: 1,
- }
- go l.run()
- return l
-}
-
-// run runs the state machine for the lexer.
-func (l *lexer) run() {
- for state := lexText; state != nil; {
- state = state(l)
+ name: name,
+ input: input,
+ leftDelim: left,
+ rightDelim: right,
+ line: 1,
+ startLine: 1,
+ insideAction: false,
}
- close(l.items)
+ return l
}
// state functions
// lexText scans until an opening action delimiter, "{{".
func lexText(l *lexer) stateFn {
if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
- ldn := Pos(len(l.leftDelim))
- l.pos += Pos(x)
- trimLength := Pos(0)
- if hasLeftTrimMarker(l.input[l.pos+ldn:]) {
- trimLength = rightTrimLength(l.input[l.start:l.pos])
- }
- l.pos -= trimLength
- if l.pos > l.start {
+ if x > 0 {
+ l.pos += Pos(x)
+ // Do we trim any trailing space?
+ trimLength := Pos(0)
+ delimEnd := l.pos + Pos(len(l.leftDelim))
+ if hasLeftTrimMarker(l.input[delimEnd:]) {
+ trimLength = rightTrimLength(l.input[l.start:l.pos])
+ }
+ l.pos -= trimLength
l.line += strings.Count(l.input[l.start:l.pos], "\n")
- l.emit(itemText)
+ i := l.thisItem(itemText)
+ l.pos += trimLength
+ l.ignore()
+ if len(i.val) > 0 {
+ return l.emitItem(i)
+ }
}
- l.pos += trimLength
- l.ignore()
return lexLeftDelim
}
l.pos = Pos(len(l.input))
// Correctly reached EOF.
if l.pos > l.start {
l.line += strings.Count(l.input[l.start:l.pos], "\n")
- l.emit(itemText)
+ return l.emit(itemText)
}
- l.emit(itemEOF)
- return nil
+ return l.emit(itemEOF)
}
// rightTrimLength returns the length of the spaces at the end of the string.
}
// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
+// (The text to be trimmed has already been emitted.)
func lexLeftDelim(l *lexer) stateFn {
l.pos += Pos(len(l.leftDelim))
trimSpace := hasLeftTrimMarker(l.input[l.pos:])
l.ignore()
return lexComment
}
- l.emit(itemLeftDelim)
+ i := l.thisItem(itemLeftDelim)
+ l.insideAction = true
l.pos += afterMarker
l.ignore()
l.parenDepth = 0
- return lexInsideAction
+ return l.emitItem(i)
}
// lexComment scans a comment. The left comment marker is known to be present.
func lexComment(l *lexer) stateFn {
l.pos += Pos(len(leftComment))
- i := strings.Index(l.input[l.pos:], rightComment)
- if i < 0 {
+ x := strings.Index(l.input[l.pos:], rightComment)
+ if x < 0 {
return l.errorf("unclosed comment")
}
- l.pos += Pos(i + len(rightComment))
+ l.pos += Pos(x + len(rightComment))
delim, trimSpace := l.atRightDelim()
if !delim {
return l.errorf("comment ends before closing delimiter")
}
- if l.emitComment {
- l.emit(itemComment)
- }
+ i := l.thisItem(itemComment)
if trimSpace {
l.pos += trimMarkerLen
}
l.pos += leftTrimLength(l.input[l.pos:])
}
l.ignore()
+ if l.options.emitComment {
+ return l.emitItem(i)
+ }
return lexText
}
l.ignore()
}
l.pos += Pos(len(l.rightDelim))
- l.emit(itemRightDelim)
+ i := l.thisItem(itemRightDelim)
if trimSpace {
l.pos += leftTrimLength(l.input[l.pos:])
l.ignore()
}
- return lexText
+ l.insideAction = false
+ return l.emitItem(i)
}
// lexInsideAction scans the elements inside action delimiters.
l.backup() // Put space back in case we have " -}}".
return lexSpace
case r == '=':
- l.emit(itemAssign)
+ return l.emit(itemAssign)
case r == ':':
if l.next() != '=' {
return l.errorf("expected :=")
}
- l.emit(itemDeclare)
+ return l.emit(itemDeclare)
case r == '|':
- l.emit(itemPipe)
+ return l.emit(itemPipe)
case r == '"':
return lexQuote
case r == '`':
l.backup()
return lexIdentifier
case r == '(':
- l.emit(itemLeftParen)
l.parenDepth++
+ return l.emit(itemLeftParen)
case r == ')':
- l.emit(itemRightParen)
l.parenDepth--
if l.parenDepth < 0 {
- return l.errorf("unexpected right paren %#U", r)
+ return l.errorf("unexpected right paren")
}
+ return l.emit(itemRightParen)
case r <= unicode.MaxASCII && unicode.IsPrint(r):
- l.emit(itemChar)
+ return l.emit(itemChar)
default:
return l.errorf("unrecognized character in action: %#U", r)
}
- return lexInsideAction
}
// lexSpace scans a run of space characters.
return lexRightDelim // On the delim, so go right to that.
}
}
- l.emit(itemSpace)
- return lexInsideAction
+ return l.emit(itemSpace)
}
// lexIdentifier scans an alphanumeric.
func lexIdentifier(l *lexer) stateFn {
-Loop:
for {
switch r := l.next(); {
case isAlphaNumeric(r):
switch {
case key[word] > itemKeyword:
item := key[word]
- if item == itemBreak && !l.breakOK || item == itemContinue && !l.continueOK {
- l.emit(itemIdentifier)
- } else {
- l.emit(item)
+ if item == itemBreak && !l.options.breakOK || item == itemContinue && !l.options.continueOK {
+ return l.emit(itemIdentifier)
}
+ return l.emit(item)
case word[0] == '.':
- l.emit(itemField)
+ return l.emit(itemField)
case word == "true", word == "false":
- l.emit(itemBool)
+ return l.emit(itemBool)
default:
- l.emit(itemIdentifier)
+ return l.emit(itemIdentifier)
}
- break Loop
}
}
- return lexInsideAction
}
// lexField scans a field: .Alphanumeric.
// The $ has been scanned.
func lexVariable(l *lexer) stateFn {
if l.atTerminator() { // Nothing interesting follows -> "$".
- l.emit(itemVariable)
- return lexInsideAction
+ return l.emit(itemVariable)
}
return lexFieldOrVariable(l, itemVariable)
}
func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
if l.atTerminator() { // Nothing interesting follows -> "." or "$".
if typ == itemVariable {
- l.emit(itemVariable)
- } else {
- l.emit(itemDot)
+ return l.emit(itemVariable)
}
- return lexInsideAction
+ return l.emit(itemDot)
}
var r rune
for {
if !l.atTerminator() {
return l.errorf("bad character %#U", r)
}
- l.emit(typ)
- return lexInsideAction
+ return l.emit(typ)
}
// atTerminator reports whether the input is at valid termination character to
break Loop
}
}
- l.emit(itemCharConstant)
- return lexInsideAction
+ return l.emit(itemCharConstant)
}
// lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
if !l.scanNumber() || l.input[l.pos-1] != 'i' {
return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
}
- l.emit(itemComplex)
- } else {
- l.emit(itemNumber)
+ return l.emit(itemComplex)
}
- return lexInsideAction
+ return l.emit(itemNumber)
}
func (l *lexer) scanNumber() bool {
break Loop
}
}
- l.emit(itemString)
- return lexInsideAction
+ return l.emit(itemString)
}
// lexRawQuote scans a raw quoted string.
break Loop
}
}
- l.emit(itemRawString)
- return lexInsideAction
+ return l.emit(itemRawString)
}
// isSpace reports whether r is a space character.
{"extra right paren", "{{3)}}", []item{
tLeft,
mkItem(itemNumber, "3"),
- tRpar,
- mkItem(itemError, `unexpected right paren U+0029 ')'`),
+ mkItem(itemError, "unexpected right paren"),
}},
// Fixed bugs
// collect gathers the emitted items into a slice.
func collect(t *lexTest, left, right string) (items []item) {
- l := lex(t.name, t.input, left, right, true, true, true)
+ l := lex(t.name, t.input, left, right)
+ l.options = lexOptions{
+ emitComment: true,
+ breakOK: true,
+ continueOK: true,
+ }
for {
item := l.nextItem()
items = append(items, item)
items := collect(&test, "", "")
if !equal(items, test.items, false) {
t.Errorf("%s: got\n\t%+v\nexpected\n\t%v", test.name, items, test.items)
+ return // TODO
}
+ t.Log(test.name, "OK")
}
}
}
}
-// Test that an error shuts down the lexing goroutine.
-func TestShutdown(t *testing.T) {
- // We need to duplicate template.Parse here to hold on to the lexer.
- const text = "erroneous{{define}}{{else}}1234"
- lexer := lex("foo", text, "{{", "}}", false, true, true)
- _, err := New("root").parseLexer(lexer)
- if err == nil {
- t.Fatalf("expected error")
- }
- // The error should have drained the input. Therefore, the lexer should be shut down.
- token, ok := <-lexer.items
- if ok {
- t.Fatalf("input was not drained; got %v", token)
- }
-}
-
// parseLexer is a local version of parse that lets us pass in the lexer instead of building it.
// We expect an error, so the tree set and funcs list are explicitly nil.
func (t *Tree) parseLexer(lex *lexer) (tree *Tree, err error) {