lex.go
// Copyright (c) 2014 Alex Kalyvitis
// Portions Copyright (c) 2011 The Go Authors
package mustache
import (
"bytes"
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
// token represents a token or text string returned from the scanner.
type token struct {
typ tokenType
val string
line int
col int
}
// String satisfies the fmt.Stringer interface, making it easier to print tokens.
func (i token) String() string {
return fmt.Sprintf("%s:%q", i.typ, i.val)
}
// tokenType identifies the type of lex tokens.
type tokenType int
const (
tokenError tokenType = iota // error occurred; value is text of error
tokenEOF
tokenIdentifier // alphanumeric identifier
tokenLeftDelim // {{ left action delimiter
tokenRightDelim // }} right action delimiter
tokenText // plain text
tokenComment // {{! this is a comment and is ignored}}
tokenSectionStart // {{#foo}} denotes a section start
tokenSectionInverse // {{^foo}} denotes an inverse section start
tokenSectionEnd // {{/foo}} denotes the closing of a section
tokenRawStart // { denotes the beginning of an unencoded identifier
tokenRawEnd // } denotes the end of an unencoded identifier
tokenRawAlt // {{&foo}} is an alternative way to define raw tags
tokenPartial // {{>foo}} denotes a partial
tokenSetDelim // {{={% %}=}} sets delimiters to {% and %}
tokenSetLeftDelim // denotes a custom left delimiter
tokenSetRightDelim // denotes a custom right delimiter
)
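// For illustration only: with the default "{{" and "}}" delimiters, a template
// such as "{{#ok}}Hi{{/ok}}" (a hypothetical input, not part of this file)
// would be scanned into the stream
//
//	t_left_delim:"{{" t_section_start:"#" t_ident:"ok" t_right_delim:"}}"
//	t_text:"Hi" t_left_delim:"{{" t_section_end:"/" t_ident:"ok"
//	t_right_delim:"}}" t_eof:""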
// Make the types prettyprint.
var tokenName = map[tokenType]string{
tokenError: "t_error",
tokenEOF: "t_eof",
tokenIdentifier: "t_ident",
tokenLeftDelim: "t_left_delim",
tokenRightDelim: "t_right_delim",
tokenText: "t_text",
tokenComment: "t_comment",
tokenSectionStart: "t_section_start",
tokenSectionInverse: "t_section_inverse",
tokenSectionEnd: "t_section_end",
tokenRawStart: "t_raw_start",
tokenRawEnd: "t_raw_end",
tokenRawAlt: "t_raw_alt",
tokenPartial: "t_partial",
tokenSetDelim: "t_set_delim",
tokenSetLeftDelim: "t_set_left_delim",
tokenSetRightDelim: "t_set_right_delim",
}
// String satisfies the fmt.Stringer interface, making it easier to print token types.
func (i tokenType) String() string {
s := tokenName[i]
if s == "" {
return fmt.Sprintf("t_unknown_%d", int(i))
}
return s
}
const eof = -1
// stateFn represents the state of the scanner as a function that returns the
// next state.
type stateFn func(*lexer) stateFn
// lexer holds the state of the scanner.
type lexer struct {
name string // the name of the input; used only for error reports.
input string // the string being scanned.
leftDelim string // start of action.
rightDelim string // end of action.
state stateFn // the next lexing function to enter.
pos int // current position in the input.
start int // start position of this token.
width int // width of last rune read from input.
tokens chan token // channel of scanned tokens.
}
// next returns the next rune in the input.
func (l *lexer) next() (r rune) {
if l.pos >= len(l.input) {
l.width = 0
return eof
}
r, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
l.pos += l.width
return r
}
// seek advances the current position by n bytes.
func (l *lexer) seek(n int) {
l.pos += n
}
// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
r := l.next()
l.backup()
return r
}
// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
l.pos -= l.width
}
// emit passes a token back to the client.
func (l *lexer) emit(t tokenType) {
l.tokens <- token{
t,
l.input[l.start:l.pos],
l.lineNum(),
l.columnNum(),
}
l.start = l.pos
}
// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
l.start = l.pos
}
// lineNum reports which line we're on. Doing it this way
// means we don't have to worry about peek double counting.
func (l *lexer) lineNum() int {
return 1 + strings.Count(l.input[:l.pos], "\n")
}
// columnNum reports which column of the current line we're on.
func (l *lexer) columnNum() int {
if lf := strings.LastIndex(l.input[:l.pos], "\n"); lf != -1 {
return len(l.input[lf+1 : l.pos])
}
return len(l.input[:l.pos])
}
// errorf returns an error token and terminates the scan by passing
// back a nil pointer that will be the next state, terminating l.token.
func (l *lexer) errorf(format string, args ...interface{}) stateFn {
l.tokens <- token{
tokenError,
fmt.Sprintf(format, args...),
l.lineNum(),
l.columnNum(),
}
return nil
}
// token returns the next token from the input, running the current state
// function until a token becomes available on the channel.
func (l *lexer) token() token {
for {
select {
case token := <-l.tokens:
return token
default:
l.state = l.state(l)
}
}
}
func (l *lexer) String() string {
w := bytes.NewBuffer(nil)
fmt.Fprintf(w, "Template: %q\n", l.input)
fmt.Fprintf(w, "Index : %q\n", l.pos)
fmt.Fprintf(w, "Current : %q\n", l.input[l.pos])
fmt.Fprintf(w, "Buffer : %q\n", l.input[l.start:l.pos])
return w.String()
}
// newLexer creates a new scanner for the input string.
func newLexer(input, left, right string) *lexer {
l := &lexer{
input: input,
leftDelim: left,
rightDelim: right,
tokens: make(chan token, 2),
}
l.state = stateText // initial state
return l
}
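// A minimal usage sketch, assuming the caller drives the lexer in a loop (the
// template string below is illustrative, not part of this package):
//
//	l := newLexer("Hello, {{name}}!", "{{", "}}")
//	for {
//		t := l.token()
//		if t.typ == tokenEOF || t.typ == tokenError {
//			break
//		}
//		fmt.Println(t)
//	}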
// state functions.
// stateText scans until an opening action delimiter, "{{".
func stateText(l *lexer) stateFn {
for {
// Look ahead for the left delimiter, which switches the lexer to scanning
// a tag instead of plain text.
if strings.HasPrefix(l.input[l.pos:], l.leftDelim) {
if l.pos > l.start {
l.emit(tokenText)
}
return stateLeftDelim
}
// Exit the loop once we reach the end of the input.
if l.next() == eof {
break
}
}
// Emit whatever we gathered so far as text.
if l.pos > l.start {
l.emit(tokenText)
}
// Always end with an EOF token. The parser will keep asking for tokens until
// a tokenEOF or tokenError token is encountered.
l.emit(tokenEOF)
// The text state doesn't have a default next state.
return nil
}
// stateLeftDelim scans the left delimiter, which is known to be present.
func stateLeftDelim(l *lexer) stateFn {
l.seek(len(l.leftDelim))
if l.peek() == '=' {
// When the lexer encounters "{{=" it proceeds to the set delimiter
// state, which alters the left and right delimiters. The operation is
// largely hidden from the parser; only an empty tokenSetDelim is emitted.
l.next()
return stateSetDelim
}
l.emit(tokenLeftDelim)
return stateTag
}
// stateRightDelim scans the right delimiter, which is known to be present.
func stateRightDelim(l *lexer) stateFn {
l.seek(len(l.rightDelim))
l.emit(tokenRightDelim)
return stateText
}
// stateTag scans the elements inside action delimiters.
func stateTag(l *lexer) stateFn {
if strings.HasPrefix(l.input[l.pos:], "}"+l.rightDelim) {
l.seek(1)
l.emit(tokenRawEnd)
return stateRightDelim
}
if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
return stateRightDelim
}
switch r := l.next(); {
case r == eof || r == '\n':
return l.errorf("unclosed action")
case whitespace(r):
l.ignore()
case r == '!':
l.emit(tokenComment)
return stateComment
case r == '#':
l.emit(tokenSectionStart)
case r == '^':
l.emit(tokenSectionInverse)
case r == '/':
l.emit(tokenSectionEnd)
case r == '&':
l.emit(tokenRawAlt)
case r == '>':
l.emit(tokenPartial)
case r == '{':
l.emit(tokenRawStart)
case alphanum(r):
l.backup()
return stateIdent
default:
return l.errorf("unrecognized character in action: %#U", r)
}
return stateTag
}
// stateIdent scans an alphanumeric or field.
func stateIdent(l *lexer) stateFn {
Loop:
for {
switch r := l.next(); {
case alphanum(r):
// absorb.
default:
l.backup()
l.emit(tokenIdentifier)
break Loop
}
}
return stateTag
}
// stateComment scans a comment. The left comment marker is known to be present.
func stateComment(l *lexer) stateFn {
i := strings.Index(l.input[l.pos:], l.rightDelim)
if i < 0 {
return l.errorf("unclosed tag")
}
l.seek(i)
l.emit(tokenText)
return stateRightDelim
}
// stateSetDelim scans a set-delimiter tag and replaces the lexer's left
// and right delimiters with the new values.
func stateSetDelim(l *lexer) stateFn {
end := "=" + l.rightDelim
i := strings.Index(l.input[l.pos:], end)
if i < 0 {
return l.errorf("unclosed tag")
}
delims := strings.Split(l.input[l.pos:l.pos+i], " ") // e.g. "{% %}" -> ["{%", "%}"]
if len(delims) < 2 {
return l.errorf("set delimiters should be separated by a space")
}
delimFn := leftFn
for _, delim := range delims {
if delim != "" {
if delimFn != nil {
delimFn = delimFn(l, delim)
}
}
}
l.seek(i + len(end))
l.ignore()
l.emit(tokenSetDelim)
return stateText
}
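// Worked example (illustrative): lexing "{{={% %}=}}" enters stateSetDelim
// with the remaining input "{% %}=}}". end is "=}}", the text before it splits
// into "{%" and "%}", leftDelim and rightDelim are updated via leftFn/rightFn,
// and an empty tokenSetDelim is emitted before returning to stateText.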
// delimFn is a self-referential function type which helps set the left and
// right delimiters in the correct order.
type delimFn func(l *lexer, s string) delimFn
// leftFn sets the left delimiter to s and returns a rightFn.
func leftFn(l *lexer, s string) delimFn {
l.leftDelim = s
return rightFn
}
// rightFn sets the right delimiter to s.
func rightFn(l *lexer, s string) delimFn {
l.rightDelim = s
return nil
}
// whitespace reports whether r is a whitespace character.
func whitespace(r rune) bool {
switch r {
case ' ', '\t', '\n', '\r':
return true
}
return false
}
// alphanum reports whether r is alphabetic, a digit, an underscore, or a dot.
func alphanum(r rune) bool {
return r == '_' || r == '.' || unicode.IsLetter(r) || unicode.IsDigit(r)
}