import (
"fmt"
"io"
- "iter"
+ "log"
"slices"
"strings"
)
-type tokens iter.Seq[*Token]
+type ParseState int
-func (toks tokens) nextOf(tts ...TokenType) *Token {
- var out *Token
- for tok := range toks.until(tts...) {
- out = tok
- }
- if slices.Contains(tts, out.Type) {
- return out
- } else {
- return nil
- }
-}
+const (
+ PSTATE_CONTENT = ParseState(iota)
+ PSTATE_ATTRS
+)
-// untilInclusive gets the next N tokens in p, until one is in types, or is EOF.
-func (toks tokens) until(tts ...TokenType) tokens {
- return func(yield func(*Token) bool) {
- for tok := range toks {
- if !yield(tok) || slices.Contains(tts, tok.Type) || tok.Type == EOF {
- return
- }
- }
- }
-}
+// contentFunc is a reader over tokens that may return any number of
+// arbitrary nodes, like anyContent and attrContent. This is used to provide
+// callbacks to nodes where reading of mixed content is interrupted, like with
+// templates. This usage typically means a contentFunc passing itself as
+// a callback to readTemplate.
+type contentFunc func(until ...TokenType) ([]any, error)
-func (toks tokens) discardUntil(tts ...TokenType) {
- for range toks.until(tts...) {
- }
+// parser is a parser over a token iterator.
+// Parsers have two types of readers: content and node. Content readers have a
+// signature that conforms to contentFunc, and may return any number of nodes of
+// types depending on the reader. Node readers return a single node of defined
+// type. Node readers may also accept a callback to a content reader, if they
+// may contain variable types of content.
+type parser struct {
+ tokens tokens
+
+ debug *log.Logger
}
-func (toks tokens) seq() iter.Seq[*Token] {
- return iter.Seq[*Token](toks)
+// newParser returns a new parser with optional debug logging.
+// If debug is nil, log output is discarded. Debug logging is also passed to the
+// underlying tokenizer.
+func newParser(r io.Reader, debug *log.Logger) *parser {
+ if debug == nil {
+ debug = log.New(io.Discard, "", 0)
+ }
+ tokenizer := newTokenizer(r, debug)
+ return &parser{
+ tokens: tokenizer.all(),
+ debug: debug,
+ }
}
-// readContent returns a list of "content tokens", wherever they're acceptable.
-func readContent(toks tokens) ([]any, error) {
+// anyContent is a content reader for the base types of content in an HTML
+// template file: text, HTML, and templates.
+func (p *parser) anyContent(until ...TokenType) ([]any, error) {
+ p.debug.Printf("reading content until any %v", until)
out := make([]any, 0)
var text *TextNode
resolveText := func() {
if text != nil {
+ p.debug.Printf("content resolving text %s", text.Value)
out = append(out, text)
}
text = nil
}
- for tok := range toks {
+ for tok := range p.tokens.until(until...) {
switch tok.Type {
case TEXT, WHITESPACE:
if text == nil {
text.Value += tok.Literal
case TEMPLATE_OPEN:
resolveText()
- node, err := readTemplate(toks)
+ node, err := p.templateNode(p.anyContent)
if err != nil {
return nil, err
}
out = append(out, node)
case TAG_OPEN:
resolveText()
- node, err := readElement(toks)
+ node, err := p.elementNode()
if err != nil {
return nil, err
}
out = append(out, node)
case TAG_END_OPEN:
- toks.discardUntil(TAG_CLOSE)
+ p.tokens.discardUntil(TAG_CLOSE)
default:
}
}
resolveText()
+ p.debug.Printf("read content %v", out)
return out, nil
}
return out
}
-func (d *Document) read(toks tokens) (err error) {
- d.Children, err = readContent(toks)
- return err
+func (p *parser) docNode() (d *Document, err error) {
+ p.debug.Println("reading document")
+ d = new(Document)
+ d.Children, err = p.anyContent()
+ return d, err
}
type TextNode struct {
return fmt.Sprintf("{{ %s }}", node.Value)
}
-// read reads tokens into the TemplateNode.
-func readTemplate(toks tokens) (any, error) {
+// templateNode is a node reader, and it may return a TemplateNode or any of
+// the template "block" nodes (TemplateIf, etc.) if the template contains the
+// corresponding keyword.
+func (p *parser) templateNode(callback contentFunc) (any, error) {
+ p.debug.Println("reading template")
node := new(TemplateNode)
- for tok := range toks.until(TEMPLATE_CLOSE) {
- switch tok.Type {
- case TEMPLATE_KEYWORD:
+ first := true
+
+readTemplate:
+ for tok := range p.tokens.until(TEMPLATE_CLOSE) {
+ if first && tok.Type == TEMPLATE_KEYWORD {
+ p.debug.Printf("template starts with keyword '%s'; deferring", tok.Literal)
switch tok.Literal {
case "if":
- return readTemplateIf(toks)
+ return p.templateIfNode(callback)
case "else":
- next := toks.nextOf(TEMPLATE_KEYWORD, TEMPLATE_CLOSE)
+ next := p.tokens.nextOf(TEMPLATE_KEYWORD, TEMPLATE_CLOSE)
if next.Literal == "if" {
- return readTemplateIf(toks)
+ return p.templateIfNode(callback)
} else if next.Type == TEXT {
- toks.discardUntil(TEMPLATE_CLOSE)
+ p.tokens.discardUntil(TEMPLATE_CLOSE)
}
return &TemplateElse{}, nil
+ case "with":
+ return p.templateWithNode(callback)
case "end":
- toks.discardUntil(TEMPLATE_CLOSE)
+ p.tokens.discardUntil(TEMPLATE_CLOSE)
return &TemplateEnd{}, nil
default:
return nil, fmt.Errorf("unrecognized template keyword %s", tok.Literal)
}
+ }
+ switch tok.Type {
case TEXT:
node.Value += " " + tok.Literal
+ case TEMPLATE_CLOSE:
+ break readTemplate
+ default:
+ return nil, fmt.Errorf("unexpected token %s in templateNode", tok)
}
}
node.Value = strings.TrimSpace(node.Value)
+ p.debug.Printf("read template expression %s", node)
return node, nil
}
type TemplateElse struct{}
+
+func (node *TemplateElse) String() string {
+ return "TemplateElse"
+}
+
type TemplateEnd struct{}
+func (node *TemplateEnd) String() string {
+ return "TemplateEnd"
+}
+
type TemplateIf struct {
Condition string
Then []any
return fmt.Sprintf("{{ if %s }}%s%s%s{{ end }}", node.Condition, JoinAny(node.Then, ""), elifStr, elseStr)
}
-func readTemplateIf(toks tokens) (*TemplateIf, error) {
+func (p *parser) templateIfNode(callback contentFunc) (*TemplateIf, error) {
+ p.debug.Println("reading template if")
node := new(TemplateIf)
- for tok := range toks.until(TEMPLATE_CLOSE) {
+ for tok := range p.tokens.until(TEMPLATE_CLOSE) {
switch tok.Type {
case TEXT, WHITESPACE:
node.Condition += tok.Literal
}
}
node.Condition = strings.TrimSpace(node.Condition)
+
+ acc := make([]any, 0)
isElse := false
+readBlock:
for {
- then, err := readContent(toks.until(TEMPLATE_OPEN))
+ then, err := callback(TEMPLATE_OPEN)
if err != nil {
return nil, err
}
if len(then) == 0 {
break
}
- term := then[len(then)-1]
- body := then[:len(then)-1]
- switch v := term.(type) {
+ acc = append(acc, then...)
+ last := acc[len(acc)-1]
+ switch v := last.(type) {
case *TemplateIf:
- node.Then = append(node.Then, body...)
+ node.Then = acc[:len(acc)-1]
+ acc = make([]any, 0)
// Elifs steal the else. Take it back.
- node.Else = append(node.Else, v.Else...)
+ node.Else = v.Else
v.Else = nil
node.Elif = append(node.Elif, v)
case *TemplateElse:
- node.Then = append(node.Then, body...)
+ node.Then = acc[:len(acc)-1]
+ acc = make([]any, 0)
isElse = true
case *TemplateEnd:
if !isElse {
- node.Then = body
+ node.Then = acc[:len(acc)-1]
} else {
- node.Else = body
+ node.Else = acc[:len(acc)-1]
isElse = false
}
- default:
+ break readBlock
+ }
+ }
+ p.debug.Printf("read template if %s", node)
+ return node, nil
+}
+
+type TemplateWith struct {
+ Expression string
+ Content []any
+}
+
+func (node *TemplateWith) String() string {
+ return fmt.Sprintf("{{ with %s }}%s{{ end }}", node.Expression, JoinAny(node.Content, ""))
+}
+
+func (p *parser) templateWithNode(callback contentFunc) (*TemplateWith, error) {
+ p.debug.Println("reading template with")
+ node := new(TemplateWith)
+ for tok := range p.tokens.until(TEMPLATE_CLOSE) {
+ switch tok.Type {
+ case TEXT, WHITESPACE:
+ node.Expression += tok.Literal
}
}
+ node.Expression = strings.TrimSpace(node.Expression)
+
+ acc := make([]any, 0)
+ for {
+ then, err := callback(TEMPLATE_OPEN)
+ if err != nil {
+ return nil, err
+ }
+ if len(then) == 0 {
+ break
+ }
+ acc = append(acc, then...)
+ last := acc[len(acc)-1]
+ if _, ok := last.(*TemplateEnd); ok {
+ node.Content = acc[:len(acc)-1]
+ break
+ }
+ }
+ p.debug.Printf("read template with %s", node)
return node, nil
}
type Attribute struct {
Name string
- Value string
+ Value []any
// Boolean is true if attr is a "boolean attribute"; it has no string value
// in the source HTML, so it just represents true-if-present. Boolean is
// *not* the value of the boolean attribute.
if attr.Boolean {
return attr.Name
} else {
- return fmt.Sprintf(`%s="%s"`, attr.Name, attr.Value)
+ valStrs := make([]string, len(attr.Value))
+ for i, val := range attr.Value {
+ valStrs[i] = fmt.Sprint(val)
+ }
+ return fmt.Sprintf(`%s="%s"`, attr.Name, strings.Join(valStrs, " "))
+ }
+}
+
+func (p *parser) attrContent(until ...TokenType) ([]any, error) {
+ p.debug.Println("reading tag attributes")
+ attrs := make([]any, 0)
+ attr := Attribute{}
+ for tok := range p.tokens.until(until...) {
+ switch tok.Type {
+ case TEXT:
+ // A new non-empty name while one is already pending means the
+ // pending attribute had no "=value": record it as a boolean
+ // attribute before starting the next one.
+ if tok.Literal != "" {
+ if attr.Name != "" {
+ attr.Boolean = true
+ attrs = append(attrs, attr)
+ attr = Attribute{}
+ }
+ attr.Name = tok.Literal
+ }
+ case TAG_EQ:
+ p.tokens.discardUntil(TAG_QUOTE)
+ values, err := p.anyContent(TAG_QUOTE)
+ if err != nil {
+ return nil, err
+ }
+ attr.Value = append(attr.Value, values...)
+ attrs = append(attrs, attr)
+ attr = Attribute{}
+ case TEMPLATE_OPEN:
+ tmpl, err := p.templateNode(p.attrContent)
+ if err != nil {
+ return nil, err
+ }
+ attrs = append(attrs, tmpl)
+ }
}
+ // Flush a trailing boolean attribute (a name never followed by "=").
+ if attr.Name != "" {
+ attr.Boolean = true
+ attrs = append(attrs, attr)
+ }
+ return attrs, nil
}
type ElementNode struct {
Name string
- Attributes []Attribute
+ Attributes []any
Children []any
void bool
}
func (node *ElementNode) String() string {
- attrsRaw := make([]string, len(node.Attributes))
- for i, attr := range node.Attributes {
- attrsRaw[i] = attr.String()
- }
- attrs := ""
- if len(attrsRaw) > 0 {
- attrs = strings.Join(attrsRaw, " ")
+ attrs := fmt.Sprint(node.Attributes...)
+ if attrs != "" {
+ attrs = " " + attrs
}
if node.void {
- return fmt.Sprintf("<%s %s />", node.Name, attrs)
+ return fmt.Sprintf("<%s%s />", node.Name, attrs)
} else {
inner := ""
for _, child := range node.Children {
inner += fmt.Sprint(child)
}
- return fmt.Sprintf("<%s %s >%s</%s>", node.Name, attrs, inner, node.Name)
+ return fmt.Sprintf("<%s%s>%s</%s>", node.Name, attrs, inner, node.Name)
}
}
-func readElement(toks tokens) (*ElementNode, error) {
+func (p *parser) elementNode() (*ElementNode, error) {
+ p.debug.Println("reading element")
node := new(ElementNode)
- node.Name = toks.nextOf(TEXT).Literal
+ node.Name = p.tokens.nextOf(TEXT).Literal
node.void = slices.Contains(voidElems, node.Name)
- tagToks := toks.until(TAG_CLOSE, TAG_VOID_CLOSE)
- next := tagToks.nextOf(TEXT)
- for next != nil {
- name := next.Literal
- next = tagToks.nextOf(TEXT, TAG_EQ)
- // If it's text, this is a boolean attribute. Otherwise, it has a value.
- if next == nil || next.Type == TEXT {
- node.Attributes = append(node.Attributes, Attribute{
- Name: name,
- Boolean: true,
- })
- } else {
- value := ""
- // Advance to the start of the value...
- tagToks.discardUntil(TAG_QUOTE)
- // Then read until the end.
- for tok := range tagToks.until(TAG_QUOTE) {
- switch tok.Type {
- case TEXT:
- value += tok.Literal
- }
- }
- node.Attributes = append(node.Attributes, Attribute{
- Name: name,
- Value: value,
- })
- next = tagToks.nextOf(TEXT)
- }
+ attrs, err := p.attrContent(TAG_CLOSE, TAG_VOID_CLOSE)
+ if err != nil {
+ return nil, err
}
+ node.Attributes = attrs
if node.void {
return node, nil
}
- children, err := readContent(toks.until(TAG_END_OPEN))
+ children, err := p.anyContent(TAG_END_OPEN)
if err != nil {
return nil, err
}
}
func Parse(r io.Reader) (any, error) {
- doc := new(Document)
- err := doc.read(Tokenize(r))
- return doc, err
+ parser := newParser(r, nil)
+ return parser.docNode()
}
package parse
import (
- "fmt"
- "slices"
+ "log"
+ "os"
"strings"
"testing"
)
+// basicParseTest is a parser test.
+// if expected is given, the test result for input, stringified, is compared
+// to it; otherwise, input is expected to pass a roundtrip.
+type basicParseTest struct {
+ name string
+ input string
+
+ expected string
+}
+
func TestParseBasic(t *testing.T) {
- testStrings := map[string]string{
- "hello": "Hello, World!",
- "template": "Hello, {{ `template` }}!",
- "html": "<div><p>Hello, HTML!</p><br></div>",
- "html+template": "{{ if .condition }}<p>{{- .text -}}</p>{{ end }}",
+ tests := []basicParseTest{
+ {name: "expression", input: "Hello, {{ `template` }}!"},
+ {name: "if", input: "{{ if .condition }}hello{{ end }}"},
+ {name: "if-nested", input: "{{ if .message }}{{ .message }}{{ end }}"},
+ {name: "if-else", input: "{{ if .condition }}foo{{ else }}bar{{ end }}"},
+ {name: "if-else-nested", input: "{{ if .m1 }}{{ .m1 }}{{ else }}{{ .m2 }}{{ end }}"},
+ {name: "if-elif", input: "{{ if .c1 }}foo{{ else if .c2 }}bar{{ end }}"},
+ {name: "if-elif-nested", input: "{{ if .m1 }}{{ .m1 }}{{ else if .m2 }}{{ .m2 }}{{ end }}"},
+ {name: "if-elif-else", input: "{{ if .c1 }}foo{{ else if .c2 }}bar{{ else }}baz{{ end }}"},
+ {name: "if-elif-else-nested", input: "{{ if .m1 }}{{ .m1 }}{{ else if m2 }}{{ .m2 }}{{ else }}{{ .m3 }}{{ end }}"},
+ {name: "with", input: "{{ with .message }}hello{{ end }}"},
+ {name: "with-nested", input: "{{ with .message }}{{ . }}{{ end }}"},
}
- for name, val := range testStrings {
- t.Run(name, func(t *testing.T) {
- doc, err := Parse(strings.NewReader(val))
- t.Log(val)
+ /*
+ testStrings := map[string]string{
+ //"if": "{{ if .condition }}hello{{ end }}",
+ //"if-else": "{{ if .condition }}hello{{ else }}{{ .else }}{{ end }}",
+ //"if-elif": "{{ if .condition }}hello{{ else if .other }}world{{ end }}",
+ //"if-elif-else": "{{ if .condition }}hello{{ else if .other }}foo{{ else }}bar{{ end }}",
+ //"with": "{{ with .value }}abc{{ . }}{{ end }}",
+ //"html": "<div><p>Hello, HTML!</p><br></div>",
+ //"html+template": "{{ if .condition }}<p>{{- .text -}}</p>{{ end }}",
+ }
+ */
+ for _, test := range tests {
+ debug := log.New(os.Stderr, "", log.Lshortfile)
+ t.Run(test.name, func(t *testing.T) {
+ parser := newParser(strings.NewReader(test.input), debug)
+ doc, err := parser.docNode()
+ t.Log(test.input)
if err != nil {
t.Fatal(err)
}
t.Log(doc)
+ got := doc.String()
+ if test.expected != "" {
+ if got != test.expected {
+ t.Fatalf("result %q doesn't match expected %q", got, test.expected)
+ }
+ } else if got != test.input {
+ t.Fatalf("result %q didn't pass roundtrip against input %q", got, test.input)
+ }
})
}
}
})
}
}
-func TestParseTemplate(t *testing.T) {
- testStrings := map[string]string{
- "if": "{{ if .condition }}Hello{{ end }}",
- "if-else": "{{ if .condition }}Hello{{ else }}World{{ end }}",
- "if-elif": "{{ if .condition }}Hello{{ else if .other.condition }}World{{ end }}",
- "if-elif-else": "{{ if .condition }}One{{ else if .other.condition }}Two{{ else }}Three{{ end }}",
- }
- for name, val := range testStrings {
- t.Run(name, func(t *testing.T) {
- doc, err := Parse(strings.NewReader(val))
- t.Log(val)
- if err != nil {
- t.Fatal(err)
- }
- t.Log(doc)
- })
- }
-}
func TestParseComplex(t *testing.T) {
testStrings := map[string]string{
- "template-attr": `<img src="{{ .img }}">`,
+ "attr-template-value": `<img src="{{ .img }}">`,
+ "attr-if-template": `<div {{ if .red }}class="red"{{ end }}></div>`,
+ "attr-if-else-template": `<div {{ if .red }}class="red"{{ else }}class="blue"{{ end }}></div>`,
}
for name, val := range testStrings {
t.Run(name, func(t *testing.T) {
- toks := slices.Collect(Tokenize(strings.NewReader(val)).seq())
- fmt.Println(toks)
doc, err := Parse(strings.NewReader(val))
t.Log(val)
if err != nil {
"errors"
"fmt"
"io"
+ "iter"
+ "log"
+ "slices"
"strings"
)
type TokenState int
const (
- STATE_CONTENT = TokenState(iota)
- STATE_TEMPLATE
- STATE_TAG
+ TSTATE_CONTENT = TokenState(iota)
+ TSTATE_TEMPLATE
+ TSTATE_TAG
+ TSTATE_ATTRS
)
func (ts TokenState) String() string {
switch ts {
- case STATE_CONTENT:
+ case TSTATE_CONTENT:
return "STATE_CONTENT"
- case STATE_TEMPLATE:
+ case TSTATE_TEMPLATE:
return "STATE_TEMPLATE"
- case STATE_TAG:
+ case TSTATE_TAG:
return "STATE_TAG"
+ case TSTATE_ATTRS:
+ return "STATE_ATTRS"
default:
return "STATE_UNKNOWN"
}
}
type tokenizer struct {
- r *bufio.Reader
- state TokenState
- nextToken *Token
- err error
+ r *bufio.Reader
+ state TokenState
+ prevStates []TokenState
+ nextToken *Token
+ err error
+
+ debug *log.Logger
+}
+
+func newTokenizer(r io.Reader, debug *log.Logger) *tokenizer {
+ if debug == nil {
+ debug = log.New(io.Discard, "", 0)
+ }
+ return &tokenizer{
+ r: bufio.NewReader(r),
+ debug: debug,
+ }
}
// peek returns the next i characters, and "true" if i characters were found.
t.r.Discard(i)
}
+func (t *tokenizer) setState(ts TokenState) {
+ t.debug.Printf("pushing tokenizer state %s", ts)
+ t.prevStates = append(t.prevStates, t.state)
+ t.state = ts
+}
+
+func (t *tokenizer) revertState() {
+ if len(t.prevStates) > 0 {
+ t.state = t.prevStates[len(t.prevStates)-1]
+ t.prevStates = t.prevStates[:len(t.prevStates)-1]
+ t.debug.Printf("popping tokenizer state %s", t.state)
+ }
+}
+
func (t *tokenizer) nextContent() (*Token, error) {
acc := make([]rune, 100)
cursor := 0
switch token {
case "{{-":
out := &Token{TEXT, string(acc[:cursor])}
- t.state = STATE_TEMPLATE
+ t.setState(TSTATE_TEMPLATE)
return out, nil
}
}
switch token {
case "{{":
out := &Token{TEXT, string(acc[:cursor])}
- t.state = STATE_TEMPLATE
+ t.setState(TSTATE_TEMPLATE)
return out, nil
case "</":
out := &Token{TEXT, string(acc[:cursor])}
- t.state = STATE_TAG
+ t.setState(TSTATE_TAG)
return out, nil
}
}
switch token {
case "<":
out := &Token{TEXT, string(acc[:cursor])}
- t.state = STATE_TAG
+ t.setState(TSTATE_TAG)
return out, nil
}
}
cursor++
}
for {
- if token, ok := t.peek(5); ok {
- switch token {
- case "range":
- out := &Token{TEMPLATE_KEYWORD, token}
- t.advance(5)
- return out, nil
- }
- }
- if token, ok := t.peek(4); ok {
- switch token {
- case "with":
- out := &Token{TEMPLATE_KEYWORD, token}
- t.advance(4)
- return out, nil
- case "else":
- out := &Token{TEMPLATE_KEYWORD, token}
- t.advance(4)
- return out, nil
- }
- }
if token, ok := t.peek(3); ok {
switch token {
- case "end":
- out := &Token{TEMPLATE_KEYWORD, token}
- t.advance(3)
- return out, nil
case "{{-":
out := &Token{TEMPLATE_OPEN, token}
t.advance(3)
return out, nil
case "-}}":
- out := &Token{TEMPLATE_CLOSE, token}
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.nextToken = &Token{TEMPLATE_CLOSE, token}
t.advance(3)
- t.state = STATE_CONTENT
+ t.revertState()
return out, nil
}
}
if token, ok := t.peek(2); ok {
switch token {
- case "if":
- out := &Token{TEMPLATE_KEYWORD, token}
- t.advance(2)
- return out, nil
case "{{":
out := &Token{TEMPLATE_OPEN, token}
t.advance(2)
return out, nil
case "}}":
- out := &Token{TEMPLATE_CLOSE, token}
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.nextToken = &Token{TEMPLATE_CLOSE, token}
t.advance(2)
- t.state = STATE_CONTENT
+ t.revertState()
return out, nil
}
}
return nil, err
}
if strings.ContainsRune(whitespace, r) {
- return &Token{TEXT, string(acc[:cursor])}, nil
+ token := string(acc[:cursor])
+ switch token {
+ case "if", "else", "with", "range", "end":
+ return &Token{TEMPLATE_KEYWORD, token}, nil
+ default:
+ return &Token{TEXT, token}, nil
+ }
} else {
accumulate(r)
}
out := &Token{TEXT, string(acc[:cursor])}
t.nextToken = &Token{TAG_VOID_CLOSE, token}
t.advance(2)
- t.state = STATE_CONTENT
+ t.setState(TSTATE_CONTENT)
return out, nil
}
}
out := &Token{TEXT, string(acc[:cursor])}
t.nextToken = &Token{TAG_CLOSE, token}
t.advance(1)
- t.state = STATE_CONTENT
+ t.setState(TSTATE_CONTENT)
+ return out, nil
+ }
+ }
+ r, _, err := t.r.ReadRune()
+ if errors.Is(err, io.EOF) {
+ return &Token{EOF, ""}, nil
+ } else if err != nil {
+ return nil, err
+ }
+ if strings.ContainsRune(whitespace, r) {
+ if cursor > 0 {
+ t.setState(TSTATE_ATTRS)
+ }
+ return &Token{TEXT, string(acc[:cursor])}, nil
+ } else {
+ accumulate(r)
+ }
+ }
+}
+
+func (t *tokenizer) nextAttrs() (*Token, error) {
+ acc := make([]rune, 10)
+ cursor := 0
+ accumulate := func(r rune) {
+ if cursor >= len(acc) {
+ acc = append(acc, make([]rune, 10)...)
+ }
+ acc[cursor] = r
+ cursor++
+ }
+ for {
+ if token, ok := t.peek(2); ok {
+ switch token {
+ case "{{":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.setState(TSTATE_TEMPLATE)
+ return out, nil
+ case "/>":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.setState(TSTATE_TAG)
+ return out, nil
+ }
+ }
+ if token, ok := t.peek(1); ok {
+ switch token {
+ case "<":
+ out := &Token{TAG_OPEN, token}
+ t.advance(1)
+ return out, nil
+ case "=":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.nextToken = &Token{TAG_EQ, token}
+ t.advance(1)
+ return out, nil
+ case `"`:
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.nextToken = &Token{TAG_QUOTE, token}
+ t.advance(1)
+ return out, nil
+ case ">":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.setState(TSTATE_TAG)
return out, nil
}
}
}
}
-func (t *tokenizer) next2() (*Token, error) {
+func (t *tokenizer) next() (*Token, error) {
var next *Token
var err error
for next == nil && err == nil {
return next, nil
}
switch t.state {
- case STATE_CONTENT:
+ case TSTATE_CONTENT:
next, err = t.nextContent()
- case STATE_TEMPLATE:
+ case TSTATE_TEMPLATE:
next, err = t.nextTemplate()
- case STATE_TAG:
+ case TSTATE_TAG:
next, err = t.nextTag()
+ case TSTATE_ATTRS:
+ next, err = t.nextAttrs()
default:
return nil, fmt.Errorf("unknown state %s", t.state)
}
next = nil
}
}
+ t.debug.Printf("got %s", next)
return next, err
}
+type tokens iter.Seq[*Token]
+
// all returns an iterator over all tokens produced by the tokenizer.
// Stops iterating on EOF or error.
func (t *tokenizer) all() tokens {
if t.err != nil {
return
}
- tok, err := t.next2()
+ tok, err := t.next()
if err != nil {
yield(&Token{ERROR, t.err.Error()})
break
}
}
-func Tokenize(r io.Reader) tokens {
- tkns := new(tokenizer)
- tkns.r = bufio.NewReader(r)
- return tkns.all()
+func (toks tokens) nextOf(tts ...TokenType) *Token {
+ var out *Token
+ for tok := range toks.until(tts...) {
+ out = tok
+ }
+ if out != nil && slices.Contains(tts, out.Type) {
+ return out
+ } else {
+ return nil
+ }
+}
+
+// until returns an iterator over the next tokens in toks, stopping after it
+// yields the first token whose type is in tts, or an EOF token. The
+// terminating token is included in the iteration.
+func (toks tokens) until(tts ...TokenType) tokens {
+ return func(yield func(*Token) bool) {
+ for tok := range toks {
+ if !yield(tok) || slices.Contains(tts, tok.Type) || tok.Type == EOF {
+ return
+ }
+ }
+ }
+}
+
+func (toks tokens) discardUntil(tts ...TokenType) {
+ for range toks.until(tts...) {
+ }
+}
+
+func (toks tokens) seq() iter.Seq[*Token] {
+ return iter.Seq[*Token](toks)
}