"strings"
)
-type ParseState int
-
-const (
- PSTATE_CONTENT = ParseState(iota)
- PSTATE_ATTRS
-)
-
// contentFunc is a reader over tokens that may return any number of
// arbitrary nodes, like readContent and readAttributes. This is used to provide
// callbacks to nodes where reading of mixed content is interrupted, like with
// templates. This usage typically means a contentFunc passing itself as
// a callback to readTemplate.
-type contentFunc func(until ...TokenType) ([]any, error)
+//
+// A contentFunc takes no arguments. The content readers visible in this file
+// (anyContent, attrContent) decide where to stop by asking
+// *parser.shouldTerminate, so callers select the stopping tokens with
+// *parser.pushTerminator before invoking the reader.
+type contentFunc func() ([]any, error)
// parser is a parser over a token iterator.
// Parsers have two types of readers: content and node. Content readers have a
// may contain variable types of content.
type parser struct {
+ // tokens is the token iterator the parser reads from.
tokens tokens
+ // The parser tracks which TokenTypes terminate the current parsing sequence
+ // with the "terms" stack. Any TokenType in term should exit the current
+ // sequence. By default, terms and term are both empty; an EOF token ends a
+ // sequence regardless of what term holds (see *parser.shouldTerminate).
+ // terms is the stack of terminator sets, not including the current set.
+ terms [][]TokenType
+ // term is the current terminator set. *parser.pushTerminator builds it as
+ // the previously current set plus the newly pushed TokenTypes.
+ // You should use *parser.shouldTerminate instead of accessing this
+ // directly, as shouldTerminate also manages *parser.lastTerm's state.
+ term []TokenType
+ // lastTerm is the last token for which *parser.shouldTerminate returned
+ // "true".
+ // Some sequences benefit from knowing why a callback/callout terminated,
+ // so this is provided as a convenience for those cases.
+ // NOTE(review): shouldTerminate is the only writer visible here, so this
+ // is presumably nil until it first returns true; confirm before relying
+ // on it being non-nil.
+ lastTerm *Token
+ // debug receives the parser's trace output.
debug *log.Logger
}
}
}
+// pushTerminator installs a new current terminator set made of every
+// TokenType that already terminates the sequence in progress plus the ones
+// given in term. The set being replaced is saved on the terms stack.
+//
+// It exists so that one function can tell the functions it calls where to
+// stop reading. Avoid using it merely to bound a loop inside a single
+// function: recursive calls inherit the terminator too, which can be
+// confusing.
+//
+// Pair every call with *parser.popTerminator once the intended sequence has
+// been exhausted.
+func (p *parser) pushTerminator(term ...TokenType) {
+	// Assemble the combined set in its own backing array so it never shares
+	// storage with the set that is about to be saved on the stack.
+	combined := make([]TokenType, 0, len(p.term)+len(term))
+	combined = append(combined, p.term...)
+	combined = append(combined, term...)
+	p.debug.Println("(push) terminating sequences on", combined)
+	p.terms = append(p.terms, p.term)
+	p.term = combined
+}
+
+// popTerminator restores the terminator set that was current before the most
+// recent *parser.pushTerminator call and removes it from the terms stack.
+//
+// Calling it with an empty stack means pushes and pops are unbalanced. Rather
+// than panicking with an index-out-of-range error, that case is reported on
+// the debug logger and the parser falls back to the default (empty)
+// terminator set.
+func (p *parser) popTerminator() {
+	last := len(p.terms) - 1
+	if last < 0 {
+		p.debug.Println("(pop) terminator stack is empty; resetting to default")
+		p.term = nil
+		return
+	}
+	term := p.terms[last]
+	p.debug.Println("(pop) terminating sequences on", term)
+	p.term = term
+	// Release the stack's reference to the restored set before shrinking.
+	p.terms[last] = nil
+	p.terms = p.terms[:last]
+}
+
+// shouldTerminate reports whether the token "on" should end the current
+// parsing sequence. It does when on is an EOF token or when its Type is in
+// the current terminator set (p.term).
+//
+// When it returns true it also stores on in p.lastTerm, allowing functions to
+// read why callbacks/callouts terminated their parsing. A nil token never
+// terminates and leaves p.lastTerm untouched.
+func (p *parser) shouldTerminate(on *Token) bool {
+	// Callers may hand p.lastTerm back in, and that may still be nil; treat
+	// it as "not a terminator" instead of dereferencing it.
+	if on == nil {
+		return false
+	}
+	if on.Type != EOF && !slices.Contains(p.term, on.Type) {
+		return false
+	}
+	p.lastTerm = on
+	return true
+}
+
// anyContent is a content reader for the base types of content in an HTML
// template file; text, HTML, and templates.
-func (p *parser) anyContent(until ...TokenType) ([]any, error) {
- p.debug.Printf("reading content until any %v", until)
+func (p *parser) anyContent() ([]any, error) {
+ p.debug.Printf("reading content until any %v", p.term)
out := make([]any, 0)
var text *TextNode
resolveText := func() {
}
text = nil
}
- for tok := range p.tokens.until(until...) {
+
+ for tok := range p.tokens {
+ done := p.shouldTerminate(tok)
switch tok.Type {
case TEXT, WHITESPACE:
if text == nil {
case TAG_END_OPEN:
p.tokens.discardUntil(TAG_CLOSE)
default:
+ if !done {
+ return nil, fmt.Errorf("unexpected token %s in anyContent", tok)
+ }
+ }
+
+ if done {
+ break
}
}
resolveText()
node := new(TemplateNode)
first := true
-readTemplate:
- for tok := range p.tokens.until(TEMPLATE_CLOSE) {
+sequence:
+ for tok := range p.tokens {
if first && tok.Type == TEMPLATE_KEYWORD {
p.debug.Printf("template starts with keyword '%s'; deferring", tok.Literal)
switch tok.Literal {
case TEXT:
node.Value += " " + tok.Literal
case TEMPLATE_CLOSE:
- break readTemplate
+ break sequence
default:
return nil, fmt.Errorf("unexpected token %s in templateNode", tok)
}
}
node.Condition = strings.TrimSpace(node.Condition)
+ p.pushTerminator(TEMPLATE_OPEN)
acc := make([]any, 0)
isElse := false
readBlock:
for {
- then, err := callback(TEMPLATE_OPEN)
+ then, err := callback()
if err != nil {
return nil, err
}
break readBlock
}
}
+ p.popTerminator()
p.debug.Printf("read template if %s", node)
return node, nil
}
}
node.Expression = strings.TrimSpace(node.Expression)
+ p.pushTerminator(TEMPLATE_OPEN)
acc := make([]any, 0)
for {
- then, err := callback(TEMPLATE_OPEN)
+ then, err := callback()
if err != nil {
return nil, err
}
break
}
}
+ p.popTerminator()
p.debug.Printf("read template with %s", node)
return node, nil
}
}
node.Expression = strings.TrimSpace(node.Expression)
+ p.pushTerminator(TEMPLATE_OPEN)
acc := make([]any, 0)
for {
- then, err := callback(TEMPLATE_OPEN)
+ then, err := callback()
if err != nil {
return nil, err
}
break
}
}
+ p.popTerminator()
p.debug.Printf("read template range %s", node)
return node, nil
}
}
}
-func (p *parser) attrContent(until ...TokenType) ([]any, error) {
+// attrContent is a content reader for the attribute list of a tag. It reads
+// tokens until *parser.shouldTerminate reports a terminator and returns the
+// collected items: an Attribute for each named attribute and whatever
+// templateNode returns for each template found between attributes.
+//
+// A TEXT token starts a new attribute name. TAG_EQ reads the quoted value with
+// anyContent, terminated by TAG_QUOTE. TEMPLATE_OPEN hands control to
+// templateNode with attrContent itself as the callback. Any other token that
+// is not a terminator yields an error.
+func (p *parser) attrContent() ([]any, error) {
	p.debug.Println("reading tag attributes")
	attrs := make([]any, 0)
	attr := Attribute{}
-	for tok := range p.tokens.until(until...) {
+	// resolveAttr appends the attribute being built (if it has a name) to
+	// attrs and starts a fresh one. An attribute with no value items is
+	// marked Boolean.
+	// NOTE(review): an attribute written with an explicitly empty value
+	// presumably also has len(attr.Value) == 0 and would be marked Boolean;
+	// confirm that is intended.
+	resolveAttr := func() {
+		if attr.Name == "" {
+			return
+		}
+		if len(attr.Value) == 0 {
+			attr.Boolean = true
+		}
+		attrs = append(attrs, attr)
+		attr = Attribute{}
+	}
+
+	for tok := range p.tokens {
+		done := p.shouldTerminate(tok)
+		// Trace through the parser's logger rather than stdout.
+		p.debug.Println("attr token", tok, "terminates:", done)
		switch tok.Type {
		case TEXT:
+			resolveAttr()
			attr.Name = tok.Literal
		case TAG_EQ:
			p.tokens.discardUntil(TAG_QUOTE)
+			p.pushTerminator(TAG_QUOTE)
			values, err := p.anyContent()
+			p.popTerminator()
			if err != nil {
				return nil, err
			}
			attr.Value = append(attr.Value, values...)
-			attrs = append(attrs, attr)
-			attr = Attribute{}
+			resolveAttr()
		case TEMPLATE_OPEN:
+			resolveAttr()
			tmpl, err := p.templateNode(p.attrContent)
			if err != nil {
				return nil, err
			}
+			// If the template's callback stopped on a token that also ends
+			// this sequence, stop here too. lastTerm may still be nil when
+			// no terminator has been recorded yet, so guard the call.
+			// NOTE(review): lastTerm is not reset before the callout, so it
+			// could be left over from an earlier sequence; verify.
+			done = done || (p.lastTerm != nil && p.shouldTerminate(p.lastTerm))
			attrs = append(attrs, tmpl)
+		default:
+			if !done {
+				return nil, fmt.Errorf("unexpected token %s in attrContent", tok)
+			}
+		}
+		if done {
+			break
		}
	}
+	resolveAttr()
+	p.debug.Println("read attributes", attrs)
	return attrs, nil
}
node.Name = p.tokens.nextOf(TEXT).Literal
node.void = slices.Contains(voidElems, node.Name)
- attrs, err := p.attrContent(TAG_CLOSE, TAG_VOID_CLOSE)
+ p.pushTerminator(TAG_CLOSE, TAG_VOID_CLOSE)
+ attrs, err := p.attrContent()
+ p.popTerminator()
+ fmt.Println("element", attrs)
if err != nil {
return nil, err
}
if node.void {
return node, nil
}
+ p.pushTerminator(TAG_END_OPEN)
children, err := p.anyContent()
+ p.popTerminator()
if err != nil {
return nil, err
}
expected string
}
-func TestParseBasic(t *testing.T) {
+func TestParseBasicTemplate(t *testing.T) {
tests := []basicParseTest{
{name: "expression", input: "Hello, {{ `template` }}!"},
{name: "if", input: "{{ if .condition }}hello{{ end }}"},
{name: "range", input: "{{ range .messages }}hello{{ end }}"},
{name: "range-nested", input: "{{ range .messages }}{{ . }}{{ end }}"},
}
- /*
- testStrings := map[string]string{
- //"if": "{{ if .condition }}hello{{ end }}",
- //"if-else": "{{ if .condition }}hello{{ else }}{{ .else }}{{ end }}",
- //"if-elif": "{{ if .condition }}hello{{ else if .other }}world{{ end }}",
- //"if-elif-else": "{{ if .condition }}hello{{ else if .other }}foo{{ else }}bar{{ end }}",
- //"with": "{{ with .value }}abc{{ . }}{{ end }}",
- //"html": "<div><p>Hello, HTML!</p><br></div>",
- //"html+template": "{{ if .condition }}<p>{{- .text -}}</p>{{ end }}",
- }
- */
for _, test := range tests {
debug := log.New(os.Stderr, "", log.Lshortfile)
t.Run(test.name, func(t *testing.T) {
}
}
-func TestParseHTML(t *testing.T) {
-	testStrings := map[string]string{
-		"void": "<p>Hello</p><br><p>World</p>",
-		"attrs": `<div class = "outer"><div my-boolean class="inner"></div></div>`,
+// TestParseBasicHTML parses plain HTML inputs and checks that the resulting
+// document renders back to the expected string, or to the input itself when
+// no expected string is given.
+func TestParseBasicHTML(t *testing.T) {
+	cases := []basicParseTest{
+		{name: "element", input: "<div>Hello, 1!</div><div>Hello, 2!</div>"},
+		{name: "element-nested", input: "<div>Hello, <div>world!</div></div>"},
+		{name: "element-void-1", input: "<div>Hello<br />World</div>"},
+		{name: "element-void-2", input: "<div>Hello<br>World", expected: "<div>Hello<br />World</div>"},
+		{name: "element-class", input: `<div class="red">Hello, world!</div>`},
+		{name: "element-class-bool", input: `<div my-bool>Hello, world!</div>`},
+		{name: "element-class-bool-multi", input: `<div my-bool1 my-bool2>Hello, world!</div>`},
+		{name: "element-class-combo", input: `<div class="red" my-bool my-bool2 id="test-div">Hello, world!</div>`},
	}
-	for name, val := range testStrings {
-		t.Run(name, func(t *testing.T) {
-			doc, err := Parse(strings.NewReader(val))
-			t.Log(val)
+	for _, tc := range cases {
+		logger := log.New(os.Stderr, "", log.Lshortfile)
+		t.Run(tc.name, func(t *testing.T) {
+			p := newParser(strings.NewReader(tc.input), logger)
+			doc, err := p.docNode()
+			t.Log(tc.input)
			if err != nil {
				t.Fatal(err)
			}
			t.Log(doc)
+			// An empty expected string means the input must round-trip.
+			switch got := doc.String(); {
+			case tc.expected == "" && got != tc.input:
+				t.Fatal("result document didn't pass roundtrip")
+			case tc.expected == "":
+				t.Log("document passed roundtrip")
+			case got != tc.expected:
+				t.Fatal("result document doesn't match")
+			default:
+				t.Log("document matched expected output")
+			}
		})
	}
}
func TestParseComplex(t *testing.T) {
- testStrings := map[string]string{
- "attr-template-value": `<img src="{{ .img }}">`,
- "attr-if-template": `<div {{ if .red }}class="red"{{ end }}></div>`,
- "attr-if-else-template": `<div {{ if .red }}class="red"{{ else }}class="blue"{{ end }}></div>`,
+ tests := []basicParseTest{
+ {name: "attr-expr", input: `<div class="{{ .class }}">Hello, world!</div>`},
+ {name: "attr-if", input: `<div {{ if .is_my_attr }}my-attr{{ end }}>Hello, world!</div>`},
+ {name: "attr-if-else", input: `<div {{ if .is_1 }}my-attr1{{ else }}my-attr2{{ end }}>Hello, world!</div>`},
+ {name: "attr-if-elif", input: `<div {{ if .is_1 }}my-attr{{ else if .is_2 }}my-attr2{{ end }}>Hello, world!</div>`},
+ {name: "attr-if-elif-else", input: `<div {{ if .is_1 }}my-attr{{ else if .is_2 }}my-attr2{{ else }}my-attr3{{ end }}>Hello, world!</div>`},
}
- for name, val := range testStrings {
- t.Run(name, func(t *testing.T) {
- doc, err := Parse(strings.NewReader(val))
- t.Log(val)
+ for _, test := range tests {
+ debug := log.New(os.Stderr, "", log.Lshortfile)
+ t.Run(test.name, func(t *testing.T) {
+ parser := newParser(strings.NewReader(test.input), debug)
+ doc, err := parser.docNode()
+ t.Log(test.input)
if err != nil {
t.Fatal(err)
}
t.Log(doc)
+ if test.expected != "" {
+ if doc.String() != test.expected {
+ t.Fatal("result document doesn't match")
+ } else {
+ t.Log("document matched expected output")
+ }
+ } else if doc.String() != test.input {
+ t.Fatal("result document didn't pass roundtrip")
+ } else {
+ t.Log("document passed roundtrip")
+ }
})
}
}