From: early Date: Sun, 5 Jan 2025 03:47:26 +0000 (-0700) Subject: update parser, but rework incoming X-Git-Url: https://git.earlybird.gay/?a=commitdiff_plain;h=refs%2Fheads%2Finternal-rebuild;p=today update parser, but rework incoming --- diff --git a/web/htmlt/parse/parse.go b/web/htmlt/parse/parse.go index cb0f682..336c24e 100644 --- a/web/htmlt/parse/parse.go +++ b/web/htmlt/parse/parse.go @@ -8,19 +8,12 @@ import ( "strings" ) -type ParseState int - -const ( - PSTATE_CONTENT = ParseState(iota) - PSTATE_ATTRS -) - // contentFunc is a reader over tokens that may return any number of // arbitrary nodes, like readContent and readAttributes. This is used to provide // callbacks to nodes where reading of mixed content is interrupted, like with // templates. This usage typically means a contentFunc passing itself as // a callback to readTemplate. -type contentFunc func(until ...TokenType) ([]any, error) +type contentFunc func() ([]any, error) // parser is a parser over a token iterator. // Parsers have two types of readers: content and node. Content readers have a @@ -30,6 +23,20 @@ type contentFunc func(until ...TokenType) ([]any, error) // may contain variable types of content. type parser struct { tokens tokens + // The parser tracks what TokenTypes terminate the current parsing sequence + // with the "terms" stack. Any TokenType in term should exit the current + // sequence. By default, terms is empty and term is EOF. + // terms is the stack of terminator sets, not including the current set. + terms [][]TokenType + // term is the current terminator set. + // You should use *parser.shouldTerminate instead of accessing this + // directly, as shouldTerminate also manages *parser.lastTerm's state. + term []TokenType + // lastTerm is the last token for which *parser.shouldTerminate returned + // "true". + // Some sequences benefit from knowing why a callback/callout terminated, + // so this is provided as a convenience for those cases. + lastTerm *Token debug *log.Logger } @@ -48,10 +55,42 @@ func newParser(r io.Reader, debug *log.Logger) *parser { } } +// pushTerminator pushes a set of TokenTypes to terminate parse sequences. +// The purpose of this function is to communicate *across functions* where the +// current parsing behavior should stop; using it internally in a function to +// limit a loop can cause confusing behavior on recursive calls. +// +// Use *parser.popTerminator after the intended sequence has been exhausted. +func (p *parser) pushTerminator(term ...TokenType) { + term = append(p.term, term...) + p.debug.Println("(push) terminating sequences on", term) + p.terms = append(p.terms, p.term) + p.term = term +} + +func (p *parser) popTerminator() { + term := p.terms[len(p.terms)-1] + p.debug.Println("(pop) terminating sequences on", term) + p.term = term + p.terms = p.terms[:len(p.terms)-1] +} + +// shouldTerminate returns true if the current parser state indicates "on" +// should end the current parsing sequence. +// This also sets p.lastTerm to on if it returns true, allowing for functions to +// read why callbacks/callouts terminated their parsing. +func (p *parser) shouldTerminate(on *Token) bool { + if on.Type == EOF || slices.Contains(p.term, on.Type) { + p.lastTerm = on + return true + } + return false +} + // anyContent is a content reader for the base types of content in an HTML // template file; text, HTML, and templates. -func (p *parser) anyContent(until ...TokenType) ([]any, error) { - p.debug.Printf("reading content until any %v", until) +func (p *parser) anyContent() ([]any, error) { + p.debug.Printf("reading content until any %v", p.term) out := make([]any, 0) var text *TextNode resolveText := func() { @@ -61,7 +100,9 @@ func (p *parser) anyContent(until ...TokenType) ([]any, error) { } text = nil } - for tok := range p.tokens.until(until...) { + + for tok := range p.tokens { + done := p.shouldTerminate(tok) switch tok.Type { case TEXT, WHITESPACE: if text == nil { @@ -85,6 +126,13 @@ func (p *parser) anyContent(until ...TokenType) ([]any, error) { case TAG_END_OPEN: p.tokens.discardUntil(TAG_CLOSE) default: + if !done { + return nil, fmt.Errorf("unexpected token %s in anyContent", tok) + } + } + + if done { + break } } resolveText() @@ -135,8 +183,8 @@ func (p *parser) templateNode(callback contentFunc) (any, error) { node := new(TemplateNode) first := true -readTemplate: - for tok := range p.tokens.until(TEMPLATE_CLOSE) { +sequence: + for tok := range p.tokens { if first && tok.Type == TEMPLATE_KEYWORD { p.debug.Printf("template starts with keyword '%s'; deferring", tok.Literal) switch tok.Literal { @@ -165,7 +213,7 @@ readTemplate: case TEXT: node.Value += " " + tok.Literal case TEMPLATE_CLOSE: - break readTemplate + break sequence default: return nil, fmt.Errorf("unexpected token %s in templateNode", tok) } @@ -217,11 +265,12 @@ func (p *parser) templateIfNode(callback contentFunc) (*TemplateIf, error) { } node.Condition = strings.TrimSpace(node.Condition) + p.pushTerminator(TEMPLATE_OPEN) acc := make([]any, 0) isElse := false readBlock: for { - then, err := callback(TEMPLATE_OPEN) + then, err := callback() if err != nil { return nil, err } @@ -252,6 +301,7 @@ readBlock: break readBlock } } + p.popTerminator() p.debug.Printf("read template if %s", node) return node, nil } @@ -276,9 +326,10 @@ func (p *parser) templateWithNode(callback contentFunc) (*TemplateWith, error) { } node.Expression = strings.TrimSpace(node.Expression) + p.pushTerminator(TEMPLATE_OPEN) acc := make([]any, 0) for { - then, err := callback(TEMPLATE_OPEN) + then, err := callback() if err != nil { return nil, err } @@ -292,6 +343,7 @@ func (p *parser) templateWithNode(callback contentFunc) (*TemplateWith, error) { break } } + p.popTerminator() p.debug.Printf("read template with %s", node) return node, nil } @@ -316,9 +368,10 @@ func (p *parser) templateRangeNode(callback contentFunc) (*TemplateRange, error) } node.Expression = strings.TrimSpace(node.Expression) + p.pushTerminator(TEMPLATE_OPEN) acc := make([]any, 0) for { - then, err := callback(TEMPLATE_OPEN) + then, err := callback() if err != nil { return nil, err } @@ -332,6 +385,7 @@ func (p *parser) templateRangeNode(callback contentFunc) (*TemplateRange, error) break } } + p.popTerminator() p.debug.Printf("read template range %s", node) return node, nil } @@ -364,31 +418,57 @@ func (attr Attribute) String() string { } } -func (p *parser) attrContent(until ...TokenType) ([]any, error) { +func (p *parser) attrContent() ([]any, error) { p.debug.Println("reading tag attributes") attrs := make([]any, 0) attr := Attribute{} - for tok := range p.tokens.until(until...) { + resolveAttr := func() { + if attr.Name == "" { + return + } + if len(attr.Value) == 0 { + attr.Boolean = true + } + attrs = append(attrs, attr) + attr = Attribute{} + } + + for tok := range p.tokens { + done := p.shouldTerminate(tok) + fmt.Println(tok, done) switch tok.Type { case TEXT: + resolveAttr() attr.Name = tok.Literal case TAG_EQ: p.tokens.discardUntil(TAG_QUOTE) + p.pushTerminator(TAG_QUOTE) values, err := p.anyContent() + p.popTerminator() if err != nil { return nil, err } attr.Value = append(attr.Value, values...) - attrs = append(attrs, attr) - attr = Attribute{} + resolveAttr() case TEMPLATE_OPEN: + resolveAttr() tmpl, err := p.templateNode(p.attrContent) if err != nil { return nil, err } + done = done || p.shouldTerminate(p.lastTerm) attrs = append(attrs, tmpl) + default: + if !done { + return nil, fmt.Errorf("unexpected token %s in attrContent", tok) + } + } + if done { + break } } + resolveAttr() + p.debug.Println("read attributes", attrs) return attrs, nil } @@ -422,7 +502,10 @@ func (p *parser) elementNode() (*ElementNode, error) { node.Name = p.tokens.nextOf(TEXT).Literal node.void = slices.Contains(voidElems, node.Name) - attrs, err := p.attrContent(TAG_CLOSE, TAG_VOID_CLOSE) + p.pushTerminator(TAG_CLOSE, TAG_VOID_CLOSE) + attrs, err := p.attrContent() + p.popTerminator() + fmt.Println("element", attrs) if err != nil { return nil, err } @@ -431,7 +514,9 @@ func (p *parser) elementNode() (*ElementNode, error) { if node.void { return node, nil } + p.pushTerminator(TAG_END_OPEN) children, err := p.anyContent() + p.popTerminator() if err != nil { return nil, err } diff --git a/web/htmlt/parse/parse_test.go b/web/htmlt/parse/parse_test.go index 1137378..f5fccd7 100644 --- a/web/htmlt/parse/parse_test.go +++ b/web/htmlt/parse/parse_test.go @@ -17,7 +17,7 @@ type basicParseTest struct { expected string } -func TestParseBasic(t *testing.T) { +func TestParseBasicTemplate(t *testing.T) { tests := []basicParseTest{ {name: "expression", input: "Hello, {{ `template` }}!"}, {name: "if", input: "{{ if .condition }}hello{{ end }}"}, @@ -33,17 +33,6 @@ func TestParseBasic(t *testing.T) { {name: "range", input: "{{ range .messages }}hello{{ end }}"}, {name: "range-nested", input: "{{ range .messages }}{{ . }}{{ end }}"}, } - /* - testStrings := map[string]string{ - //"if": "{{ if .condition }}hello{{ end }}", - //"if-else": "{{ if .condition }}hello{{ else }}{{ .else }}{{ end }}", - //"if-elif": "{{ if .condition }}hello{{ else if .other }}world{{ end }}", - //"if-elif-else": "{{ if .condition }}hello{{ else if .other }}foo{{ else }}bar{{ end }}", - //"with": "{{ with .value }}abc{{ . }}{{ end }}", - //"html": "

Hello, HTML!


", - //"html+template": "{{ if .condition }}

{{- .text -}}

{{ end }}", - } - */ for _, test := range tests { debug := log.New(os.Stderr, "", log.Lshortfile) t.Run(test.name, func(t *testing.T) { @@ -63,37 +52,71 @@ func TestParseBasic(t *testing.T) { } } -func TestParseHTML(t *testing.T) { - testStrings := map[string]string{ - "void": "

Hello


World

", - "attrs": `
`, +func TestParseBasicHTML(t *testing.T) { + tests := []basicParseTest{ + {name: "element", input: "
Hello, 1!
Hello, 2!
"}, + {name: "element-nested", input: "
Hello,
world!
"}, + {name: "element-void-1", input: "
Hello
World
"}, + {name: "element-void-2", input: "
Hello
World", expected: "
Hello
World
"}, + {name: "element-class", input: `
Hello, world!
`}, + {name: "element-class-bool", input: `
Hello, world!
`}, + {name: "element-class-bool-multi", input: `
Hello, world!
`}, + {name: "element-class-combo", input: `
Hello, world!
`}, } - for name, val := range testStrings { - t.Run(name, func(t *testing.T) { - doc, err := Parse(strings.NewReader(val)) - t.Log(val) + for _, test := range tests { + debug := log.New(os.Stderr, "", log.Lshortfile) + t.Run(test.name, func(t *testing.T) { + parser := newParser(strings.NewReader(test.input), debug) + doc, err := parser.docNode() + t.Log(test.input) if err != nil { t.Fatal(err) } t.Log(doc) + if test.expected != "" { + if doc.String() != test.expected { + t.Fatal("result document doesn't match") + } else { + t.Log("document matched expected output") + } + } else if doc.String() != test.input { + t.Fatal("result document didn't pass roundtrip") + } else { + t.Log("document passed roundtrip") + } }) } } func TestParseComplex(t *testing.T) { - testStrings := map[string]string{ - "attr-template-value": ``, - "attr-if-template": `
`, - "attr-if-else-template": `
`, + tests := []basicParseTest{ + {name: "attr-expr", input: `
Hello, world!
`}, + {name: "attr-if", input: `
Hello, world!
`}, + {name: "attr-if-else", input: `
Hello, world!
`}, + {name: "attr-if-elif", input: `
Hello, world!
`}, + {name: "attr-if-elif-else", input: `
Hello, world!
`}, } - for name, val := range testStrings { - t.Run(name, func(t *testing.T) { - doc, err := Parse(strings.NewReader(val)) - t.Log(val) + for _, test := range tests { + debug := log.New(os.Stderr, "", log.Lshortfile) + t.Run(test.name, func(t *testing.T) { + parser := newParser(strings.NewReader(test.input), debug) + doc, err := parser.docNode() + t.Log(test.input) if err != nil { t.Fatal(err) } t.Log(doc) + if test.expected != "" { + if doc.String() != test.expected { + t.Fatal("result document doesn't match") + } else { + t.Log("document matched expected output") + } + } else if doc.String() != test.input { + t.Fatal("result document didn't pass roundtrip") + } else { + t.Log("document passed roundtrip") + } }) } } diff --git a/web/htmlt/parse/tokens.go b/web/htmlt/parse/tokens.go index 14d7e90..5d58ce3 100644 --- a/web/htmlt/parse/tokens.go +++ b/web/htmlt/parse/tokens.go @@ -75,6 +75,8 @@ func (tt TokenType) String() string { return "TAG_END_OPEN" case TAG_CLOSE: return "TAG_CLOSE" + case TAG_VOID_CLOSE: + return "TAG_VOID_CLOSE" case TAG_EQ: return "TAG_EQ" case TAG_QUOTE: @@ -386,6 +388,7 @@ func (t *tokenizer) next() (*Token, error) { if t.nextToken != nil { next := t.nextToken t.nextToken = nil + t.debug.Printf("got %s", next) return next, nil } switch t.state {