From f499d9b9ce5a0285098af3f8987fe16fae545b02 Mon Sep 17 00:00:00 2001 From: early Date: Mon, 30 Dec 2024 22:54:36 -0700 Subject: [PATCH] improve testing/debugging; fix discrepancy between with/if parsing --- .local/.gitignore | 3 + .local/README | 1 + web/htmlt/parse/parse.go | 297 +++++++++++++++++++++------------ web/htmlt/parse/parse_test.go | 81 +++++---- web/htmlt/parse/tokens.go | 222 +++++++++++++++++------- web/htmlt/parse/tokens_test.go | 6 +- 6 files changed, 414 insertions(+), 196 deletions(-) create mode 100644 .local/.gitignore create mode 100644 .local/README diff --git a/.local/.gitignore b/.local/.gitignore new file mode 100644 index 0000000..4a16a23 --- /dev/null +++ b/.local/.gitignore @@ -0,0 +1,3 @@ +* +!README +!.gitignore \ No newline at end of file diff --git a/.local/README b/.local/README new file mode 100644 index 0000000..f72f32b --- /dev/null +++ b/.local/README @@ -0,0 +1 @@ +This folder is safe to save your local artifacts in! \ No newline at end of file diff --git a/web/htmlt/parse/parse.go b/web/htmlt/parse/parse.go index e3786bd..6c1e630 100644 --- a/web/htmlt/parse/parse.go +++ b/web/htmlt/parse/parse.go @@ -3,56 +3,65 @@ package parse import ( "fmt" "io" - "iter" + "log" "slices" "strings" ) -type tokens iter.Seq[*Token] +type ParseState int -func (toks tokens) nextOf(tts ...TokenType) *Token { - var out *Token - for tok := range toks.until(tts...) { - out = tok - } - if slices.Contains(tts, out.Type) { - return out - } else { - return nil - } -} +const ( + PSTATE_CONTENT = ParseState(iota) + PSTATE_ATTRS +) -// untilInclusive gets the next N tokens in p, until one is in types, or is EOF. -func (toks tokens) until(tts ...TokenType) tokens { - return func(yield func(*Token) bool) { - for tok := range toks { - if !yield(tok) || slices.Contains(tts, tok.Type) || tok.Type == EOF { - return - } - } - } -} +// contentFunc is a reader over tokens that may return any number of +// arbitrary nodes, like readContent and readAttributes. 
This is used to provide +// callbacks to nodes where reading of mixed content is interrupted, like with +// templates. This usage typically means a contentFunc passing itself as +// a callback to templateNode. +type contentFunc func(until ...TokenType) ([]any, error) -func (toks tokens) discardUntil(tts ...TokenType) { - for range toks.until(tts...) { - } +// parser is a parser over a token iterator. +// Parsers have two types of readers: content and node. Content readers have a +// signature that conforms to contentFunc, and may return any number of nodes of +// types depending on the reader. Node readers return a single node of defined +// type. Node readers may also accept a callback to a content reader, if they +// may contain variable types of content. +type parser struct { + tokens tokens + + debug *log.Logger } -func (toks tokens) seq() iter.Seq[*Token] { - return iter.Seq[*Token](toks) +// newParser returns a new parser with optional debug logging. +// If debug is nil, log output is discarded. Debug logging is also passed to the +// underlying tokenizer. +func newParser(r io.Reader, debug *log.Logger) *parser { + if debug == nil { + debug = log.New(io.Discard, "", 0) + } + tokenizer := newTokenizer(r, debug) + return &parser{ + tokens: tokenizer.all(), + debug: debug, + } } -// readContent returns a list of "content tokens", wherever they're acceptable. -func readContent(toks tokens) ([]any, error) { +// anyContent is a content reader for the base types of content in an HTML +// template file; text, HTML, and templates. +func (p *parser) anyContent(until ...TokenType) ([]any, error) { + p.debug.Printf("reading content until any %v", until) out := make([]any, 0) var text *TextNode resolveText := func() { if text != nil { + p.debug.Printf("content resolving text %s", text.Value) out = append(out, text) } text = nil } - for tok := range toks { + for tok := range p.tokens.until(until...) 
{ switch tok.Type { case TEXT, WHITESPACE: if text == nil { @@ -61,24 +70,25 @@ func readContent(toks tokens) ([]any, error) { text.Value += tok.Literal case TEMPLATE_OPEN: resolveText() - node, err := readTemplate(toks) + node, err := p.templateNode(p.anyContent) if err != nil { return nil, err } out = append(out, node) case TAG_OPEN: resolveText() - node, err := readElement(toks) + node, err := p.elementNode() if err != nil { return nil, err } out = append(out, node) case TAG_END_OPEN: - toks.discardUntil(TAG_CLOSE) + p.tokens.discardUntil(TAG_CLOSE) default: } } resolveText() + p.debug.Printf("read content %v", out) return out, nil } @@ -94,9 +104,11 @@ func (d *Document) String() string { return out } -func (d *Document) read(toks tokens) (err error) { - d.Children, err = readContent(toks) - return err +func (p *parser) docNode() (d *Document, err error) { + p.debug.Println("reading document") + d = new(Document) + d.Children, err = p.anyContent() + return d, err } type TextNode struct { @@ -115,40 +127,64 @@ func (node *TemplateNode) String() string { return fmt.Sprintf("{{ %s }}", node.Value) } -// read reads tokens into the TemplateNode. -func readTemplate(toks tokens) (any, error) { +// templateNode is a node reader, and it may return a TemplateNode or any of +// the template "block" nodes (TemplateIf, etc.) if the template contains the +// corresponding keyword. 
+func (p *parser) templateNode(callback contentFunc) (any, error) { + p.debug.Println("reading template") node := new(TemplateNode) - for tok := range toks.until(TEMPLATE_CLOSE) { - switch tok.Type { - case TEMPLATE_KEYWORD: + first := true + +readTemplate: + for tok := range p.tokens.until(TEMPLATE_CLOSE) { + if first && tok.Type == TEMPLATE_KEYWORD { + p.debug.Printf("template starts with keyword '%s'; deferring", tok.Literal) switch tok.Literal { case "if": - return readTemplateIf(toks) + return p.templateIfNode(callback) case "else": - next := toks.nextOf(TEMPLATE_KEYWORD, TEMPLATE_CLOSE) + next := p.tokens.nextOf(TEMPLATE_KEYWORD, TEMPLATE_CLOSE) if next.Literal == "if" { - return readTemplateIf(toks) + return p.templateIfNode(callback) } else if next.Type == TEXT { - toks.discardUntil(TEMPLATE_CLOSE) + p.tokens.discardUntil(TEMPLATE_CLOSE) } return &TemplateElse{}, nil + case "with": + return p.templateWithNode(callback) case "end": - toks.discardUntil(TEMPLATE_CLOSE) + p.tokens.discardUntil(TEMPLATE_CLOSE) return &TemplateEnd{}, nil default: return nil, fmt.Errorf("unrecognized template keyword %s", tok.Literal) } + } + switch tok.Type { case TEXT: node.Value += " " + tok.Literal + case TEMPLATE_CLOSE: + break readTemplate + default: + return nil, fmt.Errorf("unexpected token %s in templateNode", tok) } } node.Value = strings.TrimSpace(node.Value) + p.debug.Printf("read template expression %s", node) return node, nil } type TemplateElse struct{} + +func (node *TemplateElse) String() string { + return "TemplateElse" +} + type TemplateEnd struct{} +func (node *TemplateEnd) String() string { + return "TemplateEnd" +} + type TemplateIf struct { Condition string Then []any @@ -168,46 +204,93 @@ func (node *TemplateIf) String() string { return fmt.Sprintf("{{ if %s }}%s%s%s{{ end }}", node.Condition, JoinAny(node.Then, ""), elifStr, elseStr) } -func readTemplateIf(toks tokens) (*TemplateIf, error) { +func (p *parser) templateIfNode(callback contentFunc) 
(*TemplateIf, error) { + p.debug.Println("reading template if") node := new(TemplateIf) - for tok := range toks.until(TEMPLATE_CLOSE) { + for tok := range p.tokens.until(TEMPLATE_CLOSE) { switch tok.Type { case TEXT, WHITESPACE: node.Condition += tok.Literal } } node.Condition = strings.TrimSpace(node.Condition) + + acc := make([]any, 0) isElse := false +readBlock: for { - then, err := readContent(toks.until(TEMPLATE_OPEN)) + then, err := callback(TEMPLATE_OPEN) if err != nil { return nil, err } if len(then) == 0 { break } - term := then[len(then)-1] - body := then[:len(then)-1] - switch v := term.(type) { + acc = append(acc, then...) + last := acc[len(acc)-1] + switch v := last.(type) { case *TemplateIf: - node.Then = append(node.Then, body...) + node.Then = acc[:len(acc)-1] + acc = make([]any, 0) // Elifs steal the else. Take it back. - node.Else = append(node.Else, v.Else...) + node.Else = v.Else v.Else = nil node.Elif = append(node.Elif, v) case *TemplateElse: - node.Then = append(node.Then, body...) 
+ node.Then = acc[:len(acc)-1] + acc = make([]any, 0) isElse = true case *TemplateEnd: if !isElse { - node.Then = body + node.Then = acc[:len(acc)-1] } else { - node.Else = body + node.Else = acc[:len(acc)-1] isElse = false } - default: + break readBlock + } + } + p.debug.Printf("read template if %s", node) + return node, nil +} + +type TemplateWith struct { + Expression string + Content []any +} + +func (node *TemplateWith) String() string { + return fmt.Sprintf("{{ with %s }}%s{{ end }}", node.Expression, fmt.Sprint(node.Content...)) +} + +func (p *parser) templateWithNode(callback contentFunc) (*TemplateWith, error) { + p.debug.Println("reading template with") + node := new(TemplateWith) + for tok := range p.tokens.until(TEMPLATE_CLOSE) { + switch tok.Type { + case TEXT, WHITESPACE: + node.Expression += tok.Literal } } + node.Expression = strings.TrimSpace(node.Expression) + + acc := make([]any, 0) + for { + then, err := callback(TEMPLATE_OPEN) + if err != nil { + return nil, err + } + if len(then) == 0 { + break + } + acc = append(acc, then...) + last := acc[len(acc)-1] + if _, ok := last.(*TemplateEnd); ok { + node.Content = acc[:len(acc)-1] + break + } + } + p.debug.Printf("read template with %s", node) return node, nil } @@ -220,7 +303,7 @@ var voidElems = []string{ type Attribute struct { Name string - Value string + Value []any // Boolean is true if attr is a "boolean attribute"; it has no string value // in the source HTML, so it just represents true-if-present. Boolean is // *not* the value of the boolean attribute. 
@@ -231,77 +314,82 @@ func (attr Attribute) String() string { if attr.Boolean { return attr.Name } else { - return fmt.Sprintf(`%s="%s"`, attr.Name, attr.Value) + valStrs := make([]string, len(attr.Value)) + for i, val := range attr.Value { + valStrs[i] = fmt.Sprint(val) + } + return fmt.Sprintf(`%s="%s"`, attr.Name, strings.Join(valStrs, " ")) + } +} + +func (p *parser) attrContent(until ...TokenType) ([]any, error) { + p.debug.Println("reading tag attributes") + attrs := make([]any, 0) + attr := Attribute{} + for tok := range p.tokens.until(until...) { + switch tok.Type { + case TEXT: + attr.Name = tok.Literal + case TAG_EQ: + p.tokens.discardUntil(TAG_QUOTE) + values, err := p.anyContent() + if err != nil { + return nil, err + } + attr.Value = append(attr.Value, values...) + attrs = append(attrs, attr) + attr = Attribute{} + case TEMPLATE_OPEN: + tmpl, err := p.templateNode(p.attrContent) + if err != nil { + return nil, err + } + attrs = append(attrs, tmpl) + } } + return attrs, nil } type ElementNode struct { Name string - Attributes []Attribute + Attributes []any Children []any void bool } func (node *ElementNode) String() string { - attrsRaw := make([]string, len(node.Attributes)) - for i, attr := range node.Attributes { - attrsRaw[i] = attr.String() - } - attrs := "" - if len(attrsRaw) > 0 { - attrs = strings.Join(attrsRaw, " ") + attrs := fmt.Sprint(node.Attributes...) 
+ if attrs != "" { + attrs = " " + attrs } if node.void { - return fmt.Sprintf("<%s %s />", node.Name, attrs) + return fmt.Sprintf("<%s%s />", node.Name, attrs) } else { inner := "" for _, child := range node.Children { inner += fmt.Sprint(child) } - return fmt.Sprintf("<%s %s >%s", node.Name, attrs, inner, node.Name) + return fmt.Sprintf("<%s%s>%s", node.Name, attrs, inner, node.Name) } } -func readElement(toks tokens) (*ElementNode, error) { +func (p *parser) elementNode() (*ElementNode, error) { + p.debug.Println("reading element") node := new(ElementNode) - node.Name = toks.nextOf(TEXT).Literal + node.Name = p.tokens.nextOf(TEXT).Literal node.void = slices.Contains(voidElems, node.Name) - tagToks := toks.until(TAG_CLOSE, TAG_VOID_CLOSE) - next := tagToks.nextOf(TEXT) - for next != nil { - name := next.Literal - next = tagToks.nextOf(TEXT, TAG_EQ) - // If it's text, this is a boolean attribute. Otherwise, it has a value. - if next == nil || next.Type == TEXT { - node.Attributes = append(node.Attributes, Attribute{ - Name: name, - Boolean: true, - }) - } else { - value := "" - // Advance to the start of the value... - tagToks.discardUntil(TAG_QUOTE) - // Then read until the end. 
- for tok := range tagToks.until(TAG_QUOTE) { - switch tok.Type { - case TEXT: - value += tok.Literal - } - } - node.Attributes = append(node.Attributes, Attribute{ - Name: name, - Value: value, - }) - next = tagToks.nextOf(TEXT) - } + attrs, err := p.attrContent(TAG_CLOSE, TAG_VOID_CLOSE) + if err != nil { + return nil, err } + node.Attributes = attrs if node.void { return node, nil } - children, err := readContent(toks.until(TAG_END_OPEN)) + children, err := p.anyContent() if err != nil { return nil, err } @@ -310,7 +398,6 @@ func readElement(toks tokens) (*ElementNode, error) { } func Parse(r io.Reader) (any, error) { - doc := new(Document) - err := doc.read(Tokenize(r)) - return doc, err + parser := newParser(r, nil) + return parser.docNode() } diff --git a/web/htmlt/parse/parse_test.go b/web/htmlt/parse/parse_test.go index b9eae1e..44c68fa 100644 --- a/web/htmlt/parse/parse_test.go +++ b/web/htmlt/parse/parse_test.go @@ -1,27 +1,62 @@ package parse import ( - "fmt" - "slices" + "log" + "os" "strings" "testing" ) +// basicParseTest is a parser test. +// if expected is given, the test result for input, stringified, is compared +// to it; otherwise, input is expected to pass a roundtrip. +type basicParseTest struct { + name string + input string + + expected string +} + func TestParseBasic(t *testing.T) { - testStrings := map[string]string{ - "hello": "Hello, World!", - "template": "Hello, {{ `template` }}!", - "html": "

Hello, HTML!


", - "html+template": "{{ if .condition }}

{{- .text -}}

{{ end }}", + tests := []basicParseTest{ + {name: "expression", input: "Hello, {{ `template` }}!"}, + {name: "if", input: "{{ if .condition }}hello{{ end }}"}, + {name: "if-nested", input: "{{ if .message }}{{ .message }}{{ end }}"}, + {name: "if-else", input: "{{ if .condition }}foo{{ else }}bar{{ end }}"}, + {name: "if-else-nested", input: "{{ if .m1 }}{{ .m1 }}{{ else }}{{ .m2 }}{{ end }}"}, + {name: "if-elif", input: "{{ if .c1 }}foo{{ else if .c2 }}bar{{ end }}"}, + {name: "if-elif-nested", input: "{{ if .m1 }}{{ .m1 }}{{ else if .m2 }}{{ .m2 }}{{ end }}"}, + {name: "if-elif-else", input: "{{ if .c1 }}foo{{ else if .c2 }}bar{{ else }}baz{{ end }}"}, + {name: "if-elif-else-nested", input: "{{ if .m1 }}{{ .m1 }}{{ else if m2 }}{{ .m2 }}{{ else }}{{ .m3 }}{{ end }}"}, + {name: "with", input: "{{ with .message }}hello{{ end }}"}, + {name: "with-nested", input: "{{ with .message }}{{ . }}{{ end }}"}, } - for name, val := range testStrings { - t.Run(name, func(t *testing.T) { - doc, err := Parse(strings.NewReader(val)) - t.Log(val) + /* + testStrings := map[string]string{ + //"if": "{{ if .condition }}hello{{ end }}", + //"if-else": "{{ if .condition }}hello{{ else }}{{ .else }}{{ end }}", + //"if-elif": "{{ if .condition }}hello{{ else if .other }}world{{ end }}", + //"if-elif-else": "{{ if .condition }}hello{{ else if .other }}foo{{ else }}bar{{ end }}", + //"with": "{{ with .value }}abc{{ . }}{{ end }}", + //"html": "

Hello, HTML!


", + //"html+template": "{{ if .condition }}

{{- .text -}}

{{ end }}", + } + */ + for _, test := range tests { + debug := log.New(os.Stderr, "", log.Lshortfile) + t.Run(test.name, func(t *testing.T) { + parser := newParser(strings.NewReader(test.input), debug) + doc, err := parser.docNode() + t.Log(test.input) if err != nil { t.Fatal(err) } t.Log(doc) + if test.expected != "" && doc.String() != test.expected { + t.Fatal("result document doesn't match") + } else if doc.String() != test.input { + t.Fatal("result document didn't pass roundtrip") + } }) } } @@ -42,33 +77,15 @@ func TestParseHTML(t *testing.T) { }) } } -func TestParseTemplate(t *testing.T) { - testStrings := map[string]string{ - "if": "{{ if .condition }}Hello{{ end }}", - "if-else": "{{ if .condition }}Hello{{ else }}World{{ end }}", - "if-elif": "{{ if .condition }}Hello{{ else if .other.condition }}World{{ end }}", - "if-elif-else": "{{ if .condition }}One{{ else if .other.condition }}Two{{ else }}Three{{ end }}", - } - for name, val := range testStrings { - t.Run(name, func(t *testing.T) { - doc, err := Parse(strings.NewReader(val)) - t.Log(val) - if err != nil { - t.Fatal(err) - } - t.Log(doc) - }) - } -} func TestParseComplex(t *testing.T) { testStrings := map[string]string{ - "template-attr": ``, + "attr-template-value": ``, + "attr-if-template": `
`, + "attr-if-else-template": `
`, } for name, val := range testStrings { t.Run(name, func(t *testing.T) { - toks := slices.Collect(Tokenize(strings.NewReader(val)).seq()) - fmt.Println(toks) doc, err := Parse(strings.NewReader(val)) t.Log(val) if err != nil { diff --git a/web/htmlt/parse/tokens.go b/web/htmlt/parse/tokens.go index 791f696..14d7e90 100644 --- a/web/htmlt/parse/tokens.go +++ b/web/htmlt/parse/tokens.go @@ -5,25 +5,31 @@ import ( "errors" "fmt" "io" + "iter" + "log" + "slices" "strings" ) type TokenState int const ( - STATE_CONTENT = TokenState(iota) - STATE_TEMPLATE - STATE_TAG + TSTATE_CONTENT = TokenState(iota) + TSTATE_TEMPLATE + TSTATE_TAG + TSTATE_ATTRS ) func (ts TokenState) String() string { switch ts { - case STATE_CONTENT: + case TSTATE_CONTENT: return "STATE_CONTENT" - case STATE_TEMPLATE: + case TSTATE_TEMPLATE: return "STATE_TEMPLATE" - case STATE_TAG: + case TSTATE_TAG: return "STATE_TAG" + case TSTATE_ATTRS: + return "STATE_ATTRS" default: return "STATE_UNKNOWN" } @@ -92,10 +98,23 @@ func (t *Token) String() string { } type tokenizer struct { - r *bufio.Reader - state TokenState - nextToken *Token - err error + r *bufio.Reader + state TokenState + prevStates []TokenState + nextToken *Token + err error + + debug *log.Logger +} + +func newTokenizer(r io.Reader, debug *log.Logger) *tokenizer { + if debug == nil { + debug = log.New(io.Discard, "", 0) + } + return &tokenizer{ + r: bufio.NewReader(r), + debug: debug, + } } // peek returns the next i characters, and "true" if i characters were found. 
@@ -111,6 +130,20 @@ func (t *tokenizer) advance(i int) { t.r.Discard(i) } +func (t *tokenizer) setState(ts TokenState) { + t.debug.Printf("pushing tokenizer state %s", ts) + t.prevStates = append(t.prevStates, t.state) + t.state = ts +} + +func (t *tokenizer) revertState() { + if len(t.prevStates) > 0 { + t.state = t.prevStates[len(t.prevStates)-1] + t.prevStates = t.prevStates[:len(t.prevStates)-1] + t.debug.Printf("popping tokenizer state %s", t.state) + } +} + func (t *tokenizer) nextContent() (*Token, error) { acc := make([]rune, 100) cursor := 0 @@ -126,7 +159,7 @@ func (t *tokenizer) nextContent() (*Token, error) { switch token { case "{{-": out := &Token{TEXT, string(acc[:cursor])} - t.state = STATE_TEMPLATE + t.setState(TSTATE_TEMPLATE) return out, nil } } @@ -134,11 +167,11 @@ func (t *tokenizer) nextContent() (*Token, error) { switch token { case "{{": out := &Token{TEXT, string(acc[:cursor])} - t.state = STATE_TEMPLATE + t.setState(TSTATE_TEMPLATE) return out, nil case " 0 { + t.setState(TSTATE_ATTRS) + } + return &Token{TEXT, string(acc[:cursor])}, nil + } else { + accumulate(r) + } + } +} + +func (t *tokenizer) nextAttrs() (*Token, error) { + acc := make([]rune, 10) + cursor := 0 + accumulate := func(r rune) { + if cursor >= len(acc) { + acc = append(acc, make([]rune, 10)...) 
+ } + acc[cursor] = r + cursor++ + } + for { + if token, ok := t.peek(2); ok { + switch token { + case "{{": + out := &Token{TEXT, string(acc[:cursor])} + t.setState(TSTATE_TEMPLATE) + return out, nil + case "/>": + out := &Token{TEXT, string(acc[:cursor])} + t.setState(TSTATE_TAG) + return out, nil + } + } + if token, ok := t.peek(1); ok { + switch token { + case "<": + out := &Token{TAG_OPEN, token} + t.advance(1) + return out, nil + case "=": + out := &Token{TEXT, string(acc[:cursor])} + t.nextToken = &Token{TAG_EQ, token} + t.advance(1) + return out, nil + case `"`: + out := &Token{TEXT, string(acc[:cursor])} + t.nextToken = &Token{TAG_QUOTE, token} + t.advance(1) + return out, nil + case ">": + out := &Token{TEXT, string(acc[:cursor])} + t.setState(TSTATE_TAG) return out, nil } } @@ -304,7 +379,7 @@ func (t *tokenizer) nextTag() (*Token, error) { } } -func (t *tokenizer) next2() (*Token, error) { +func (t *tokenizer) next() (*Token, error) { var next *Token var err error for next == nil && err == nil { @@ -314,12 +389,14 @@ func (t *tokenizer) next2() (*Token, error) { return next, nil } switch t.state { - case STATE_CONTENT: + case TSTATE_CONTENT: next, err = t.nextContent() - case STATE_TEMPLATE: + case TSTATE_TEMPLATE: next, err = t.nextTemplate() - case STATE_TAG: + case TSTATE_TAG: next, err = t.nextTag() + case TSTATE_ATTRS: + next, err = t.nextAttrs() default: return nil, fmt.Errorf("unknown state %s", t.state) } @@ -327,9 +404,12 @@ func (t *tokenizer) next2() (*Token, error) { next = nil } } + t.debug.Printf("got %s", next) return next, err } +type tokens iter.Seq[*Token] + // all returns an iterator over all tokens produced by the tokenizer. // Stops iterating on EOF or error. 
func (t *tokenizer) all() tokens { @@ -338,7 +418,7 @@ func (t *tokenizer) all() tokens { if t.err != nil { return } - tok, err := t.next2() + tok, err := t.next() if err != nil { yield(&Token{ERROR, t.err.Error()}) break @@ -353,8 +433,34 @@ func (t *tokenizer) all() tokens { } } -func Tokenize(r io.Reader) tokens { - tkns := new(tokenizer) - tkns.r = bufio.NewReader(r) - return tkns.all() +func (toks tokens) nextOf(tts ...TokenType) *Token { + var out *Token + for tok := range toks.until(tts...) { + out = tok + } + if out != nil && slices.Contains(tts, out.Type) { + return out + } else { + return nil + } +} + +// until yields tokens from toks through the first one whose type is in tts or EOF. +func (toks tokens) until(tts ...TokenType) tokens { + return func(yield func(*Token) bool) { + for tok := range toks { + if !yield(tok) || slices.Contains(tts, tok.Type) || tok.Type == EOF { + return + } + } + } +} + +func (toks tokens) discardUntil(tts ...TokenType) { + for range toks.until(tts...) { + } +} + +func (toks tokens) seq() iter.Seq[*Token] { + return iter.Seq[*Token](toks) } diff --git a/web/htmlt/parse/tokens_test.go b/web/htmlt/parse/tokens_test.go index f29e6ee..b943f93 100644 --- a/web/htmlt/parse/tokens_test.go +++ b/web/htmlt/parse/tokens_test.go @@ -1,6 +1,8 @@ package parse import ( + "log" + "os" "slices" "strings" "testing" @@ -14,8 +16,10 @@ func TestTokenize(t *testing.T) { "html+template": "{{ if .condition }}

{{- .text -}}

{{ end }}", } for name, val := range testStrings { + debug := log.New(os.Stderr, "", log.Lshortfile) t.Run(name, func(t *testing.T) { - toks := slices.Collect(Tokenize(strings.NewReader(val)).seq()) + tokenizer := newTokenizer(strings.NewReader(val), debug) + toks := slices.Collect(tokenizer.all().seq()) t.Log(val) t.Log(toks) }) -- 2.39.5