--- /dev/null
+# What's the plan, anyways
+
+I want to make templates and custom elements process concurrently. That means
+that `template.Execute` and the data loaders execute simultaneously, waiting on
+each other at critical points to yield a value or set a variable. This is going
+to require a significant refactor of everything, but frankly I think that is
+overdue outside of any feature goals, so that's just fine. I'm going to lay out
+a plan of attack for getting this feature live.
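+
+For illustration, here's a minimal sketch of the shape I'm going for, using a
+channel as the synchronization point. The loader and data here are made up for
+the example, not the project's real API:
+
+```go
+package main
+
+import (
+	"html/template"
+	"os"
+)
+
+func main() {
+	greeting := make(chan string, 1)
+	// The "data loader" runs concurrently with template execution.
+	go func() { greeting <- "Hello, World!" }()
+
+	tmpl := template.Must(template.New("page").Parse(`<p>{{ call .Greeting }}</p>`))
+	// Execution blocks on the channel only at the point the value is needed.
+	err := tmpl.Execute(os.Stdout, map[string]any{
+		"Greeting": func() string { return <-greeting },
+	})
+	if err != nil {
+		panic(err)
+	}
+}
+```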
+
+## Write a parser
+
+`x/net/html` is a really good library! It's also not what this project needs.
+It's focused on being a spec-compliant HTML parser and renderer, and templates
+aren't spec-compliant HTML in some circumstances. A parser that recognizes
+templates as separate from regular text would also be really handy!
+
+The first iteration of this parser is focused on matching the functionality
+the project has today, not adding anything new. To this end, it'll mostly leave
+the semantics of templates alone, outside of recognizing things like template
+blocks as having children.
--- /dev/null
+# Templates
+
+Templates are a mix of HTML and Go's standard templates. This document describes
+how template documents are tokenized and parsed before being processed.
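+
+As a rough usage sketch (assuming the package is imported as
+`git.earlybird.gay/today/parse`; the exact import path may differ):
+
+```go
+package main
+
+import (
+	"fmt"
+	"strings"
+
+	"git.earlybird.gay/today/parse"
+)
+
+func main() {
+	doc, err := parse.Parse(strings.NewReader(`<p>Hello, {{ .name }}!</p>`))
+	if err != nil {
+		panic(err)
+	}
+	fmt.Println(doc) // the parsed tree prints back out as a document
+}
+```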
+
+## Tokenization
+
+Documents are processed first by a tokenizer that splits them into a stream of
+sequential tokens. While the tokenizer isn't super concerned with syntax or
+semantics, and shouldn't throw errors for either on its own, it does have states
+that govern how tokens are produced. The main states are *content*, *template*,
+and *tag*, though each may have sub-states depending on what's being processed.
+
+### Content
+
+Content is a mix of plain text, templates, and tags. It's the simplest to
+tokenize, usually consisting of gathering text up until a template or tag starts
+and then outputting that as a TEXT token. TEXT tokens are literal text and
+whitespace making up the content body of the document.
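+
+For example, `Hello, {{ .name }}!` tokenizes roughly as:
+
+```
+TEXT(Hello, ) TEMPLATE_OPEN({{) TEXT(.name) TEMPLATE_CLOSE(}}) TEXT(!) EOF()
+```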
+
+### Templates
+
+Templates are Go template expressions. Sometimes these expressions include
+keywords like `if` or `range`, or the assignment operator `:=`. At the time of
+writing, pipelines are not processed into tokens, just left as plain text.
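+
+For example, `{{ if .condition }}` yields a keyword token between the
+delimiters, while the pipeline `.condition` is left as text:
+
+```
+TEMPLATE_OPEN({{) TEMPLATE_KEYWORD(if) TEXT(.condition) TEMPLATE_CLOSE(}})
+```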
+
+### Tags
+
+Tags are HTML tags. These tags have a name and attributes, consisting of value
+attributes `name="value"` and boolean attributes `name`. Because data attributes
+are written `:name`, colons may be included in attribute names.
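+
+For example, `<img src="x" hidden>` tokenizes roughly as:
+
+```
+TAG_OPEN(<) TEXT(img) TEXT(src) TAG_EQ(=) TAG_QUOTE(") TEXT(x) TAG_QUOTE(") TEXT(hidden) TAG_CLOSE(>) EOF()
+```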
module git.earlybird.gay/today
-go 1.22.4
+go 1.23.4
require (
golang.org/x/text v0.19.0
--- /dev/null
+package parse
+
+import (
+ "fmt"
+ "io"
+ "iter"
+ "slices"
+ "strings"
+)
+
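+// tokens is an iterator over the lexical tokens produced by the tokenizer.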
+type tokens iter.Seq[*Token]
+
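+// nextOf consumes tokens until it finds one whose type is in tts, returning
+// that token, or nil if the stream ends first.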
+func (toks tokens) nextOf(tts ...TokenType) *Token {
+ var out *Token
+ for tok := range toks.until(tts...) {
+ out = tok
+ }
+ if slices.Contains(tts, out.Type) {
+ return out
+ } else {
+ return nil
+ }
+}
+
+// until yields tokens from toks until one's type is in tts or is EOF; the
+// terminating token is yielded before iteration stops.
+func (toks tokens) until(tts ...TokenType) tokens {
+ return func(yield func(*Token) bool) {
+ for tok := range toks {
+ if !yield(tok) || slices.Contains(tts, tok.Type) || tok.Type == EOF {
+ return
+ }
+ }
+ }
+}
+
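+// discardUntil consumes and drops tokens up to and including the first token
+// whose type is in tts (or EOF).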
+func (toks tokens) discardUntil(tts ...TokenType) {
+ for range toks.until(tts...) {
+ }
+}
+
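+// seq exposes tokens as a plain iter.Seq, for use with helpers like
+// slices.Collect.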
+func (toks tokens) seq() iter.Seq[*Token] {
+ return iter.Seq[*Token](toks)
+}
+
+// readContent reads a list of content nodes (text, templates, and elements),
+// wherever content is acceptable.
+func readContent(toks tokens) ([]any, error) {
+ out := make([]any, 0)
+ var text *TextNode
+ resolveText := func() {
+ if text != nil {
+ out = append(out, text)
+ }
+ text = nil
+ }
+ for tok := range toks {
+ switch tok.Type {
+ case TEXT, WHITESPACE:
+ if text == nil {
+ text = new(TextNode)
+ }
+ text.Value += tok.Literal
+ case TEMPLATE_OPEN:
+ resolveText()
+ node, err := readTemplate(toks)
+ if err != nil {
+ return nil, err
+ }
+ out = append(out, node)
+ case TAG_OPEN:
+ resolveText()
+ node, err := readElement(toks)
+ if err != nil {
+ return nil, err
+ }
+ out = append(out, node)
+ case TAG_END_OPEN:
+ toks.discardUntil(TAG_CLOSE)
+ default:
+ }
+ }
+ resolveText()
+ return out, nil
+}
+
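+// Document is the root node of a parsed template document.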
+type Document struct {
+ Children []any
+}
+
+func (d *Document) String() string {
+ out := ""
+ for _, child := range d.Children {
+ out += fmt.Sprint(child)
+ }
+ return out
+}
+
+func (d *Document) read(toks tokens) (err error) {
+ d.Children, err = readContent(toks)
+ return err
+}
+
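+// TextNode is a run of literal text and whitespace.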
+type TextNode struct {
+ Value string
+}
+
+func (node *TextNode) String() string {
+ return node.Value
+}
+
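+// TemplateNode is a plain template expression with no special keyword; its
+// pipeline is kept as unparsed text.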
+type TemplateNode struct {
+ Value string
+}
+
+func (node *TemplateNode) String() string {
+ return fmt.Sprintf("{{ %s }}", node.Value)
+}
+
+// readTemplate reads a template expression, dispatching to block readers when
+// it opens with a keyword like if, else, or end.
+func readTemplate(toks tokens) (any, error) {
+ node := new(TemplateNode)
+ for tok := range toks.until(TEMPLATE_CLOSE) {
+ switch tok.Type {
+ case TEMPLATE_KEYWORD:
+ switch tok.Literal {
+ case "if":
+ return readTemplateIf(toks)
+ case "else":
+			next := toks.nextOf(TEMPLATE_KEYWORD, TEMPLATE_CLOSE)
+			// nextOf returns nil at EOF; treat a bare {{ else }} as an else
+			// marker either way.
+			if next != nil && next.Literal == "if" {
+				return readTemplateIf(toks)
+			}
+			return &TemplateElse{}, nil
+ case "end":
+ toks.discardUntil(TEMPLATE_CLOSE)
+ return &TemplateEnd{}, nil
+ default:
+ return nil, fmt.Errorf("unrecognized template keyword %s", tok.Literal)
+ }
+ case TEXT:
+ node.Value += " " + tok.Literal
+ }
+ }
+ node.Value = strings.TrimSpace(node.Value)
+ return node, nil
+}
+
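+// TemplateElse and TemplateEnd are markers returned while reading a block;
+// readTemplateIf folds them into the enclosing TemplateIf.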
+type TemplateElse struct{}
+type TemplateEnd struct{}
+
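+// TemplateIf is an if/else-if/else template block.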
+type TemplateIf struct {
+ Condition string
+ Then []any
+ Elif []*TemplateIf
+ Else []any
+}
+
+func (node *TemplateIf) String() string {
+ elifStr := ""
+ for _, elif := range node.Elif {
+ elifStr += fmt.Sprintf("{{ else if %s }}%s", elif.Condition, JoinAny(elif.Then, ""))
+ }
+ elseStr := ""
+ if len(node.Else) > 0 {
+ elseStr = fmt.Sprintf("{{ else }}%s", JoinAny(node.Else, ""))
+ }
+ return fmt.Sprintf("{{ if %s }}%s%s%s{{ end }}", node.Condition, JoinAny(node.Then, ""), elifStr, elseStr)
+}
+
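+// readTemplateIf reads the condition and branches of an if block, up to and
+// including its {{ end }}.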
+func readTemplateIf(toks tokens) (*TemplateIf, error) {
+ node := new(TemplateIf)
+ for tok := range toks.until(TEMPLATE_CLOSE) {
+ switch tok.Type {
+ case TEXT, WHITESPACE:
+ node.Condition += tok.Literal
+ }
+ }
+ node.Condition = strings.TrimSpace(node.Condition)
+ isElse := false
+ for {
+ then, err := readContent(toks.until(TEMPLATE_OPEN))
+ if err != nil {
+ return nil, err
+ }
+ if len(then) == 0 {
+ break
+ }
+ term := then[len(then)-1]
+ body := then[:len(then)-1]
+ switch v := term.(type) {
+ case *TemplateIf:
+ node.Then = append(node.Then, body...)
+ // Elifs steal the else. Take it back.
+ node.Else = append(node.Else, v.Else...)
+ v.Else = nil
+ node.Elif = append(node.Elif, v)
+ case *TemplateElse:
+ node.Then = append(node.Then, body...)
+ isElse = true
+		case *TemplateEnd:
+			if !isElse {
+				node.Then = append(node.Then, body...)
+			} else {
+				node.Else = append(node.Else, body...)
+			}
+			// {{ end }} closes this block; return before consuming content
+			// that belongs to the enclosing scope.
+			return node, nil
+ default:
+ }
+ }
+ return node, nil
+}
+
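+// voidElems are the HTML void elements, which never have children or a
+// closing tag.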
+var voidElems = []string{
+ "area", "base", "br", "col",
+ "embed", "hr", "img", "input",
+ "link", "meta", "param", "source",
+ "track", "wbr",
+}
+
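+// Attribute is a single name/value or boolean attribute on an element.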
+type Attribute struct {
+ Name string
+ Value string
+ // Boolean is true if attr is a "boolean attribute"; it has no string value
+ // in the source HTML, so it just represents true-if-present. Boolean is
+ // *not* the value of the boolean attribute.
+ Boolean bool
+}
+
+func (attr Attribute) String() string {
+ if attr.Boolean {
+ return attr.Name
+ } else {
+ return fmt.Sprintf(`%s="%s"`, attr.Name, attr.Value)
+ }
+}
+
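+// ElementNode is an HTML element with its attributes and children.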
+type ElementNode struct {
+ Name string
+ Attributes []Attribute
+ Children []any
+
+ void bool
+}
+
+func (node *ElementNode) String() string {
+	attrsRaw := make([]string, len(node.Attributes))
+	for i, attr := range node.Attributes {
+		attrsRaw[i] = attr.String()
+	}
+	// Join with a leading space so attribute-less tags render without a stray
+	// one, e.g. <br /> rather than <br  />.
+	attrs := ""
+	if len(attrsRaw) > 0 {
+		attrs = " " + strings.Join(attrsRaw, " ")
+	}
+	if node.void {
+		return fmt.Sprintf("<%s%s />", node.Name, attrs)
+	}
+	inner := ""
+	for _, child := range node.Children {
+		inner += fmt.Sprint(child)
+	}
+	return fmt.Sprintf("<%s%s>%s</%s>", node.Name, attrs, inner, node.Name)
+}
+
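+// readElement reads an element's name, attributes, and (unless it's a void
+// element) its children.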
+func readElement(toks tokens) (*ElementNode, error) {
+	node := new(ElementNode)
+	name := toks.nextOf(TEXT)
+	if name == nil {
+		return nil, fmt.Errorf("expected element name")
+	}
+	node.Name = name.Literal
+	node.void = slices.Contains(voidElems, node.Name)
+
+ tagToks := toks.until(TAG_CLOSE, TAG_VOID_CLOSE)
+ next := tagToks.nextOf(TEXT)
+ for next != nil {
+ name := next.Literal
+ next = tagToks.nextOf(TEXT, TAG_EQ)
+ // If it's text, this is a boolean attribute. Otherwise, it has a value.
+ if next == nil || next.Type == TEXT {
+ node.Attributes = append(node.Attributes, Attribute{
+ Name: name,
+ Boolean: true,
+ })
+ } else {
+ value := ""
+ // Advance to the start of the value...
+ tagToks.discardUntil(TAG_QUOTE)
+ // Then read until the end.
+ for tok := range tagToks.until(TAG_QUOTE) {
+ switch tok.Type {
+ case TEXT:
+ value += tok.Literal
+ }
+ }
+ node.Attributes = append(node.Attributes, Attribute{
+ Name: name,
+ Value: value,
+ })
+ next = tagToks.nextOf(TEXT)
+ }
+ }
+
+ if node.void {
+ return node, nil
+ }
+ children, err := readContent(toks.until(TAG_END_OPEN))
+ if err != nil {
+ return nil, err
+ }
+ node.Children = children
+ return node, nil
+}
+
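+// Parse tokenizes and parses a template document from r.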
+func Parse(r io.Reader) (any, error) {
+ doc := new(Document)
+ err := doc.read(Tokenize(r))
+ return doc, err
+}
--- /dev/null
+package parse
+
+import (
+	"slices"
+	"strings"
+	"testing"
+)
+
+func TestParseBasic(t *testing.T) {
+ testStrings := map[string]string{
+ "hello": "Hello, World!",
+ "template": "Hello, {{ `template` }}!",
+ "html": "<div><p>Hello, HTML!</p><br></div>",
+ "html+template": "{{ if .condition }}<p>{{- .text -}}</p>{{ end }}",
+ }
+ for name, val := range testStrings {
+ t.Run(name, func(t *testing.T) {
+ doc, err := Parse(strings.NewReader(val))
+ t.Log(val)
+ if err != nil {
+ t.Fatal(err)
+ }
+ t.Log(doc)
+ })
+ }
+}
+
+func TestParseHTML(t *testing.T) {
+ testStrings := map[string]string{
+ "void": "<p>Hello</p><br><p>World</p>",
+ "attrs": `<div class = "outer"><div my-boolean class="inner"></div></div>`,
+ }
+ for name, val := range testStrings {
+ t.Run(name, func(t *testing.T) {
+ doc, err := Parse(strings.NewReader(val))
+ t.Log(val)
+ if err != nil {
+ t.Fatal(err)
+ }
+ t.Log(doc)
+ })
+ }
+}
+
+func TestParseTemplate(t *testing.T) {
+ testStrings := map[string]string{
+ "if": "{{ if .condition }}Hello{{ end }}",
+ "if-else": "{{ if .condition }}Hello{{ else }}World{{ end }}",
+ "if-elif": "{{ if .condition }}Hello{{ else if .other.condition }}World{{ end }}",
+ "if-elif-else": "{{ if .condition }}One{{ else if .other.condition }}Two{{ else }}Three{{ end }}",
+ }
+ for name, val := range testStrings {
+ t.Run(name, func(t *testing.T) {
+ doc, err := Parse(strings.NewReader(val))
+ t.Log(val)
+ if err != nil {
+ t.Fatal(err)
+ }
+ t.Log(doc)
+ })
+ }
+}
+
+func TestParseComplex(t *testing.T) {
+ testStrings := map[string]string{
+ "template-attr": `<img src="{{ .img }}">`,
+ }
+ for name, val := range testStrings {
+ t.Run(name, func(t *testing.T) {
+ toks := slices.Collect(Tokenize(strings.NewReader(val)).seq())
+			t.Log(toks)
+ doc, err := Parse(strings.NewReader(val))
+ t.Log(val)
+ if err != nil {
+ t.Fatal(err)
+ }
+ t.Log(doc)
+ })
+ }
+}
--- /dev/null
+package parse
+
+import (
+ "fmt"
+ "regexp"
+ "strings"
+)
+
+var (
+ HtmlNameRegexp = regexp.MustCompile("^[a-zA-Z][a-zA-Z0-9]*(?:-[a-zA-Z][a-zA-Z0-9]*)*$")
+)
+
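+// IsHtmlName reports whether text is a valid HTML name: dash-separated
+// segments of letters and digits, each starting with a letter.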
+func IsHtmlName(text string) bool {
+ return HtmlNameRegexp.MatchString(text)
+}
+
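+// CutQuotes trims surrounding double quotes from text, reporting whether both
+// quotes were present.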
+func CutQuotes(text string) (string, bool) {
+ preQuote, preOk := strings.CutPrefix(text, `"`)
+ postQuote, postOk := strings.CutSuffix(preQuote, `"`)
+ return postQuote, preOk && postOk
+}
+
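+// JoinAny stringifies vals and joins them with sep.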
+func JoinAny(vals []any, sep string) string {
+ valsStr := make([]string, len(vals))
+ for i, val := range vals {
+		valsStr[i] = fmt.Sprint(val)
+ }
+ return strings.Join(valsStr, sep)
+}
--- /dev/null
+package parse
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "io"
+ "strings"
+)
+
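+// TokenState identifies which of the tokenizer's state machines is active.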
+type TokenState int
+
+const (
+ STATE_CONTENT = TokenState(iota)
+ STATE_TEMPLATE
+ STATE_TAG
+)
+
+func (ts TokenState) String() string {
+ switch ts {
+ case STATE_CONTENT:
+ return "STATE_CONTENT"
+ case STATE_TEMPLATE:
+ return "STATE_TEMPLATE"
+ case STATE_TAG:
+ return "STATE_TAG"
+ default:
+ return "STATE_UNKNOWN"
+ }
+}
+
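+// TokenType identifies the kind of a Token.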
+type TokenType int
+
+const (
+ ERROR = TokenType(iota)
+ EOF
+ TEXT
+ WHITESPACE
+ TEMPLATE_OPEN // {{
+ TEMPLATE_CLOSE // }}
+ TEMPLATE_KEYWORD // range, with, else, if, end
+ TAG_OPEN // <
+ TAG_END_OPEN // </
+ TAG_CLOSE // >
+ TAG_VOID_CLOSE // />
+ TAG_EQ // =
+ TAG_QUOTE // "
+)
+
+func (tt TokenType) String() string {
+ switch tt {
+ case ERROR:
+ return "ERROR"
+ case EOF:
+ return "EOF"
+ case TEXT:
+ return "TEXT"
+	case WHITESPACE:
+		return "WHITESPACE"
+ case TEMPLATE_OPEN:
+ return "TEMPLATE_OPEN"
+ case TEMPLATE_CLOSE:
+ return "TEMPLATE_CLOSE"
+ case TEMPLATE_KEYWORD:
+ return "TEMPLATE_KEYWORD"
+ case TAG_OPEN:
+ return "TAG_OPEN"
+ case TAG_END_OPEN:
+ return "TAG_END_OPEN"
+	case TAG_CLOSE:
+		return "TAG_CLOSE"
+	case TAG_VOID_CLOSE:
+		return "TAG_VOID_CLOSE"
+ case TAG_EQ:
+ return "TAG_EQ"
+ case TAG_QUOTE:
+ return "TAG_QUOTE"
+ default:
+ return "UNKNOWN"
+ }
+}
+
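+// whitespace holds the characters that end a bare word in the template and
+// tag states.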
+const (
+ whitespace = " \n\t\r\f\b"
+)
+
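+// Token is a single lexical token with its literal text.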
+type Token struct {
+ Type TokenType
+ Literal string
+}
+
+func (t *Token) String() string {
+ return fmt.Sprintf("%s(%s)", t.Type, t.Literal)
+}
+
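+// tokenizer reads runes from r and produces Tokens, switching state machines
+// as it crosses template and tag boundaries.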
+type tokenizer struct {
+ r *bufio.Reader
+ state TokenState
+ nextToken *Token
+ err error
+}
+
+// peek returns the next i bytes, and true if i bytes were available.
+func (t *tokenizer) peek(i int) (string, bool) {
+ x, err := t.r.Peek(i)
+ if err != nil {
+ return string(x), false
+ }
+ return string(x), true
+}
+
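+// advance discards the next i bytes.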
+func (t *tokenizer) advance(i int) {
+ t.r.Discard(i)
+}
+
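+// nextContent reads the next token in the content state: plain text, until a
+// template or tag opener switches states.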
+func (t *tokenizer) nextContent() (*Token, error) {
+ acc := make([]rune, 100)
+ cursor := 0
+ accumulate := func(r rune) {
+ if cursor >= len(acc) {
+ acc = append(acc, make([]rune, 100)...)
+ }
+ acc[cursor] = r
+ cursor++
+ }
+ for {
+ if token, ok := t.peek(3); ok {
+ switch token {
+ case "{{-":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.state = STATE_TEMPLATE
+ return out, nil
+ }
+ }
+ if token, ok := t.peek(2); ok {
+ switch token {
+ case "{{":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.state = STATE_TEMPLATE
+ return out, nil
+ case "</":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.state = STATE_TAG
+ return out, nil
+ }
+ }
+ if token, ok := t.peek(1); ok {
+ switch token {
+ case "<":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.state = STATE_TAG
+ return out, nil
+ }
+ }
+ r, _, err := t.r.ReadRune()
+ if errors.Is(err, io.EOF) {
+ if cursor > 0 {
+ return &Token{TEXT, string(acc[:cursor])}, nil
+ } else {
+ return &Token{EOF, ""}, nil
+ }
+ } else if err != nil {
+ return nil, err
+ }
+ accumulate(r)
+ }
+}
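+
+// nextTemplate reads the next token inside a {{ ... }} expression, recognizing
+// keywords and delimiters.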
+func (t *tokenizer) nextTemplate() (*Token, error) {
+ acc := make([]rune, 10)
+ cursor := 0
+ accumulate := func(r rune) {
+ if cursor >= len(acc) {
+ acc = append(acc, make([]rune, 10)...)
+ }
+ acc[cursor] = r
+ cursor++
+ }
+ for {
+ if token, ok := t.peek(5); ok {
+ switch token {
+ case "range":
+ out := &Token{TEMPLATE_KEYWORD, token}
+ t.advance(5)
+ return out, nil
+ }
+ }
+ if token, ok := t.peek(4); ok {
+ switch token {
+ case "with":
+ out := &Token{TEMPLATE_KEYWORD, token}
+ t.advance(4)
+ return out, nil
+ case "else":
+ out := &Token{TEMPLATE_KEYWORD, token}
+ t.advance(4)
+ return out, nil
+ }
+ }
+ if token, ok := t.peek(3); ok {
+ switch token {
+ case "end":
+ out := &Token{TEMPLATE_KEYWORD, token}
+ t.advance(3)
+ return out, nil
+ case "{{-":
+ out := &Token{TEMPLATE_OPEN, token}
+ t.advance(3)
+ return out, nil
+ case "-}}":
+ out := &Token{TEMPLATE_CLOSE, token}
+ t.advance(3)
+ t.state = STATE_CONTENT
+ return out, nil
+ }
+ }
+ if token, ok := t.peek(2); ok {
+ switch token {
+ case "if":
+ out := &Token{TEMPLATE_KEYWORD, token}
+ t.advance(2)
+ return out, nil
+ case "{{":
+ out := &Token{TEMPLATE_OPEN, token}
+ t.advance(2)
+ return out, nil
+ case "}}":
+ out := &Token{TEMPLATE_CLOSE, token}
+ t.advance(2)
+ t.state = STATE_CONTENT
+ return out, nil
+ }
+ }
+ r, _, err := t.r.ReadRune()
+ if errors.Is(err, io.EOF) {
+ return &Token{EOF, ""}, nil
+ } else if err != nil {
+ return nil, err
+ }
+ if strings.ContainsRune(whitespace, r) {
+ return &Token{TEXT, string(acc[:cursor])}, nil
+ } else {
+ accumulate(r)
+ }
+ }
+}
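+
+// nextTag reads the next token inside an HTML tag: names, attribute pieces,
+// and delimiters.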
+func (t *tokenizer) nextTag() (*Token, error) {
+ acc := make([]rune, 10)
+ cursor := 0
+ accumulate := func(r rune) {
+ if cursor >= len(acc) {
+ acc = append(acc, make([]rune, 10)...)
+ }
+ acc[cursor] = r
+ cursor++
+ }
+ for {
+ if token, ok := t.peek(2); ok {
+ switch token {
+ case "</":
+ out := &Token{TAG_END_OPEN, token}
+ t.advance(2)
+ return out, nil
+ case "/>":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.nextToken = &Token{TAG_VOID_CLOSE, token}
+ t.advance(2)
+ t.state = STATE_CONTENT
+ return out, nil
+ }
+ }
+ if token, ok := t.peek(1); ok {
+ switch token {
+ case "<":
+ out := &Token{TAG_OPEN, token}
+ t.advance(1)
+ return out, nil
+ case "=":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.nextToken = &Token{TAG_EQ, token}
+ t.advance(1)
+ return out, nil
+ case `"`:
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.nextToken = &Token{TAG_QUOTE, token}
+ t.advance(1)
+ return out, nil
+ case ">":
+ out := &Token{TEXT, string(acc[:cursor])}
+ t.nextToken = &Token{TAG_CLOSE, token}
+ t.advance(1)
+ t.state = STATE_CONTENT
+ return out, nil
+ }
+ }
+ r, _, err := t.r.ReadRune()
+ if errors.Is(err, io.EOF) {
+ return &Token{EOF, ""}, nil
+ } else if err != nil {
+ return nil, err
+ }
+ if strings.ContainsRune(whitespace, r) {
+ return &Token{TEXT, string(acc[:cursor])}, nil
+ } else {
+ accumulate(r)
+ }
+ }
+}
+
+// next returns the next token, draining any queued token first and skipping
+// empty TEXT tokens.
+func (t *tokenizer) next() (*Token, error) {
+	var next *Token
+	var err error
+	for next == nil && err == nil {
+		if t.nextToken != nil {
+			next := t.nextToken
+			t.nextToken = nil
+			return next, nil
+		}
+		switch t.state {
+		case STATE_CONTENT:
+			next, err = t.nextContent()
+		case STATE_TEMPLATE:
+			next, err = t.nextTemplate()
+		case STATE_TAG:
+			next, err = t.nextTag()
+		default:
+			return nil, fmt.Errorf("unknown state %s", t.state)
+		}
+		if next != nil && next.Type == TEXT && next.Literal == "" {
+			next = nil
+		}
+	}
+	return next, err
+}
+
+// all returns an iterator over all tokens produced by the tokenizer.
+// Stops iterating on EOF or error.
+func (t *tokenizer) all() tokens {
+ return func(yield func(*Token) bool) {
+ for {
+ if t.err != nil {
+ return
+ }
+			tok, err := t.next()
+			if err != nil {
+				t.err = err
+				yield(&Token{ERROR, err.Error()})
+				break
+			}
+ if tok.Type == EOF {
+ t.err = io.EOF
+ }
+ if !yield(tok) {
+ break
+ }
+ }
+ }
+}
+
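+// Tokenize returns an iterator over the tokens read from r.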
+func Tokenize(r io.Reader) tokens {
+ tkns := new(tokenizer)
+ tkns.r = bufio.NewReader(r)
+ return tkns.all()
+}
--- /dev/null
+package parse
+
+import (
+ "slices"
+ "strings"
+ "testing"
+)
+
+func TestTokenize(t *testing.T) {
+ testStrings := map[string]string{
+ "hello": "Hello, World!",
+ "template": "Hello, {{ `template` }}!",
+ "html": "<div><p>Hello, HTML!</p><br></div>",
+ "html+template": "{{ if .condition }}<p>{{- .text -}}</p>{{ end }}",
+ }
+ for name, val := range testStrings {
+ t.Run(name, func(t *testing.T) {
+ toks := slices.Collect(Tokenize(strings.NewReader(val)).seq())
+ t.Log(val)
+ t.Log(toks)
+ })
+ }
+}