]> git.earlybird.gay Git - today/commitdiff
rethink making the worse html package public
authorearly <me@earlybird.gay>
Mon, 2 Sep 2024 18:35:02 +0000 (12:35 -0600)
committerearly <me@earlybird.gay>
Mon, 2 Sep 2024 18:35:02 +0000 (12:35 -0600)
213 files changed:
html/LICENSE [deleted file]
html/PATENTS [deleted file]
html/README.md [deleted file]
html/atom/atom.go [deleted file]
html/atom/atom_test.go [deleted file]
html/atom/gen.go [deleted file]
html/atom/table.go [deleted file]
html/atom/table_test.go [deleted file]
html/charset/charset.go [deleted file]
html/charset/charset_test.go [deleted file]
html/charset/testdata/HTTP-charset.html [deleted file]
html/charset/testdata/HTTP-vs-UTF-8-BOM.html [deleted file]
html/charset/testdata/HTTP-vs-meta-charset.html [deleted file]
html/charset/testdata/HTTP-vs-meta-content.html [deleted file]
html/charset/testdata/No-encoding-declaration.html [deleted file]
html/charset/testdata/README [deleted file]
html/charset/testdata/UTF-16BE-BOM.html [deleted file]
html/charset/testdata/UTF-16LE-BOM.html [deleted file]
html/charset/testdata/UTF-8-BOM-vs-meta-charset.html [deleted file]
html/charset/testdata/UTF-8-BOM-vs-meta-content.html [deleted file]
html/charset/testdata/meta-charset-attribute.html [deleted file]
html/charset/testdata/meta-content-attribute.html [deleted file]
html/comment_test.go [deleted file]
html/const.go [deleted file]
html/doc.go [deleted file]
html/doctype.go [deleted file]
html/entity.go [deleted file]
html/entity_test.go [deleted file]
html/escape.go [deleted file]
html/escape_test.go [deleted file]
html/example_test.go [deleted file]
html/foreign.go [deleted file]
html/node.go [deleted file]
html/node_test.go [deleted file]
html/parse.go [deleted file]
html/parse_test.go [deleted file]
html/render.go [deleted file]
html/render_test.go [deleted file]
html/testdata/go/issue_30600_parse_panics_in_cell_mode.dat [deleted file]
html/testdata/go/issue_30961_error_nested_unknown_tag_types.dat [deleted file]
html/testdata/go/raw_tags_to_be_ignored.dat [deleted file]
html/testdata/go/select.dat [deleted file]
html/testdata/go/template.dat [deleted file]
html/testdata/go1.html [deleted file]
html/testdata/webkit/README [deleted file]
html/testdata/webkit/adoption01.dat [deleted file]
html/testdata/webkit/adoption02.dat [deleted file]
html/testdata/webkit/blocks.dat [deleted file]
html/testdata/webkit/comments01.dat [deleted file]
html/testdata/webkit/doctype01.dat [deleted file]
html/testdata/webkit/domjs-unsafe.dat [deleted file]
html/testdata/webkit/entities01.dat [deleted file]
html/testdata/webkit/entities02.dat [deleted file]
html/testdata/webkit/foreign-fragment.dat [deleted file]
html/testdata/webkit/html5test-com.dat [deleted file]
html/testdata/webkit/inbody01.dat [deleted file]
html/testdata/webkit/isindex.dat [deleted file]
html/testdata/webkit/main-element.dat [deleted file]
html/testdata/webkit/math.dat [deleted file]
html/testdata/webkit/menuitem-element.dat [deleted file]
html/testdata/webkit/namespace-sensitivity.dat [deleted file]
html/testdata/webkit/noscript01.dat [deleted file]
html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat [deleted file]
html/testdata/webkit/pending-spec-changes.dat [deleted file]
html/testdata/webkit/plain-text-unsafe.dat [deleted file]
html/testdata/webkit/ruby.dat [deleted file]
html/testdata/webkit/scriptdata01.dat [deleted file]
html/testdata/webkit/scripted/adoption01.dat [deleted file]
html/testdata/webkit/scripted/ark.dat [deleted file]
html/testdata/webkit/scripted/webkit01.dat [deleted file]
html/testdata/webkit/svg.dat [deleted file]
html/testdata/webkit/tables01.dat [deleted file]
html/testdata/webkit/template.dat [deleted file]
html/testdata/webkit/tests1.dat [deleted file]
html/testdata/webkit/tests10.dat [deleted file]
html/testdata/webkit/tests11.dat [deleted file]
html/testdata/webkit/tests12.dat [deleted file]
html/testdata/webkit/tests14.dat [deleted file]
html/testdata/webkit/tests15.dat [deleted file]
html/testdata/webkit/tests16.dat [deleted file]
html/testdata/webkit/tests17.dat [deleted file]
html/testdata/webkit/tests18.dat [deleted file]
html/testdata/webkit/tests19.dat [deleted file]
html/testdata/webkit/tests2.dat [deleted file]
html/testdata/webkit/tests20.dat [deleted file]
html/testdata/webkit/tests21.dat [deleted file]
html/testdata/webkit/tests22.dat [deleted file]
html/testdata/webkit/tests23.dat [deleted file]
html/testdata/webkit/tests24.dat [deleted file]
html/testdata/webkit/tests25.dat [deleted file]
html/testdata/webkit/tests26.dat [deleted file]
html/testdata/webkit/tests3.dat [deleted file]
html/testdata/webkit/tests4.dat [deleted file]
html/testdata/webkit/tests5.dat [deleted file]
html/testdata/webkit/tests6.dat [deleted file]
html/testdata/webkit/tests7.dat [deleted file]
html/testdata/webkit/tests8.dat [deleted file]
html/testdata/webkit/tests9.dat [deleted file]
html/testdata/webkit/tests_innerHTML_1.dat [deleted file]
html/testdata/webkit/tricky01.dat [deleted file]
html/testdata/webkit/webkit01.dat [deleted file]
html/testdata/webkit/webkit02.dat [deleted file]
html/token.go [deleted file]
html/token_test.go [deleted file]
htmltree/attrs.go
htmltree/prettify.go
internal/compile/compile.go
internal/compile/component.go
internal/compile/template.go
internal/html/LICENSE [new file with mode: 0644]
internal/html/PATENTS [new file with mode: 0644]
internal/html/README.md [new file with mode: 0644]
internal/html/atom/atom.go [new file with mode: 0644]
internal/html/atom/atom_test.go [new file with mode: 0644]
internal/html/atom/gen.go [new file with mode: 0644]
internal/html/atom/table.go [new file with mode: 0644]
internal/html/atom/table_test.go [new file with mode: 0644]
internal/html/charset/charset.go [new file with mode: 0644]
internal/html/charset/charset_test.go [new file with mode: 0644]
internal/html/charset/testdata/HTTP-charset.html [new file with mode: 0644]
internal/html/charset/testdata/HTTP-vs-UTF-8-BOM.html [new file with mode: 0644]
internal/html/charset/testdata/HTTP-vs-meta-charset.html [new file with mode: 0644]
internal/html/charset/testdata/HTTP-vs-meta-content.html [new file with mode: 0644]
internal/html/charset/testdata/No-encoding-declaration.html [new file with mode: 0644]
internal/html/charset/testdata/README [new file with mode: 0644]
internal/html/charset/testdata/UTF-16BE-BOM.html [new file with mode: 0644]
internal/html/charset/testdata/UTF-16LE-BOM.html [new file with mode: 0644]
internal/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html [new file with mode: 0644]
internal/html/charset/testdata/UTF-8-BOM-vs-meta-content.html [new file with mode: 0644]
internal/html/charset/testdata/meta-charset-attribute.html [new file with mode: 0644]
internal/html/charset/testdata/meta-content-attribute.html [new file with mode: 0644]
internal/html/comment_test.go [new file with mode: 0644]
internal/html/const.go [new file with mode: 0644]
internal/html/doc.go [new file with mode: 0644]
internal/html/doctype.go [new file with mode: 0644]
internal/html/entity.go [new file with mode: 0644]
internal/html/entity_test.go [new file with mode: 0644]
internal/html/escape.go [new file with mode: 0644]
internal/html/escape_test.go [new file with mode: 0644]
internal/html/example_test.go [new file with mode: 0644]
internal/html/foreign.go [new file with mode: 0644]
internal/html/node.go [new file with mode: 0644]
internal/html/node_test.go [new file with mode: 0644]
internal/html/parse.go [new file with mode: 0644]
internal/html/parse_test.go [new file with mode: 0644]
internal/html/render.go [new file with mode: 0644]
internal/html/render_test.go [new file with mode: 0644]
internal/html/testdata/go/issue_30600_parse_panics_in_cell_mode.dat [new file with mode: 0644]
internal/html/testdata/go/issue_30961_error_nested_unknown_tag_types.dat [new file with mode: 0644]
internal/html/testdata/go/raw_tags_to_be_ignored.dat [new file with mode: 0644]
internal/html/testdata/go/select.dat [new file with mode: 0644]
internal/html/testdata/go/template.dat [new file with mode: 0644]
internal/html/testdata/go1.html [new file with mode: 0644]
internal/html/testdata/webkit/README [new file with mode: 0644]
internal/html/testdata/webkit/adoption01.dat [new file with mode: 0644]
internal/html/testdata/webkit/adoption02.dat [new file with mode: 0644]
internal/html/testdata/webkit/blocks.dat [new file with mode: 0644]
internal/html/testdata/webkit/comments01.dat [new file with mode: 0644]
internal/html/testdata/webkit/doctype01.dat [new file with mode: 0644]
internal/html/testdata/webkit/domjs-unsafe.dat [new file with mode: 0644]
internal/html/testdata/webkit/entities01.dat [new file with mode: 0644]
internal/html/testdata/webkit/entities02.dat [new file with mode: 0644]
internal/html/testdata/webkit/foreign-fragment.dat [new file with mode: 0644]
internal/html/testdata/webkit/html5test-com.dat [new file with mode: 0644]
internal/html/testdata/webkit/inbody01.dat [new file with mode: 0644]
internal/html/testdata/webkit/isindex.dat [new file with mode: 0644]
internal/html/testdata/webkit/main-element.dat [new file with mode: 0644]
internal/html/testdata/webkit/math.dat [new file with mode: 0644]
internal/html/testdata/webkit/menuitem-element.dat [new file with mode: 0644]
internal/html/testdata/webkit/namespace-sensitivity.dat [new file with mode: 0644]
internal/html/testdata/webkit/noscript01.dat [new file with mode: 0644]
internal/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat [new file with mode: 0644]
internal/html/testdata/webkit/pending-spec-changes.dat [new file with mode: 0644]
internal/html/testdata/webkit/plain-text-unsafe.dat [new file with mode: 0644]
internal/html/testdata/webkit/ruby.dat [new file with mode: 0644]
internal/html/testdata/webkit/scriptdata01.dat [new file with mode: 0644]
internal/html/testdata/webkit/scripted/adoption01.dat [new file with mode: 0644]
internal/html/testdata/webkit/scripted/ark.dat [new file with mode: 0644]
internal/html/testdata/webkit/scripted/webkit01.dat [new file with mode: 0644]
internal/html/testdata/webkit/svg.dat [new file with mode: 0644]
internal/html/testdata/webkit/tables01.dat [new file with mode: 0644]
internal/html/testdata/webkit/template.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests1.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests10.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests11.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests12.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests14.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests15.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests16.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests17.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests18.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests19.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests2.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests20.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests21.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests22.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests23.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests24.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests25.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests26.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests3.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests4.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests5.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests6.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests7.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests8.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests9.dat [new file with mode: 0644]
internal/html/testdata/webkit/tests_innerHTML_1.dat [new file with mode: 0644]
internal/html/testdata/webkit/tricky01.dat [new file with mode: 0644]
internal/html/testdata/webkit/webkit01.dat [new file with mode: 0644]
internal/html/testdata/webkit/webkit02.dat [new file with mode: 0644]
internal/html/token.go [new file with mode: 0644]
internal/html/token_test.go [new file with mode: 0644]

diff --git a/html/LICENSE b/html/LICENSE
deleted file mode 100644 (file)
index 2a7cf70..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright 2009 The Go Authors.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-   * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-   * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-   * Neither the name of Google LLC nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/html/PATENTS b/html/PATENTS
deleted file mode 100644 (file)
index 7330990..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-Additional IP Rights Grant (Patents)
-
-"This implementation" means the copyrightable works distributed by
-Google as part of the Go project.
-
-Google hereby grants to You a perpetual, worldwide, non-exclusive,
-no-charge, royalty-free, irrevocable (except as stated in this section)
-patent license to make, have made, use, offer to sell, sell, import,
-transfer and otherwise run, modify and propagate the contents of this
-implementation of Go, where such license applies only to those patent
-claims, both currently owned or controlled by Google and acquired in
-the future, licensable by Google that are necessarily infringed by this
-implementation of Go.  This grant does not include claims that would be
-infringed only as a consequence of further modification of this
-implementation.  If you or your agent or exclusive licensee institute or
-order or agree to the institution of patent litigation against any
-entity (including a cross-claim or counterclaim in a lawsuit) alleging
-that this implementation of Go or any code incorporated within this
-implementation of Go constitutes direct or contributory patent
-infringement, or inducement of patent infringement, then any patent
-rights granted to you under this License for this implementation of Go
-shall terminate as of the date such litigation is filed.
diff --git a/html/README.md b/html/README.md
deleted file mode 100644 (file)
index ad72cd7..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# HTML
-
-## This is a fork.
-
-This repo forks `golang.org/x/net/html` and makes limited changes:
-
-- Foster parenting is disabled.
-- Attribute keys are not automatically set to lowercase.
-
-These changes are made to support Today's use of `x/net/html` to parse and
-re-render Go templates. They have the intended side effect of allowing invalid
-HTML to be rendered, which is almost definitely not what you want.
-
-Please see the LICENSE and PATENTS file for this directory.
diff --git a/html/atom/atom.go b/html/atom/atom.go
deleted file mode 100644 (file)
index b4439bc..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package atom provides integer codes (also known as atoms) for a fixed set of
-// frequently occurring HTML strings: tag names and attribute keys such as "p"
-// and "id".
-//
-// Sharing an atom's name between all elements with the same tag can result in
-// fewer string allocations when tokenizing and parsing HTML. Integer
-// comparisons are also generally faster than string comparisons.
-//
-// The value of an atom's particular code is not guaranteed to stay the same
-// between versions of this package. Neither is any ordering guaranteed:
-// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
-// be dense. The only guarantees are that e.g. looking up "div" will yield
-// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
-package atom // import "git.earlybird.gay/today-engine/html/atom"
-
-// Atom is an integer code for a string. The zero value maps to "".
-type Atom uint32
-
-// String returns the atom's name.
-func (a Atom) String() string {
-       start := uint32(a >> 8)
-       n := uint32(a & 0xff)
-       if start+n > uint32(len(atomText)) {
-               return ""
-       }
-       return atomText[start : start+n]
-}
-
-func (a Atom) string() string {
-       return atomText[a>>8 : a>>8+a&0xff]
-}
-
-// fnv computes the FNV hash with an arbitrary starting value h.
-func fnv(h uint32, s []byte) uint32 {
-       for i := range s {
-               h ^= uint32(s[i])
-               h *= 16777619
-       }
-       return h
-}
-
-func match(s string, t []byte) bool {
-       for i, c := range t {
-               if s[i] != c {
-                       return false
-               }
-       }
-       return true
-}
-
-// Lookup returns the atom whose name is s. It returns zero if there is no
-// such atom. The lookup is case sensitive.
-func Lookup(s []byte) Atom {
-       if len(s) == 0 || len(s) > maxAtomLen {
-               return 0
-       }
-       h := fnv(hash0, s)
-       if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
-               return a
-       }
-       if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
-               return a
-       }
-       return 0
-}
-
-// String returns a string whose contents are equal to s. In that sense, it is
-// equivalent to string(s) but may be more efficient.
-func String(s []byte) string {
-       if a := Lookup(s); a != 0 {
-               return a.String()
-       }
-       return string(s)
-}
diff --git a/html/atom/atom_test.go b/html/atom/atom_test.go
deleted file mode 100644 (file)
index 6e33704..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package atom
-
-import (
-       "sort"
-       "testing"
-)
-
-func TestKnown(t *testing.T) {
-       for _, s := range testAtomList {
-               if atom := Lookup([]byte(s)); atom.String() != s {
-                       t.Errorf("Lookup(%q) = %#x (%q)", s, uint32(atom), atom.String())
-               }
-       }
-}
-
-func TestHits(t *testing.T) {
-       for _, a := range table {
-               if a == 0 {
-                       continue
-               }
-               got := Lookup([]byte(a.String()))
-               if got != a {
-                       t.Errorf("Lookup(%q) = %#x, want %#x", a.String(), uint32(got), uint32(a))
-               }
-       }
-}
-
-func TestMisses(t *testing.T) {
-       testCases := []string{
-               "",
-               "\x00",
-               "\xff",
-               "A",
-               "DIV",
-               "Div",
-               "dIV",
-               "aa",
-               "a\x00",
-               "ab",
-               "abb",
-               "abbr0",
-               "abbr ",
-               " abbr",
-               " a",
-               "acceptcharset",
-               "acceptCharset",
-               "accept_charset",
-               "h0",
-               "h1h2",
-               "h7",
-               "onClick",
-               "λ",
-               // The following string has the same hash (0xa1d7fab7) as "onmouseover".
-               "\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7",
-       }
-       for _, tc := range testCases {
-               got := Lookup([]byte(tc))
-               if got != 0 {
-                       t.Errorf("Lookup(%q): got %d, want 0", tc, got)
-               }
-       }
-}
-
-func TestForeignObject(t *testing.T) {
-       const (
-               afo = Foreignobject
-               afO = ForeignObject
-               sfo = "foreignobject"
-               sfO = "foreignObject"
-       )
-       if got := Lookup([]byte(sfo)); got != afo {
-               t.Errorf("Lookup(%q): got %#v, want %#v", sfo, got, afo)
-       }
-       if got := Lookup([]byte(sfO)); got != afO {
-               t.Errorf("Lookup(%q): got %#v, want %#v", sfO, got, afO)
-       }
-       if got := afo.String(); got != sfo {
-               t.Errorf("Atom(%#v).String(): got %q, want %q", afo, got, sfo)
-       }
-       if got := afO.String(); got != sfO {
-               t.Errorf("Atom(%#v).String(): got %q, want %q", afO, got, sfO)
-       }
-}
-
-func BenchmarkLookup(b *testing.B) {
-       sortedTable := make([]string, 0, len(table))
-       for _, a := range table {
-               if a != 0 {
-                       sortedTable = append(sortedTable, a.String())
-               }
-       }
-       sort.Strings(sortedTable)
-
-       x := make([][]byte, 1000)
-       for i := range x {
-               x[i] = []byte(sortedTable[i%len(sortedTable)])
-       }
-
-       b.ResetTimer()
-       for i := 0; i < b.N; i++ {
-               for _, s := range x {
-                       Lookup(s)
-               }
-       }
-}
diff --git a/html/atom/gen.go b/html/atom/gen.go
deleted file mode 100644 (file)
index 1e249d1..0000000
+++ /dev/null
@@ -1,711 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build ignore
-
-//go:generate go run gen.go
-//go:generate go run gen.go -test
-
-package main
-
-import (
-       "bytes"
-       "flag"
-       "fmt"
-       "go/format"
-       "math/rand"
-       "os"
-       "sort"
-       "strings"
-)
-
-// identifier converts s to a Go exported identifier.
-// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
-func identifier(s string) string {
-       b := make([]byte, 0, len(s))
-       cap := true
-       for _, c := range s {
-               if c == '-' {
-                       cap = true
-                       continue
-               }
-               if cap && 'a' <= c && c <= 'z' {
-                       c -= 'a' - 'A'
-               }
-               cap = false
-               b = append(b, byte(c))
-       }
-       return string(b)
-}
-
-var test = flag.Bool("test", false, "generate table_test.go")
-
-func genFile(name string, buf *bytes.Buffer) {
-       b, err := format.Source(buf.Bytes())
-       if err != nil {
-               fmt.Fprintln(os.Stderr, err)
-               os.Exit(1)
-       }
-       if err := os.WriteFile(name, b, 0644); err != nil {
-               fmt.Fprintln(os.Stderr, err)
-               os.Exit(1)
-       }
-}
-
-func main() {
-       flag.Parse()
-
-       var all []string
-       all = append(all, elements...)
-       all = append(all, attributes...)
-       all = append(all, eventHandlers...)
-       all = append(all, extra...)
-       sort.Strings(all)
-
-       // uniq - lists have dups
-       w := 0
-       for _, s := range all {
-               if w == 0 || all[w-1] != s {
-                       all[w] = s
-                       w++
-               }
-       }
-       all = all[:w]
-
-       if *test {
-               var buf bytes.Buffer
-               fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
-               fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
-               fmt.Fprintln(&buf, "package atom\n")
-               fmt.Fprintln(&buf, "var testAtomList = []string{")
-               for _, s := range all {
-                       fmt.Fprintf(&buf, "\t%q,\n", s)
-               }
-               fmt.Fprintln(&buf, "}")
-
-               genFile("table_test.go", &buf)
-               return
-       }
-
-       // Find hash that minimizes table size.
-       var best *table
-       for i := 0; i < 1000000; i++ {
-               if best != nil && 1<<(best.k-1) < len(all) {
-                       break
-               }
-               h := rand.Uint32()
-               for k := uint(0); k <= 16; k++ {
-                       if best != nil && k >= best.k {
-                               break
-                       }
-                       var t table
-                       if t.init(h, k, all) {
-                               best = &t
-                               break
-                       }
-               }
-       }
-       if best == nil {
-               fmt.Fprintf(os.Stderr, "failed to construct string table\n")
-               os.Exit(1)
-       }
-
-       // Lay out strings, using overlaps when possible.
-       layout := append([]string{}, all...)
-
-       // Remove strings that are substrings of other strings
-       for changed := true; changed; {
-               changed = false
-               for i, s := range layout {
-                       if s == "" {
-                               continue
-                       }
-                       for j, t := range layout {
-                               if i != j && t != "" && strings.Contains(s, t) {
-                                       changed = true
-                                       layout[j] = ""
-                               }
-                       }
-               }
-       }
-
-       // Join strings where one suffix matches another prefix.
-       for {
-               // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
-               // maximizing overlap length k.
-               besti := -1
-               bestj := -1
-               bestk := 0
-               for i, s := range layout {
-                       if s == "" {
-                               continue
-                       }
-                       for j, t := range layout {
-                               if i == j {
-                                       continue
-                               }
-                               for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
-                                       if s[len(s)-k:] == t[:k] {
-                                               besti = i
-                                               bestj = j
-                                               bestk = k
-                                       }
-                               }
-                       }
-               }
-               if bestk > 0 {
-                       layout[besti] += layout[bestj][bestk:]
-                       layout[bestj] = ""
-                       continue
-               }
-               break
-       }
-
-       text := strings.Join(layout, "")
-
-       atom := map[string]uint32{}
-       for _, s := range all {
-               off := strings.Index(text, s)
-               if off < 0 {
-                       panic("lost string " + s)
-               }
-               atom[s] = uint32(off<<8 | len(s))
-       }
-
-       var buf bytes.Buffer
-       // Generate the Go code.
-       fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
-       fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
-       fmt.Fprintln(&buf, "package atom\n\nconst (")
-
-       // compute max len
-       maxLen := 0
-       for _, s := range all {
-               if maxLen < len(s) {
-                       maxLen = len(s)
-               }
-               fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
-       }
-       fmt.Fprintln(&buf, ")\n")
-
-       fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
-       fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
-
-       fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
-       for i, s := range best.tab {
-               if s == "" {
-                       continue
-               }
-               fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
-       }
-       fmt.Fprintf(&buf, "}\n")
-       datasize := (1 << best.k) * 4
-
-       fmt.Fprintln(&buf, "const atomText =")
-       textsize := len(text)
-       for len(text) > 60 {
-               fmt.Fprintf(&buf, "\t%q +\n", text[:60])
-               text = text[60:]
-       }
-       fmt.Fprintf(&buf, "\t%q\n\n", text)
-
-       genFile("table.go", &buf)
-
-       fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
-}
-
-type byLen []string
-
-func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
-func (x byLen) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
-func (x byLen) Len() int           { return len(x) }
-
-// fnv computes the FNV hash with an arbitrary starting value h.
-func fnv(h uint32, s string) uint32 {
-       for i := 0; i < len(s); i++ {
-               h ^= uint32(s[i])
-               h *= 16777619
-       }
-       return h
-}
-
-// A table represents an attempt at constructing the lookup table.
-// The lookup table uses cuckoo hashing, meaning that each string
-// can be found in one of two positions.
-type table struct {
-       h0   uint32
-       k    uint
-       mask uint32
-       tab  []string
-}
-
-// hash returns the two hashes for s.
-func (t *table) hash(s string) (h1, h2 uint32) {
-       h := fnv(t.h0, s)
-       h1 = h & t.mask
-       h2 = (h >> 16) & t.mask
-       return
-}
-
-// init initializes the table with the given parameters.
-// h0 is the initial hash value,
-// k is the number of bits of hash value to use, and
-// x is the list of strings to store in the table.
-// init returns false if the table cannot be constructed.
-func (t *table) init(h0 uint32, k uint, x []string) bool {
-       t.h0 = h0
-       t.k = k
-       t.tab = make([]string, 1<<k)
-       t.mask = 1<<k - 1
-       for _, s := range x {
-               if !t.insert(s) {
-                       return false
-               }
-       }
-       return true
-}
-
-// insert inserts s in the table.
-func (t *table) insert(s string) bool {
-       h1, h2 := t.hash(s)
-       if t.tab[h1] == "" {
-               t.tab[h1] = s
-               return true
-       }
-       if t.tab[h2] == "" {
-               t.tab[h2] = s
-               return true
-       }
-       if t.push(h1, 0) {
-               t.tab[h1] = s
-               return true
-       }
-       if t.push(h2, 0) {
-               t.tab[h2] = s
-               return true
-       }
-       return false
-}
-
-// push attempts to push aside the entry in slot i.
-func (t *table) push(i uint32, depth int) bool {
-       if depth > len(t.tab) {
-               return false
-       }
-       s := t.tab[i]
-       h1, h2 := t.hash(s)
-       j := h1 + h2 - i
-       if t.tab[j] != "" && !t.push(j, depth+1) {
-               return false
-       }
-       t.tab[j] = s
-       return true
-}
-
-// The lists of element names and attribute keys were taken from
-// https://html.spec.whatwg.org/multipage/indices.html#index
-// as of the "HTML Living Standard - Last Updated 16 April 2018" version.
-
-// elements lists the HTML element names. "command", "keygen" and "menuitem"
-// have been removed from the spec, but are kept for backwards compatibility.
-var elements = []string{
-       "a",
-       "abbr",
-       "address",
-       "area",
-       "article",
-       "aside",
-       "audio",
-       "b",
-       "base",
-       "bdi",
-       "bdo",
-       "blockquote",
-       "body",
-       "br",
-       "button",
-       "canvas",
-       "caption",
-       "cite",
-       "code",
-       "col",
-       "colgroup",
-       "command",
-       "data",
-       "datalist",
-       "dd",
-       "del",
-       "details",
-       "dfn",
-       "dialog",
-       "div",
-       "dl",
-       "dt",
-       "em",
-       "embed",
-       "fieldset",
-       "figcaption",
-       "figure",
-       "footer",
-       "form",
-       "h1",
-       "h2",
-       "h3",
-       "h4",
-       "h5",
-       "h6",
-       "head",
-       "header",
-       "hgroup",
-       "hr",
-       "html",
-       "i",
-       "iframe",
-       "img",
-       "input",
-       "ins",
-       "kbd",
-       "keygen",
-       "label",
-       "legend",
-       "li",
-       "link",
-       "main",
-       "map",
-       "mark",
-       "menu",
-       "menuitem",
-       "meta",
-       "meter",
-       "nav",
-       "noscript",
-       "object",
-       "ol",
-       "optgroup",
-       "option",
-       "output",
-       "p",
-       "param",
-       "picture",
-       "pre",
-       "progress",
-       "q",
-       "rp",
-       "rt",
-       "ruby",
-       "s",
-       "samp",
-       "script",
-       "section",
-       "select",
-       "slot",
-       "small",
-       "source",
-       "span",
-       "strong",
-       "style",
-       "sub",
-       "summary",
-       "sup",
-       "table",
-       "tbody",
-       "td",
-       "template",
-       "textarea",
-       "tfoot",
-       "th",
-       "thead",
-       "time",
-       "title",
-       "tr",
-       "track",
-       "u",
-       "ul",
-       "var",
-       "video",
-       "wbr",
-}
-
-// attributes lists the HTML attribute keys; see
-// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
-//
-// "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup", "radiogroup", "spellcheck",
-// "scoped", "seamless", "sortable" and "sorted" have been removed from the spec, but are kept for backwards compatibility.
-var attributes = []string{
-       "abbr",
-       "accept",
-       "accept-charset",
-       "accesskey",
-       "action",
-       "allowfullscreen",
-       "allowpaymentrequest",
-       "allowusermedia",
-       "alt",
-       "as",
-       "async",
-       "autocomplete",
-       "autofocus",
-       "autoplay",
-       "challenge",
-       "charset",
-       "checked",
-       "cite",
-       "class",
-       "color",
-       "cols",
-       "colspan",
-       "command",
-       "content",
-       "contenteditable",
-       "contextmenu",
-       "controls",
-       "coords",
-       "crossorigin",
-       "data",
-       "datetime",
-       "default",
-       "defer",
-       "dir",
-       "dirname",
-       "disabled",
-       "download",
-       "draggable",
-       "dropzone",
-       "enctype",
-       "for",
-       "form",
-       "formaction",
-       "formenctype",
-       "formmethod",
-       "formnovalidate",
-       "formtarget",
-       "headers",
-       "height",
-       "hidden",
-       "high",
-       "href",
-       "hreflang",
-       "http-equiv",
-       "icon",
-       "id",
-       "inputmode",
-       "integrity",
-       "is",
-       "ismap",
-       "itemid",
-       "itemprop",
-       "itemref",
-       "itemscope",
-       "itemtype",
-       "keytype",
-       "kind",
-       "label",
-       "lang",
-       "list",
-       "loop",
-       "low",
-       "manifest",
-       "max",
-       "maxlength",
-       "media",
-       "mediagroup",
-       "method",
-       "min",
-       "minlength",
-       "multiple",
-       "muted",
-       "name",
-       "nomodule",
-       "nonce",
-       "novalidate",
-       "open",
-       "optimum",
-       "pattern",
-       "ping",
-       "placeholder",
-       "playsinline",
-       "poster",
-       "preload",
-       "radiogroup",
-       "readonly",
-       "referrerpolicy",
-       "rel",
-       "required",
-       "reversed",
-       "rows",
-       "rowspan",
-       "sandbox",
-       "spellcheck", // NOTE(review): duplicate, "spellcheck" is listed again below in sorted position; confirm the consumer of this list tolerates repeats
-       "scope",
-       "scoped",
-       "seamless",
-       "selected",
-       "shape",
-       "size",
-       "sizes",
-       "sortable",
-       "sorted",
-       "slot",
-       "span",
-       "spellcheck",
-       "src",
-       "srcdoc",
-       "srclang",
-       "srcset",
-       "start",
-       "step",
-       "style",
-       "tabindex",
-       "target",
-       "title",
-       "translate",
-       "type",
-       "typemustmatch",
-       "updateviacache",
-       "usemap",
-       "value",
-       "width",
-       "workertype",
-       "wrap",
-}
-
-// eventHandlers lists event handler attribute names. "onautocomplete",
-// "onautocompleteerror", "onmousewheel", "onshow" and "onsort" have been
-// removed from the spec, but are kept here for backwards compatibility.
-var eventHandlers = []string{
-       "onabort",
-       "onautocomplete",
-       "onautocompleteerror",
-       "onauxclick",
-       "onafterprint",
-       "onbeforeprint",
-       "onbeforeunload",
-       "onblur",
-       "oncancel",
-       "oncanplay",
-       "oncanplaythrough",
-       "onchange",
-       "onclick",
-       "onclose",
-       "oncontextmenu",
-       "oncopy",
-       "oncuechange",
-       "oncut",
-       "ondblclick",
-       "ondrag",
-       "ondragend",
-       "ondragenter",
-       "ondragexit",
-       "ondragleave",
-       "ondragover",
-       "ondragstart",
-       "ondrop",
-       "ondurationchange",
-       "onemptied",
-       "onended",
-       "onerror",
-       "onfocus",
-       "onhashchange",
-       "oninput",
-       "oninvalid",
-       "onkeydown",
-       "onkeypress",
-       "onkeyup",
-       "onlanguagechange",
-       "onload",
-       "onloadeddata",
-       "onloadedmetadata",
-       "onloadend",
-       "onloadstart",
-       "onmessage",
-       "onmessageerror",
-       "onmousedown",
-       "onmouseenter",
-       "onmouseleave",
-       "onmousemove",
-       "onmouseout",
-       "onmouseover",
-       "onmouseup",
-       "onmousewheel",
-       "onwheel",
-       "onoffline",
-       "ononline",
-       "onpagehide",
-       "onpageshow",
-       "onpaste",
-       "onpause",
-       "onplay",
-       "onplaying",
-       "onpopstate",
-       "onprogress",
-       "onratechange",
-       "onreset",
-       "onresize",
-       "onrejectionhandled",
-       "onscroll",
-       "onsecuritypolicyviolation",
-       "onseeked",
-       "onseeking",
-       "onselect",
-       "onshow",
-       "onsort",
-       "onstalled",
-       "onstorage",
-       "onsubmit",
-       "onsuspend",
-       "ontimeupdate",
-       "ontoggle",
-       "onunhandledrejection",
-       "onunload",
-       "onvolumechange",
-       "onwaiting",
-}
-
-// extra lists ad-hoc values not covered by any of the lists above.
-var extra = []string{
-       "acronym",
-       "align",
-       "annotation",
-       "annotation-xml",
-       "applet",
-       "basefont",
-       "bgsound",
-       "big",
-       "blink",
-       "center",
-       "color",
-       "desc",
-       "face",
-       "font",
-       "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
-       "foreignobject",
-       "frame",
-       "frameset",
-       "image",
-       "isindex", // "isindex" has been removed from the spec, but is kept here for backwards compatibility.
-       "listing",
-       "malignmark",
-       "marquee",
-       "math",
-       "mglyph",
-       "mi",
-       "mn",
-       "mo",
-       "ms",
-       "mtext",
-       "nobr",
-       "noembed",
-       "noframes",
-       "plaintext",
-       "prompt",
-       "public",
-       "rb",
-       "rtc",
-       "spacer",
-       "strike",
-       "svg",
-       "system",
-       "tt",
-       "xmp",
-}
diff --git a/html/atom/table.go b/html/atom/table.go
deleted file mode 100644 (file)
index 2a93886..0000000
+++ /dev/null
@@ -1,783 +0,0 @@
-// Code generated by go generate gen.go; DO NOT EDIT.
-
-//go:generate go run gen.go
-
-package atom
-
-const ( // Generated values. The low byte of each value equals the length of the atom's name; the upper bits are presumably an offset into a shared string defined elsewhere (TODO confirm).
-       A                         Atom = 0x1
-       Abbr                      Atom = 0x4
-       Accept                    Atom = 0x1a06
-       AcceptCharset             Atom = 0x1a0e
-       Accesskey                 Atom = 0x2c09
-       Acronym                   Atom = 0xaa07
-       Action                    Atom = 0x27206
-       Address                   Atom = 0x6f307
-       Align                     Atom = 0xb105
-       Allowfullscreen           Atom = 0x2080f
-       Allowpaymentrequest       Atom = 0xc113
-       Allowusermedia            Atom = 0xdd0e
-       Alt                       Atom = 0xf303
-       Annotation                Atom = 0x1c90a
-       AnnotationXml             Atom = 0x1c90e
-       Applet                    Atom = 0x31906
-       Area                      Atom = 0x35604
-       Article                   Atom = 0x3fc07
-       As                        Atom = 0x3c02
-       Aside                     Atom = 0x10705
-       Async                     Atom = 0xff05
-       Audio                     Atom = 0x11505
-       Autocomplete              Atom = 0x2780c
-       Autofocus                 Atom = 0x12109
-       Autoplay                  Atom = 0x13c08
-       B                         Atom = 0x101
-       Base                      Atom = 0x3b04
-       Basefont                  Atom = 0x3b08
-       Bdi                       Atom = 0xba03
-       Bdo                       Atom = 0x14b03
-       Bgsound                   Atom = 0x15e07
-       Big                       Atom = 0x17003
-       Blink                     Atom = 0x17305
-       Blockquote                Atom = 0x1870a
-       Body                      Atom = 0x2804
-       Br                        Atom = 0x202
-       Button                    Atom = 0x19106
-       Canvas                    Atom = 0x10306
-       Caption                   Atom = 0x23107
-       Center                    Atom = 0x22006
-       Challenge                 Atom = 0x29b09
-       Charset                   Atom = 0x2107
-       Checked                   Atom = 0x47907
-       Cite                      Atom = 0x19c04
-       Class                     Atom = 0x56405
-       Code                      Atom = 0x5c504
-       Col                       Atom = 0x1ab03
-       Colgroup                  Atom = 0x1ab08
-       Color                     Atom = 0x1bf05
-       Cols                      Atom = 0x1c404
-       Colspan                   Atom = 0x1c407
-       Command                   Atom = 0x1d707
-       Content                   Atom = 0x58b07
-       Contenteditable           Atom = 0x58b0f
-       Contextmenu               Atom = 0x3800b
-       Controls                  Atom = 0x1de08
-       Coords                    Atom = 0x1ea06
-       Crossorigin               Atom = 0x1fb0b
-       Data                      Atom = 0x4a504
-       Datalist                  Atom = 0x4a508
-       Datetime                  Atom = 0x2b808
-       Dd                        Atom = 0x2d702
-       Default                   Atom = 0x10a07
-       Defer                     Atom = 0x5c705
-       Del                       Atom = 0x45203
-       Desc                      Atom = 0x56104
-       Details                   Atom = 0x7207
-       Dfn                       Atom = 0x8703
-       Dialog                    Atom = 0xbb06
-       Dir                       Atom = 0x9303
-       Dirname                   Atom = 0x9307
-       Disabled                  Atom = 0x16408
-       Div                       Atom = 0x16b03
-       Dl                        Atom = 0x5e602
-       Download                  Atom = 0x46308
-       Draggable                 Atom = 0x17a09
-       Dropzone                  Atom = 0x40508
-       Dt                        Atom = 0x64b02
-       Em                        Atom = 0x6e02
-       Embed                     Atom = 0x6e05
-       Enctype                   Atom = 0x28d07
-       Face                      Atom = 0x21e04
-       Fieldset                  Atom = 0x22608
-       Figcaption                Atom = 0x22e0a
-       Figure                    Atom = 0x24806
-       Font                      Atom = 0x3f04
-       Footer                    Atom = 0xf606
-       For                       Atom = 0x25403
-       ForeignObject             Atom = 0x2540d
-       Foreignobject             Atom = 0x2610d
-       Form                      Atom = 0x26e04
-       Formaction                Atom = 0x26e0a
-       Formenctype               Atom = 0x2890b
-       Formmethod                Atom = 0x2a40a
-       Formnovalidate            Atom = 0x2ae0e
-       Formtarget                Atom = 0x2c00a
-       Frame                     Atom = 0x8b05
-       Frameset                  Atom = 0x8b08
-       H1                        Atom = 0x15c02
-       H2                        Atom = 0x2de02
-       H3                        Atom = 0x30d02
-       H4                        Atom = 0x34502
-       H5                        Atom = 0x34f02
-       H6                        Atom = 0x64d02
-       Head                      Atom = 0x33104
-       Header                    Atom = 0x33106
-       Headers                   Atom = 0x33107
-       Height                    Atom = 0x5206
-       Hgroup                    Atom = 0x2ca06
-       Hidden                    Atom = 0x2d506
-       High                      Atom = 0x2db04
-       Hr                        Atom = 0x15702
-       Href                      Atom = 0x2e004
-       Hreflang                  Atom = 0x2e008
-       Html                      Atom = 0x5604
-       HttpEquiv                 Atom = 0x2e80a
-       I                         Atom = 0x601
-       Icon                      Atom = 0x58a04
-       Id                        Atom = 0x10902
-       Iframe                    Atom = 0x2fc06
-       Image                     Atom = 0x30205
-       Img                       Atom = 0x30703
-       Input                     Atom = 0x44b05
-       Inputmode                 Atom = 0x44b09
-       Ins                       Atom = 0x20403
-       Integrity                 Atom = 0x23f09
-       Is                        Atom = 0x16502
-       Isindex                   Atom = 0x30f07
-       Ismap                     Atom = 0x31605
-       Itemid                    Atom = 0x38b06
-       Itemprop                  Atom = 0x19d08
-       Itemref                   Atom = 0x3cd07
-       Itemscope                 Atom = 0x67109
-       Itemtype                  Atom = 0x31f08
-       Kbd                       Atom = 0xb903
-       Keygen                    Atom = 0x3206
-       Keytype                   Atom = 0xd607
-       Kind                      Atom = 0x17704
-       Label                     Atom = 0x5905
-       Lang                      Atom = 0x2e404
-       Legend                    Atom = 0x18106
-       Li                        Atom = 0xb202
-       Link                      Atom = 0x17404
-       List                      Atom = 0x4a904
-       Listing                   Atom = 0x4a907
-       Loop                      Atom = 0x5d04
-       Low                       Atom = 0xc303
-       Main                      Atom = 0x1004
-       Malignmark                Atom = 0xb00a
-       Manifest                  Atom = 0x6d708
-       Map                       Atom = 0x31803
-       Mark                      Atom = 0xb604
-       Marquee                   Atom = 0x32707
-       Math                      Atom = 0x32e04
-       Max                       Atom = 0x33d03
-       Maxlength                 Atom = 0x33d09
-       Media                     Atom = 0xe605
-       Mediagroup                Atom = 0xe60a
-       Menu                      Atom = 0x38704
-       Menuitem                  Atom = 0x38708
-       Meta                      Atom = 0x4b804
-       Meter                     Atom = 0x9805
-       Method                    Atom = 0x2a806
-       Mglyph                    Atom = 0x30806
-       Mi                        Atom = 0x34702
-       Min                       Atom = 0x34703
-       Minlength                 Atom = 0x34709
-       Mn                        Atom = 0x2b102
-       Mo                        Atom = 0xa402
-       Ms                        Atom = 0x67402
-       Mtext                     Atom = 0x35105
-       Multiple                  Atom = 0x35f08
-       Muted                     Atom = 0x36705
-       Name                      Atom = 0x9604
-       Nav                       Atom = 0x1303
-       Nobr                      Atom = 0x3704
-       Noembed                   Atom = 0x6c07
-       Noframes                  Atom = 0x8908
-       Nomodule                  Atom = 0xa208
-       Nonce                     Atom = 0x1a605
-       Noscript                  Atom = 0x21608
-       Novalidate                Atom = 0x2b20a
-       Object                    Atom = 0x26806
-       Ol                        Atom = 0x13702
-       Onabort                   Atom = 0x19507
-       Onafterprint              Atom = 0x2360c
-       Onautocomplete            Atom = 0x2760e
-       Onautocompleteerror       Atom = 0x27613
-       Onauxclick                Atom = 0x61f0a
-       Onbeforeprint             Atom = 0x69e0d
-       Onbeforeunload            Atom = 0x6e70e
-       Onblur                    Atom = 0x56d06
-       Oncancel                  Atom = 0x11908
-       Oncanplay                 Atom = 0x14d09
-       Oncanplaythrough          Atom = 0x14d10
-       Onchange                  Atom = 0x41b08
-       Onclick                   Atom = 0x2f507
-       Onclose                   Atom = 0x36c07
-       Oncontextmenu             Atom = 0x37e0d
-       Oncopy                    Atom = 0x39106
-       Oncuechange               Atom = 0x3970b
-       Oncut                     Atom = 0x3a205
-       Ondblclick                Atom = 0x3a70a
-       Ondrag                    Atom = 0x3b106
-       Ondragend                 Atom = 0x3b109
-       Ondragenter               Atom = 0x3ba0b
-       Ondragexit                Atom = 0x3c50a
-       Ondragleave               Atom = 0x3df0b
-       Ondragover                Atom = 0x3ea0a
-       Ondragstart               Atom = 0x3f40b
-       Ondrop                    Atom = 0x40306
-       Ondurationchange          Atom = 0x41310
-       Onemptied                 Atom = 0x40a09
-       Onended                   Atom = 0x42307
-       Onerror                   Atom = 0x42a07
-       Onfocus                   Atom = 0x43107
-       Onhashchange              Atom = 0x43d0c
-       Oninput                   Atom = 0x44907
-       Oninvalid                 Atom = 0x45509
-       Onkeydown                 Atom = 0x45e09
-       Onkeypress                Atom = 0x46b0a
-       Onkeyup                   Atom = 0x48007
-       Onlanguagechange          Atom = 0x48d10
-       Onload                    Atom = 0x49d06
-       Onloadeddata              Atom = 0x49d0c
-       Onloadedmetadata          Atom = 0x4b010
-       Onloadend                 Atom = 0x4c609
-       Onloadstart               Atom = 0x4cf0b
-       Onmessage                 Atom = 0x4da09
-       Onmessageerror            Atom = 0x4da0e
-       Onmousedown               Atom = 0x4e80b
-       Onmouseenter              Atom = 0x4f30c
-       Onmouseleave              Atom = 0x4ff0c
-       Onmousemove               Atom = 0x50b0b
-       Onmouseout                Atom = 0x5160a
-       Onmouseover               Atom = 0x5230b
-       Onmouseup                 Atom = 0x52e09
-       Onmousewheel              Atom = 0x53c0c
-       Onoffline                 Atom = 0x54809
-       Ononline                  Atom = 0x55108
-       Onpagehide                Atom = 0x5590a
-       Onpageshow                Atom = 0x5730a
-       Onpaste                   Atom = 0x57f07
-       Onpause                   Atom = 0x59a07
-       Onplay                    Atom = 0x5a406
-       Onplaying                 Atom = 0x5a409
-       Onpopstate                Atom = 0x5ad0a
-       Onprogress                Atom = 0x5b70a
-       Onratechange              Atom = 0x5cc0c
-       Onrejectionhandled        Atom = 0x5d812
-       Onreset                   Atom = 0x5ea07
-       Onresize                  Atom = 0x5f108
-       Onscroll                  Atom = 0x60008
-       Onsecuritypolicyviolation Atom = 0x60819
-       Onseeked                  Atom = 0x62908
-       Onseeking                 Atom = 0x63109
-       Onselect                  Atom = 0x63a08
-       Onshow                    Atom = 0x64406
-       Onsort                    Atom = 0x64f06
-       Onstalled                 Atom = 0x65909
-       Onstorage                 Atom = 0x66209
-       Onsubmit                  Atom = 0x66b08
-       Onsuspend                 Atom = 0x67b09
-       Ontimeupdate              Atom = 0x400c
-       Ontoggle                  Atom = 0x68408
-       Onunhandledrejection      Atom = 0x68c14
-       Onunload                  Atom = 0x6ab08
-       Onvolumechange            Atom = 0x6b30e
-       Onwaiting                 Atom = 0x6c109
-       Onwheel                   Atom = 0x6ca07
-       Open                      Atom = 0x1a304
-       Optgroup                  Atom = 0x5f08
-       Optimum                   Atom = 0x6d107
-       Option                    Atom = 0x6e306
-       Output                    Atom = 0x51d06
-       P                         Atom = 0xc01
-       Param                     Atom = 0xc05
-       Pattern                   Atom = 0x6607
-       Picture                   Atom = 0x7b07
-       Ping                      Atom = 0xef04
-       Placeholder               Atom = 0x1310b
-       Plaintext                 Atom = 0x1b209
-       Playsinline               Atom = 0x1400b
-       Poster                    Atom = 0x2cf06
-       Pre                       Atom = 0x47003
-       Preload                   Atom = 0x48607
-       Progress                  Atom = 0x5b908
-       Prompt                    Atom = 0x53606
-       Public                    Atom = 0x58606
-       Q                         Atom = 0xcf01
-       Radiogroup                Atom = 0x30a
-       Rb                        Atom = 0x3a02
-       Readonly                  Atom = 0x35708
-       Referrerpolicy            Atom = 0x3d10e
-       Rel                       Atom = 0x48703
-       Required                  Atom = 0x24c08
-       Reversed                  Atom = 0x8008
-       Rows                      Atom = 0x9c04
-       Rowspan                   Atom = 0x9c07
-       Rp                        Atom = 0x23c02
-       Rt                        Atom = 0x19a02
-       Rtc                       Atom = 0x19a03
-       Ruby                      Atom = 0xfb04
-       S                         Atom = 0x2501
-       Samp                      Atom = 0x7804
-       Sandbox                   Atom = 0x12907
-       Scope                     Atom = 0x67505
-       Scoped                    Atom = 0x67506
-       Script                    Atom = 0x21806
-       Seamless                  Atom = 0x37108
-       Section                   Atom = 0x56807
-       Select                    Atom = 0x63c06
-       Selected                  Atom = 0x63c08
-       Shape                     Atom = 0x1e505
-       Size                      Atom = 0x5f504
-       Sizes                     Atom = 0x5f505
-       Slot                      Atom = 0x1ef04
-       Small                     Atom = 0x20605
-       Sortable                  Atom = 0x65108
-       Sorted                    Atom = 0x33706
-       Source                    Atom = 0x37806
-       Spacer                    Atom = 0x43706
-       Span                      Atom = 0x9f04
-       Spellcheck                Atom = 0x4740a
-       Src                       Atom = 0x5c003
-       Srcdoc                    Atom = 0x5c006
-       Srclang                   Atom = 0x5f907
-       Srcset                    Atom = 0x6f906
-       Start                     Atom = 0x3fa05
-       Step                      Atom = 0x58304
-       Strike                    Atom = 0xd206
-       Strong                    Atom = 0x6dd06
-       Style                     Atom = 0x6ff05
-       Sub                       Atom = 0x66d03
-       Summary                   Atom = 0x70407
-       Sup                       Atom = 0x70b03
-       Svg                       Atom = 0x70e03
-       System                    Atom = 0x71106
-       Tabindex                  Atom = 0x4be08
-       Table                     Atom = 0x59505
-       Target                    Atom = 0x2c406
-       Tbody                     Atom = 0x2705
-       Td                        Atom = 0x9202
-       Template                  Atom = 0x71408
-       Textarea                  Atom = 0x35208
-       Tfoot                     Atom = 0xf505
-       Th                        Atom = 0x15602
-       Thead                     Atom = 0x33005
-       Time                      Atom = 0x4204
-       Title                     Atom = 0x11005
-       Tr                        Atom = 0xcc02
-       Track                     Atom = 0x1ba05
-       Translate                 Atom = 0x1f209
-       Tt                        Atom = 0x6802
-       Type                      Atom = 0xd904
-       Typemustmatch             Atom = 0x2900d
-       U                         Atom = 0xb01
-       Ul                        Atom = 0xa702
-       Updateviacache            Atom = 0x460e
-       Usemap                    Atom = 0x59e06
-       Value                     Atom = 0x1505
-       Var                       Atom = 0x16d03
-       Video                     Atom = 0x2f105
-       Wbr                       Atom = 0x57c03
-       Width                     Atom = 0x64905
-       Workertype                Atom = 0x71c0a
-       Wrap                      Atom = 0x72604
-       Xmp                       Atom = 0x12f03
-)
-
-const hash0 = 0x81cdf10e // presumably the initial hash value the lookup table below was built with (TODO confirm against the generator)
-
-const maxAtomLen = 25 // equals the length of the longest name above, "onsecuritypolicyviolation"
-
-var table = [1 << 9]Atom{
-       0x1:   0xe60a,  // mediagroup
-       0x2:   0x2e404, // lang
-       0x4:   0x2c09,  // accesskey
-       0x5:   0x8b08,  // frameset
-       0x7:   0x63a08, // onselect
-       0x8:   0x71106, // system
-       0xa:   0x64905, // width
-       0xc:   0x2890b, // formenctype
-       0xd:   0x13702, // ol
-       0xe:   0x3970b, // oncuechange
-       0x10:  0x14b03, // bdo
-       0x11:  0x11505, // audio
-       0x12:  0x17a09, // draggable
-       0x14:  0x2f105, // video
-       0x15:  0x2b102, // mn
-       0x16:  0x38704, // menu
-       0x17:  0x2cf06, // poster
-       0x19:  0xf606,  // footer
-       0x1a:  0x2a806, // method
-       0x1b:  0x2b808, // datetime
-       0x1c:  0x19507, // onabort
-       0x1d:  0x460e,  // updateviacache
-       0x1e:  0xff05,  // async
-       0x1f:  0x49d06, // onload
-       0x21:  0x11908, // oncancel
-       0x22:  0x62908, // onseeked
-       0x23:  0x30205, // image
-       0x24:  0x5d812, // onrejectionhandled
-       0x26:  0x17404, // link
-       0x27:  0x51d06, // output
-       0x28:  0x33104, // head
-       0x29:  0x4ff0c, // onmouseleave
-       0x2a:  0x57f07, // onpaste
-       0x2b:  0x5a409, // onplaying
-       0x2c:  0x1c407, // colspan
-       0x2f:  0x1bf05, // color
-       0x30:  0x5f504, // size
-       0x31:  0x2e80a, // http-equiv
-       0x33:  0x601,   // i
-       0x34:  0x5590a, // onpagehide
-       0x35:  0x68c14, // onunhandledrejection
-       0x37:  0x42a07, // onerror
-       0x3a:  0x3b08,  // basefont
-       0x3f:  0x1303,  // nav
-       0x40:  0x17704, // kind
-       0x41:  0x35708, // readonly
-       0x42:  0x30806, // mglyph
-       0x44:  0xb202,  // li
-       0x46:  0x2d506, // hidden
-       0x47:  0x70e03, // svg
-       0x48:  0x58304, // step
-       0x49:  0x23f09, // integrity
-       0x4a:  0x58606, // public
-       0x4c:  0x1ab03, // col
-       0x4d:  0x1870a, // blockquote
-       0x4e:  0x34f02, // h5
-       0x50:  0x5b908, // progress
-       0x51:  0x5f505, // sizes
-       0x52:  0x34502, // h4
-       0x56:  0x33005, // thead
-       0x57:  0xd607,  // keytype
-       0x58:  0x5b70a, // onprogress
-       0x59:  0x44b09, // inputmode
-       0x5a:  0x3b109, // ondragend
-       0x5d:  0x3a205, // oncut
-       0x5e:  0x43706, // spacer
-       0x5f:  0x1ab08, // colgroup
-       0x62:  0x16502, // is
-       0x65:  0x3c02,  // as
-       0x66:  0x54809, // onoffline
-       0x67:  0x33706, // sorted
-       0x69:  0x48d10, // onlanguagechange
-       0x6c:  0x43d0c, // onhashchange
-       0x6d:  0x9604,  // name
-       0x6e:  0xf505,  // tfoot
-       0x6f:  0x56104, // desc
-       0x70:  0x33d03, // max
-       0x72:  0x1ea06, // coords
-       0x73:  0x30d02, // h3
-       0x74:  0x6e70e, // onbeforeunload
-       0x75:  0x9c04,  // rows
-       0x76:  0x63c06, // select
-       0x77:  0x9805,  // meter
-       0x78:  0x38b06, // itemid
-       0x79:  0x53c0c, // onmousewheel
-       0x7a:  0x5c006, // srcdoc
-       0x7d:  0x1ba05, // track
-       0x7f:  0x31f08, // itemtype
-       0x82:  0xa402,  // mo
-       0x83:  0x41b08, // onchange
-       0x84:  0x33107, // headers
-       0x85:  0x5cc0c, // onratechange
-       0x86:  0x60819, // onsecuritypolicyviolation
-       0x88:  0x4a508, // datalist
-       0x89:  0x4e80b, // onmousedown
-       0x8a:  0x1ef04, // slot
-       0x8b:  0x4b010, // onloadedmetadata
-       0x8c:  0x1a06,  // accept
-       0x8d:  0x26806, // object
-       0x91:  0x6b30e, // onvolumechange
-       0x92:  0x2107,  // charset
-       0x93:  0x27613, // onautocompleteerror
-       0x94:  0xc113,  // allowpaymentrequest
-       0x95:  0x2804,  // body
-       0x96:  0x10a07, // default
-       0x97:  0x63c08, // selected
-       0x98:  0x21e04, // face
-       0x99:  0x1e505, // shape
-       0x9b:  0x68408, // ontoggle
-       0x9e:  0x64b02, // dt
-       0x9f:  0xb604,  // mark
-       0xa1:  0xb01,   // u
-       0xa4:  0x6ab08, // onunload
-       0xa5:  0x5d04,  // loop
-       0xa6:  0x16408, // disabled
-       0xaa:  0x42307, // onended
-       0xab:  0xb00a,  // malignmark
-       0xad:  0x67b09, // onsuspend
-       0xae:  0x35105, // mtext
-       0xaf:  0x64f06, // onsort
-       0xb0:  0x19d08, // itemprop
-       0xb3:  0x67109, // itemscope
-       0xb4:  0x17305, // blink
-       0xb6:  0x3b106, // ondrag
-       0xb7:  0xa702,  // ul
-       0xb8:  0x26e04, // form
-       0xb9:  0x12907, // sandbox
-       0xba:  0x8b05,  // frame
-       0xbb:  0x1505,  // value
-       0xbc:  0x66209, // onstorage
-       0xbf:  0xaa07,  // acronym
-       0xc0:  0x19a02, // rt
-       0xc2:  0x202,   // br
-       0xc3:  0x22608, // fieldset
-       0xc4:  0x2900d, // typemustmatch
-       0xc5:  0xa208,  // nomodule
-       0xc6:  0x6c07,  // noembed
-       0xc7:  0x69e0d, // onbeforeprint
-       0xc8:  0x19106, // button
-       0xc9:  0x2f507, // onclick
-       0xca:  0x70407, // summary
-       0xcd:  0xfb04,  // ruby
-       0xce:  0x56405, // class
-       0xcf:  0x3f40b, // ondragstart
-       0xd0:  0x23107, // caption
-       0xd4:  0xdd0e,  // allowusermedia
-       0xd5:  0x4cf0b, // onloadstart
-       0xd9:  0x16b03, // div
-       0xda:  0x4a904, // list
-       0xdb:  0x32e04, // math
-       0xdc:  0x44b05, // input
-       0xdf:  0x3ea0a, // ondragover
-       0xe0:  0x2de02, // h2
-       0xe2:  0x1b209, // plaintext
-       0xe4:  0x4f30c, // onmouseenter
-       0xe7:  0x47907, // checked
-       0xe8:  0x47003, // pre
-       0xea:  0x35f08, // multiple
-       0xeb:  0xba03,  // bdi
-       0xec:  0x33d09, // maxlength
-       0xed:  0xcf01,  // q
-       0xee:  0x61f0a, // onauxclick
-       0xf0:  0x57c03, // wbr
-       0xf2:  0x3b04,  // base
-       0xf3:  0x6e306, // option
-       0xf5:  0x41310, // ondurationchange
-       0xf7:  0x8908,  // noframes
-       0xf9:  0x40508, // dropzone
-       0xfb:  0x67505, // scope
-       0xfc:  0x8008,  // reversed
-       0xfd:  0x3ba0b, // ondragenter
-       0xfe:  0x3fa05, // start
-       0xff:  0x12f03, // xmp
-       0x100: 0x5f907, // srclang
-       0x101: 0x30703, // img
-       0x104: 0x101,   // b
-       0x105: 0x25403, // for
-       0x106: 0x10705, // aside
-       0x107: 0x44907, // oninput
-       0x108: 0x35604, // area
-       0x109: 0x2a40a, // formmethod
-       0x10a: 0x72604, // wrap
-       0x10c: 0x23c02, // rp
-       0x10d: 0x46b0a, // onkeypress
-       0x10e: 0x6802,  // tt
-       0x110: 0x34702, // mi
-       0x111: 0x36705, // muted
-       0x112: 0xf303,  // alt
-       0x113: 0x5c504, // code
-       0x114: 0x6e02,  // em
-       0x115: 0x3c50a, // ondragexit
-       0x117: 0x9f04,  // span
-       0x119: 0x6d708, // manifest
-       0x11a: 0x38708, // menuitem
-       0x11b: 0x58b07, // content
-       0x11d: 0x6c109, // onwaiting
-       0x11f: 0x4c609, // onloadend
-       0x121: 0x37e0d, // oncontextmenu
-       0x123: 0x56d06, // onblur
-       0x124: 0x3fc07, // article
-       0x125: 0x9303,  // dir
-       0x126: 0xef04,  // ping
-       0x127: 0x24c08, // required
-       0x128: 0x45509, // oninvalid
-       0x129: 0xb105,  // align
-       0x12b: 0x58a04, // icon
-       0x12c: 0x64d02, // h6
-       0x12d: 0x1c404, // cols
-       0x12e: 0x22e0a, // figcaption
-       0x12f: 0x45e09, // onkeydown
-       0x130: 0x66b08, // onsubmit
-       0x131: 0x14d09, // oncanplay
-       0x132: 0x70b03, // sup
-       0x133: 0xc01,   // p
-       0x135: 0x40a09, // onemptied
-       0x136: 0x39106, // oncopy
-       0x137: 0x19c04, // cite
-       0x138: 0x3a70a, // ondblclick
-       0x13a: 0x50b0b, // onmousemove
-       0x13c: 0x66d03, // sub
-       0x13d: 0x48703, // rel
-       0x13e: 0x5f08,  // optgroup
-       0x142: 0x9c07,  // rowspan
-       0x143: 0x37806, // source
-       0x144: 0x21608, // noscript
-       0x145: 0x1a304, // open
-       0x146: 0x20403, // ins
-       0x147: 0x2540d, // foreignObject
-       0x148: 0x5ad0a, // onpopstate
-       0x14a: 0x28d07, // enctype
-       0x14b: 0x2760e, // onautocomplete
-       0x14c: 0x35208, // textarea
-       0x14e: 0x2780c, // autocomplete
-       0x14f: 0x15702, // hr
-       0x150: 0x1de08, // controls
-       0x151: 0x10902, // id
-       0x153: 0x2360c, // onafterprint
-       0x155: 0x2610d, // foreignobject
-       0x156: 0x32707, // marquee
-       0x157: 0x59a07, // onpause
-       0x158: 0x5e602, // dl
-       0x159: 0x5206,  // height
-       0x15a: 0x34703, // min
-       0x15b: 0x9307,  // dirname
-       0x15c: 0x1f209, // translate
-       0x15d: 0x5604,  // html
-       0x15e: 0x34709, // minlength
-       0x15f: 0x48607, // preload
-       0x160: 0x71408, // template
-       0x161: 0x3df0b, // ondragleave
-       0x162: 0x3a02,  // rb
-       0x164: 0x5c003, // src
-       0x165: 0x6dd06, // strong
-       0x167: 0x7804,  // samp
-       0x168: 0x6f307, // address
-       0x169: 0x55108, // ononline
-       0x16b: 0x1310b, // placeholder
-       0x16c: 0x2c406, // target
-       0x16d: 0x20605, // small
-       0x16e: 0x6ca07, // onwheel
-       0x16f: 0x1c90a, // annotation
-       0x170: 0x4740a, // spellcheck
-       0x171: 0x7207,  // details
-       0x172: 0x10306, // canvas
-       0x173: 0x12109, // autofocus
-       0x174: 0xc05,   // param
-       0x176: 0x46308, // download
-       0x177: 0x45203, // del
-       0x178: 0x36c07, // onclose
-       0x179: 0xb903,  // kbd
-       0x17a: 0x31906, // applet
-       0x17b: 0x2e004, // href
-       0x17c: 0x5f108, // onresize
-       0x17e: 0x49d0c, // onloadeddata
-       0x180: 0xcc02,  // tr
-       0x181: 0x2c00a, // formtarget
-       0x182: 0x11005, // title
-       0x183: 0x6ff05, // style
-       0x184: 0xd206,  // strike
-       0x185: 0x59e06, // usemap
-       0x186: 0x2fc06, // iframe
-       0x187: 0x1004,  // main
-       0x189: 0x7b07,  // picture
-       0x18c: 0x31605, // ismap
-       0x18e: 0x4a504, // data
-       0x18f: 0x5905,  // label
-       0x191: 0x3d10e, // referrerpolicy
-       0x192: 0x15602, // th
-       0x194: 0x53606, // prompt
-       0x195: 0x56807, // section
-       0x197: 0x6d107, // optimum
-       0x198: 0x2db04, // high
-       0x199: 0x15c02, // h1
-       0x19a: 0x65909, // onstalled
-       0x19b: 0x16d03, // var
-       0x19c: 0x4204,  // time
-       0x19e: 0x67402, // ms
-       0x19f: 0x33106, // header
-       0x1a0: 0x4da09, // onmessage
-       0x1a1: 0x1a605, // nonce
-       0x1a2: 0x26e0a, // formaction
-       0x1a3: 0x22006, // center
-       0x1a4: 0x3704,  // nobr
-       0x1a5: 0x59505, // table
-       0x1a6: 0x4a907, // listing
-       0x1a7: 0x18106, // legend
-       0x1a9: 0x29b09, // challenge
-       0x1aa: 0x24806, // figure
-       0x1ab: 0xe605,  // media
-       0x1ae: 0xd904,  // type
-       0x1af: 0x3f04,  // font
-       0x1b0: 0x4da0e, // onmessageerror
-       0x1b1: 0x37108, // seamless
-       0x1b2: 0x8703,  // dfn
-       0x1b3: 0x5c705, // defer
-       0x1b4: 0xc303,  // low
-       0x1b5: 0x19a03, // rtc
-       0x1b6: 0x5230b, // onmouseover
-       0x1b7: 0x2b20a, // novalidate
-       0x1b8: 0x71c0a, // workertype
-       0x1ba: 0x3cd07, // itemref
-       0x1bd: 0x1,     // a
-       0x1be: 0x31803, // map
-       0x1bf: 0x400c,  // ontimeupdate
-       0x1c0: 0x15e07, // bgsound
-       0x1c1: 0x3206,  // keygen
-       0x1c2: 0x2705,  // tbody
-       0x1c5: 0x64406, // onshow
-       0x1c7: 0x2501,  // s
-       0x1c8: 0x6607,  // pattern
-       0x1cc: 0x14d10, // oncanplaythrough
-       0x1ce: 0x2d702, // dd
-       0x1cf: 0x6f906, // srcset
-       0x1d0: 0x17003, // big
-       0x1d2: 0x65108, // sortable
-       0x1d3: 0x48007, // onkeyup
-       0x1d5: 0x5a406, // onplay
-       0x1d7: 0x4b804, // meta
-       0x1d8: 0x40306, // ondrop
-       0x1da: 0x60008, // onscroll
-       0x1db: 0x1fb0b, // crossorigin
-       0x1dc: 0x5730a, // onpageshow
-       0x1dd: 0x4,     // abbr
-       0x1de: 0x9202,  // td
-       0x1df: 0x58b0f, // contenteditable
-       0x1e0: 0x27206, // action
-       0x1e1: 0x1400b, // playsinline
-       0x1e2: 0x43107, // onfocus
-       0x1e3: 0x2e008, // hreflang
-       0x1e5: 0x5160a, // onmouseout
-       0x1e6: 0x5ea07, // onreset
-       0x1e7: 0x13c08, // autoplay
-       0x1e8: 0x63109, // onseeking
-       0x1ea: 0x67506, // scoped
-       0x1ec: 0x30a,   // radiogroup
-       0x1ee: 0x3800b, // contextmenu
-       0x1ef: 0x52e09, // onmouseup
-       0x1f1: 0x2ca06, // hgroup
-       0x1f2: 0x2080f, // allowfullscreen
-       0x1f3: 0x4be08, // tabindex
-       0x1f6: 0x30f07, // isindex
-       0x1f7: 0x1a0e,  // accept-charset
-       0x1f8: 0x2ae0e, // formnovalidate
-       0x1fb: 0x1c90e, // annotation-xml
-       0x1fc: 0x6e05,  // embed
-       0x1fd: 0x21806, // script
-       0x1fe: 0xbb06,  // dialog
-       0x1ff: 0x1d707, // command
-}
-
-const atomText = "abbradiogrouparamainavalueaccept-charsetbodyaccesskeygenobrb" +
-       "asefontimeupdateviacacheightmlabelooptgroupatternoembedetail" +
-       "sampictureversedfnoframesetdirnameterowspanomoduleacronymali" +
-       "gnmarkbdialogallowpaymentrequestrikeytypeallowusermediagroup" +
-       "ingaltfooterubyasyncanvasidefaultitleaudioncancelautofocusan" +
-       "dboxmplaceholderautoplaysinlinebdoncanplaythrough1bgsoundisa" +
-       "bledivarbigblinkindraggablegendblockquotebuttonabortcitempro" +
-       "penoncecolgrouplaintextrackcolorcolspannotation-xmlcommandco" +
-       "ntrolshapecoordslotranslatecrossoriginsmallowfullscreenoscri" +
-       "ptfacenterfieldsetfigcaptionafterprintegrityfigurequiredfore" +
-       "ignObjectforeignobjectformactionautocompleteerrorformenctype" +
-       "mustmatchallengeformmethodformnovalidatetimeformtargethgroup" +
-       "osterhiddenhigh2hreflanghttp-equivideonclickiframeimageimgly" +
-       "ph3isindexismappletitemtypemarqueematheadersortedmaxlength4m" +
-       "inlength5mtextareadonlymultiplemutedoncloseamlessourceoncont" +
-       "extmenuitemidoncopyoncuechangeoncutondblclickondragendondrag" +
-       "enterondragexitemreferrerpolicyondragleaveondragoverondragst" +
-       "articleondropzonemptiedondurationchangeonendedonerroronfocus" +
-       "paceronhashchangeoninputmodeloninvalidonkeydownloadonkeypres" +
-       "spellcheckedonkeyupreloadonlanguagechangeonloadeddatalisting" +
-       "onloadedmetadatabindexonloadendonloadstartonmessageerroronmo" +
-       "usedownonmouseenteronmouseleaveonmousemoveonmouseoutputonmou" +
-       "seoveronmouseupromptonmousewheelonofflineononlineonpagehides" +
-       "classectionbluronpageshowbronpastepublicontenteditableonpaus" +
-       "emaponplayingonpopstateonprogressrcdocodeferonratechangeonre" +
-       "jectionhandledonresetonresizesrclangonscrollonsecuritypolicy" +
-       "violationauxclickonseekedonseekingonselectedonshowidth6onsor" +
-       "tableonstalledonstorageonsubmitemscopedonsuspendontoggleonun" +
-       "handledrejectionbeforeprintonunloadonvolumechangeonwaitingon" +
-       "wheeloptimumanifestrongoptionbeforeunloaddressrcsetstylesumm" +
-       "arysupsvgsystemplateworkertypewrap"
diff --git a/html/atom/table_test.go b/html/atom/table_test.go
deleted file mode 100644 (file)
index 8a30762..0000000
+++ /dev/null
@@ -1,376 +0,0 @@
-// Code generated by go generate gen.go; DO NOT EDIT.
-
-//go:generate go run gen.go -test
-
-package atom
-
-var testAtomList = []string{
-       "a",
-       "abbr",
-       "accept",
-       "accept-charset",
-       "accesskey",
-       "acronym",
-       "action",
-       "address",
-       "align",
-       "allowfullscreen",
-       "allowpaymentrequest",
-       "allowusermedia",
-       "alt",
-       "annotation",
-       "annotation-xml",
-       "applet",
-       "area",
-       "article",
-       "as",
-       "aside",
-       "async",
-       "audio",
-       "autocomplete",
-       "autofocus",
-       "autoplay",
-       "b",
-       "base",
-       "basefont",
-       "bdi",
-       "bdo",
-       "bgsound",
-       "big",
-       "blink",
-       "blockquote",
-       "body",
-       "br",
-       "button",
-       "canvas",
-       "caption",
-       "center",
-       "challenge",
-       "charset",
-       "checked",
-       "cite",
-       "class",
-       "code",
-       "col",
-       "colgroup",
-       "color",
-       "cols",
-       "colspan",
-       "command",
-       "content",
-       "contenteditable",
-       "contextmenu",
-       "controls",
-       "coords",
-       "crossorigin",
-       "data",
-       "datalist",
-       "datetime",
-       "dd",
-       "default",
-       "defer",
-       "del",
-       "desc",
-       "details",
-       "dfn",
-       "dialog",
-       "dir",
-       "dirname",
-       "disabled",
-       "div",
-       "dl",
-       "download",
-       "draggable",
-       "dropzone",
-       "dt",
-       "em",
-       "embed",
-       "enctype",
-       "face",
-       "fieldset",
-       "figcaption",
-       "figure",
-       "font",
-       "footer",
-       "for",
-       "foreignObject",
-       "foreignobject",
-       "form",
-       "formaction",
-       "formenctype",
-       "formmethod",
-       "formnovalidate",
-       "formtarget",
-       "frame",
-       "frameset",
-       "h1",
-       "h2",
-       "h3",
-       "h4",
-       "h5",
-       "h6",
-       "head",
-       "header",
-       "headers",
-       "height",
-       "hgroup",
-       "hidden",
-       "high",
-       "hr",
-       "href",
-       "hreflang",
-       "html",
-       "http-equiv",
-       "i",
-       "icon",
-       "id",
-       "iframe",
-       "image",
-       "img",
-       "input",
-       "inputmode",
-       "ins",
-       "integrity",
-       "is",
-       "isindex",
-       "ismap",
-       "itemid",
-       "itemprop",
-       "itemref",
-       "itemscope",
-       "itemtype",
-       "kbd",
-       "keygen",
-       "keytype",
-       "kind",
-       "label",
-       "lang",
-       "legend",
-       "li",
-       "link",
-       "list",
-       "listing",
-       "loop",
-       "low",
-       "main",
-       "malignmark",
-       "manifest",
-       "map",
-       "mark",
-       "marquee",
-       "math",
-       "max",
-       "maxlength",
-       "media",
-       "mediagroup",
-       "menu",
-       "menuitem",
-       "meta",
-       "meter",
-       "method",
-       "mglyph",
-       "mi",
-       "min",
-       "minlength",
-       "mn",
-       "mo",
-       "ms",
-       "mtext",
-       "multiple",
-       "muted",
-       "name",
-       "nav",
-       "nobr",
-       "noembed",
-       "noframes",
-       "nomodule",
-       "nonce",
-       "noscript",
-       "novalidate",
-       "object",
-       "ol",
-       "onabort",
-       "onafterprint",
-       "onautocomplete",
-       "onautocompleteerror",
-       "onauxclick",
-       "onbeforeprint",
-       "onbeforeunload",
-       "onblur",
-       "oncancel",
-       "oncanplay",
-       "oncanplaythrough",
-       "onchange",
-       "onclick",
-       "onclose",
-       "oncontextmenu",
-       "oncopy",
-       "oncuechange",
-       "oncut",
-       "ondblclick",
-       "ondrag",
-       "ondragend",
-       "ondragenter",
-       "ondragexit",
-       "ondragleave",
-       "ondragover",
-       "ondragstart",
-       "ondrop",
-       "ondurationchange",
-       "onemptied",
-       "onended",
-       "onerror",
-       "onfocus",
-       "onhashchange",
-       "oninput",
-       "oninvalid",
-       "onkeydown",
-       "onkeypress",
-       "onkeyup",
-       "onlanguagechange",
-       "onload",
-       "onloadeddata",
-       "onloadedmetadata",
-       "onloadend",
-       "onloadstart",
-       "onmessage",
-       "onmessageerror",
-       "onmousedown",
-       "onmouseenter",
-       "onmouseleave",
-       "onmousemove",
-       "onmouseout",
-       "onmouseover",
-       "onmouseup",
-       "onmousewheel",
-       "onoffline",
-       "ononline",
-       "onpagehide",
-       "onpageshow",
-       "onpaste",
-       "onpause",
-       "onplay",
-       "onplaying",
-       "onpopstate",
-       "onprogress",
-       "onratechange",
-       "onrejectionhandled",
-       "onreset",
-       "onresize",
-       "onscroll",
-       "onsecuritypolicyviolation",
-       "onseeked",
-       "onseeking",
-       "onselect",
-       "onshow",
-       "onsort",
-       "onstalled",
-       "onstorage",
-       "onsubmit",
-       "onsuspend",
-       "ontimeupdate",
-       "ontoggle",
-       "onunhandledrejection",
-       "onunload",
-       "onvolumechange",
-       "onwaiting",
-       "onwheel",
-       "open",
-       "optgroup",
-       "optimum",
-       "option",
-       "output",
-       "p",
-       "param",
-       "pattern",
-       "picture",
-       "ping",
-       "placeholder",
-       "plaintext",
-       "playsinline",
-       "poster",
-       "pre",
-       "preload",
-       "progress",
-       "prompt",
-       "public",
-       "q",
-       "radiogroup",
-       "rb",
-       "readonly",
-       "referrerpolicy",
-       "rel",
-       "required",
-       "reversed",
-       "rows",
-       "rowspan",
-       "rp",
-       "rt",
-       "rtc",
-       "ruby",
-       "s",
-       "samp",
-       "sandbox",
-       "scope",
-       "scoped",
-       "script",
-       "seamless",
-       "section",
-       "select",
-       "selected",
-       "shape",
-       "size",
-       "sizes",
-       "slot",
-       "small",
-       "sortable",
-       "sorted",
-       "source",
-       "spacer",
-       "span",
-       "spellcheck",
-       "src",
-       "srcdoc",
-       "srclang",
-       "srcset",
-       "start",
-       "step",
-       "strike",
-       "strong",
-       "style",
-       "sub",
-       "summary",
-       "sup",
-       "svg",
-       "system",
-       "tabindex",
-       "table",
-       "target",
-       "tbody",
-       "td",
-       "template",
-       "textarea",
-       "tfoot",
-       "th",
-       "thead",
-       "time",
-       "title",
-       "tr",
-       "track",
-       "translate",
-       "tt",
-       "type",
-       "typemustmatch",
-       "u",
-       "ul",
-       "updateviacache",
-       "usemap",
-       "value",
-       "var",
-       "video",
-       "wbr",
-       "width",
-       "workertype",
-       "wrap",
-       "xmp",
-}
diff --git a/html/charset/charset.go b/html/charset/charset.go
deleted file mode 100644 (file)
index 00062a7..0000000
+++ /dev/null
@@ -1,257 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package charset provides common text encodings for HTML documents.
-//
-// The mapping from encoding labels to encodings is defined at
-// https://encoding.spec.whatwg.org/.
-package charset // import "git.earlybird.gay/today-engine/html/charset"
-
-import (
-       "bytes"
-       "fmt"
-       "io"
-       "mime"
-       "strings"
-       "unicode/utf8"
-
-       "git.earlybird.gay/today-engine/html"
-       "golang.org/x/text/encoding"
-       "golang.org/x/text/encoding/charmap"
-       "golang.org/x/text/encoding/htmlindex"
-       "golang.org/x/text/transform"
-)
-
-// Lookup returns the encoding with the specified label, and its canonical
-// name. It returns nil and the empty string if label is not one of the
-// standard encodings for HTML. Matching is case-insensitive and ignores
-// leading and trailing whitespace. Encoders will use HTML escape sequences for
-// runes that are not supported by the character set.
-func Lookup(label string) (e encoding.Encoding, name string) {
-       e, err := htmlindex.Get(label)
-       if err != nil {
-               return nil, ""
-       }
-       name, _ = htmlindex.Name(e)
-       return &htmlEncoding{e}, name
-}
-
-type htmlEncoding struct{ encoding.Encoding }
-
-func (h *htmlEncoding) NewEncoder() *encoding.Encoder {
-       // HTML requires a non-terminating legacy encoder. We use HTML escapes to
-       // substitute unsupported code points.
-       return encoding.HTMLEscapeUnsupported(h.Encoding.NewEncoder())
-}
-
-// DetermineEncoding determines the encoding of an HTML document by examining
-// up to the first 1024 bytes of content and the declared Content-Type.
-//
-// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding
-func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) {
-       if len(content) > 1024 {
-               content = content[:1024]
-       }
-
-       for _, b := range boms {
-               if bytes.HasPrefix(content, b.bom) {
-                       e, name = Lookup(b.enc)
-                       return e, name, true
-               }
-       }
-
-       if _, params, err := mime.ParseMediaType(contentType); err == nil {
-               if cs, ok := params["charset"]; ok {
-                       if e, name = Lookup(cs); e != nil {
-                               return e, name, true
-                       }
-               }
-       }
-
-       if len(content) > 0 {
-               e, name = prescan(content)
-               if e != nil {
-                       return e, name, false
-               }
-       }
-
-       // Try to detect UTF-8.
-       // First eliminate any partial rune at the end.
-       for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
-               b := content[i]
-               if b < 0x80 {
-                       break
-               }
-               if utf8.RuneStart(b) {
-                       content = content[:i]
-                       break
-               }
-       }
-       hasHighBit := false
-       for _, c := range content {
-               if c >= 0x80 {
-                       hasHighBit = true
-                       break
-               }
-       }
-       if hasHighBit && utf8.Valid(content) {
-               return encoding.Nop, "utf-8", false
-       }
-
-       // TODO: change default depending on user's locale?
-       return charmap.Windows1252, "windows-1252", false
-}
-
-// NewReader returns an io.Reader that converts the content of r to UTF-8.
-// It calls DetermineEncoding to find out what r's encoding is.
-func NewReader(r io.Reader, contentType string) (io.Reader, error) {
-       preview := make([]byte, 1024)
-       n, err := io.ReadFull(r, preview)
-       switch {
-       case err == io.ErrUnexpectedEOF:
-               preview = preview[:n]
-               r = bytes.NewReader(preview)
-       case err != nil:
-               return nil, err
-       default:
-               r = io.MultiReader(bytes.NewReader(preview), r)
-       }
-
-       if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop {
-               r = transform.NewReader(r, e.NewDecoder())
-       }
-       return r, nil
-}
-
-// NewReaderLabel returns a reader that converts from the specified charset to
-// UTF-8. It uses Lookup to find the encoding that corresponds to label, and
-// returns an error if Lookup returns nil. It is suitable for use as
-// encoding/xml.Decoder's CharsetReader function.
-func NewReaderLabel(label string, input io.Reader) (io.Reader, error) {
-       e, _ := Lookup(label)
-       if e == nil {
-               return nil, fmt.Errorf("unsupported charset: %q", label)
-       }
-       return transform.NewReader(input, e.NewDecoder()), nil
-}
-
-func prescan(content []byte) (e encoding.Encoding, name string) {
-       z := html.NewTokenizer(bytes.NewReader(content))
-       for {
-               switch z.Next() {
-               case html.ErrorToken:
-                       return nil, ""
-
-               case html.StartTagToken, html.SelfClosingTagToken:
-                       tagName, hasAttr := z.TagName()
-                       if !bytes.Equal(tagName, []byte("meta")) {
-                               continue
-                       }
-                       attrList := make(map[string]bool)
-                       gotPragma := false
-
-                       const (
-                               dontKnow = iota
-                               doNeedPragma
-                               doNotNeedPragma
-                       )
-                       needPragma := dontKnow
-
-                       name = ""
-                       e = nil
-                       for hasAttr {
-                               var key, val []byte
-                               key, val, hasAttr = z.TagAttr()
-                               ks := string(key)
-                               if attrList[ks] {
-                                       continue
-                               }
-                               attrList[ks] = true
-                               for i, c := range val {
-                                       if 'A' <= c && c <= 'Z' {
-                                               val[i] = c + 0x20
-                                       }
-                               }
-
-                               switch ks {
-                               case "http-equiv":
-                                       if bytes.Equal(val, []byte("content-type")) {
-                                               gotPragma = true
-                                       }
-
-                               case "content":
-                                       if e == nil {
-                                               name = fromMetaElement(string(val))
-                                               if name != "" {
-                                                       e, name = Lookup(name)
-                                                       if e != nil {
-                                                               needPragma = doNeedPragma
-                                                       }
-                                               }
-                                       }
-
-                               case "charset":
-                                       e, name = Lookup(string(val))
-                                       needPragma = doNotNeedPragma
-                               }
-                       }
-
-                       if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
-                               continue
-                       }
-
-                       if strings.HasPrefix(name, "utf-16") {
-                               name = "utf-8"
-                               e = encoding.Nop
-                       }
-
-                       if e != nil {
-                               return e, name
-                       }
-               }
-       }
-}
-
-func fromMetaElement(s string) string {
-       for s != "" {
-               csLoc := strings.Index(s, "charset")
-               if csLoc == -1 {
-                       return ""
-               }
-               s = s[csLoc+len("charset"):]
-               s = strings.TrimLeft(s, " \t\n\f\r")
-               if !strings.HasPrefix(s, "=") {
-                       continue
-               }
-               s = s[1:]
-               s = strings.TrimLeft(s, " \t\n\f\r")
-               if s == "" {
-                       return ""
-               }
-               if q := s[0]; q == '"' || q == '\'' {
-                       s = s[1:]
-                       closeQuote := strings.IndexRune(s, rune(q))
-                       if closeQuote == -1 {
-                               return ""
-                       }
-                       return s[:closeQuote]
-               }
-
-               end := strings.IndexAny(s, "; \t\n\f\r")
-               if end == -1 {
-                       end = len(s)
-               }
-               return s[:end]
-       }
-       return ""
-}
-
-var boms = []struct {
-       bom []byte
-       enc string
-}{
-       {[]byte{0xfe, 0xff}, "utf-16be"},
-       {[]byte{0xff, 0xfe}, "utf-16le"},
-       {[]byte{0xef, 0xbb, 0xbf}, "utf-8"},
-}
diff --git a/html/charset/charset_test.go b/html/charset/charset_test.go
deleted file mode 100644 (file)
index c2f6244..0000000
+++ /dev/null
@@ -1,237 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package charset
-
-import (
-       "bytes"
-       "encoding/xml"
-       "io"
-       "os"
-       "runtime"
-       "strings"
-       "testing"
-
-       "golang.org/x/text/transform"
-)
-
-func transformString(t transform.Transformer, s string) (string, error) {
-       r := transform.NewReader(strings.NewReader(s), t)
-       b, err := io.ReadAll(r)
-       return string(b), err
-}
-
-type testCase struct {
-       utf8, other, otherEncoding string
-}
-
-// testCases for encoding and decoding.
-var testCases = []testCase{
-       {"Résumé", "Résumé", "utf8"},
-       {"Résumé", "R\xe9sum\xe9", "latin1"},
-       {"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"},
-       {"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"},
-       {"Hello, world", "Hello, world", "ASCII"},
-       {"Gdańsk", "Gda\xf1sk", "ISO-8859-2"},
-       {"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", "ISO-8859-10"},
-       {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "ISO-8859-11"},
-       {"latviešu", "latvie\xf0u", "ISO-8859-13"},
-       {"Seònaid", "Se\xf2naid", "ISO-8859-14"},
-       {"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"},
-       {"românește", "rom\xe2ne\xbate", "ISO-8859-16"},
-       {"nutraĵo", "nutra\xbco", "ISO-8859-3"},
-       {"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"},
-       {"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"},
-       {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"},
-       {"Kağan", "Ka\xf0an", "ISO-8859-9"},
-       {"Résumé", "R\x8esum\x8e", "macintosh"},
-       {"Gdańsk", "Gda\xf1sk", "windows-1250"},
-       {"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"},
-       {"Résumé", "R\xe9sum\xe9", "windows-1252"},
-       {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"},
-       {"Kağan", "Ka\xf0an", "windows-1254"},
-       {"עִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"},
-       {"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"},
-       {"latviešu", "latvie\xf0u", "windows-1257"},
-       {"Việt", "Vi\xea\xf2t", "windows-1258"},
-       {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "windows-874"},
-       {"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"},
-       {"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"},
-       {"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"},
-       {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"},
-       {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"},
-       {"עִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"},
-       {"㧯", "\x82\x31\x89\x38", "gb18030"},
-       {"これは漢字です。", "\x82\xb1\x82\xea\x82\xcd\x8a\xbf\x8e\x9a\x82\xc5\x82\xb7\x81B", "SJIS"},
-       {"Hello, 世界!", "Hello, \x90\xa2\x8aE!", "SJIS"},
-       {"イウエオカ", "\xb2\xb3\xb4\xb5\xb6", "SJIS"},
-       {"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"},
-       {"Hello, 世界!", "Hello, \x1b$B@$3&\x1b(B!", "ISO-2022-JP"},
-       {"다음과 같은 조건을 따라야 합니다: 저작자표시", "\xb4\xd9\xc0\xbd\xb0\xfa \xb0\xb0\xc0\xba \xc1\xb6\xb0\xc7\xc0\xbb \xb5\xfb\xb6\xf3\xbe\xdf \xc7մϴ\xd9: \xc0\xfa\xc0\xdb\xc0\xdaǥ\xbd\xc3", "EUC-KR"},
-}
-
-func TestDecode(t *testing.T) {
-       testCases := append(testCases, []testCase{
-               // Replace multi-byte maximum subpart of ill-formed subsequence with
-               // single replacement character (WhatWG requirement).
-               {"Rés\ufffdumé", "Rés\xe1\x80umé", "utf8"},
-       }...)
-       for _, tc := range testCases {
-               e, _ := Lookup(tc.otherEncoding)
-               if e == nil {
-                       t.Errorf("%s: not found", tc.otherEncoding)
-                       continue
-               }
-               s, err := transformString(e.NewDecoder(), tc.other)
-               if err != nil {
-                       t.Errorf("%s: decode %q: %v", tc.otherEncoding, tc.other, err)
-                       continue
-               }
-               if s != tc.utf8 {
-                       t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.utf8)
-               }
-       }
-}
-
-func TestEncode(t *testing.T) {
-       testCases := append(testCases, []testCase{
-               // Use Go-style replacement.
-               {"Rés\xe1\x80umé", "Rés\ufffd\ufffdumé", "utf8"},
-               // U+0144 LATIN SMALL LETTER N WITH ACUTE not supported by encoding.
-               {"Gdańsk", "Gda&#324;sk", "ISO-8859-11"},
-               {"\ufffd", "&#65533;", "ISO-8859-11"},
-               {"a\xe1\x80b", "a&#65533;&#65533;b", "ISO-8859-11"},
-       }...)
-       for _, tc := range testCases {
-               e, _ := Lookup(tc.otherEncoding)
-               if e == nil {
-                       t.Errorf("%s: not found", tc.otherEncoding)
-                       continue
-               }
-               s, err := transformString(e.NewEncoder(), tc.utf8)
-               if err != nil {
-                       t.Errorf("%s: encode %q: %s", tc.otherEncoding, tc.utf8, err)
-                       continue
-               }
-               if s != tc.other {
-                       t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.other)
-               }
-       }
-}
-
-var sniffTestCases = []struct {
-       filename, declared, want string
-}{
-       {"HTTP-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
-       {"UTF-16LE-BOM.html", "", "utf-16le"},
-       {"UTF-16BE-BOM.html", "", "utf-16be"},
-       {"meta-content-attribute.html", "text/html", "iso-8859-15"},
-       {"meta-charset-attribute.html", "text/html", "iso-8859-15"},
-       {"No-encoding-declaration.html", "text/html", "utf-8"},
-       {"HTTP-vs-UTF-8-BOM.html", "text/html; charset=iso-8859-15", "utf-8"},
-       {"HTTP-vs-meta-content.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
-       {"HTTP-vs-meta-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
-       {"UTF-8-BOM-vs-meta-content.html", "text/html", "utf-8"},
-       {"UTF-8-BOM-vs-meta-charset.html", "text/html", "utf-8"},
-}
-
-func TestSniff(t *testing.T) {
-       switch runtime.GOOS {
-       case "nacl": // platforms that don't permit direct file system access
-               t.Skipf("not supported on %q", runtime.GOOS)
-       }
-
-       for _, tc := range sniffTestCases {
-               content, err := os.ReadFile("testdata/" + tc.filename)
-               if err != nil {
-                       t.Errorf("%s: error reading file: %v", tc.filename, err)
-                       continue
-               }
-
-               _, name, _ := DetermineEncoding(content, tc.declared)
-               if name != tc.want {
-                       t.Errorf("%s: got %q, want %q", tc.filename, name, tc.want)
-                       continue
-               }
-       }
-}
-
-func TestReader(t *testing.T) {
-       switch runtime.GOOS {
-       case "nacl": // platforms that don't permit direct file system access
-               t.Skipf("not supported on %q", runtime.GOOS)
-       }
-
-       for _, tc := range sniffTestCases {
-               content, err := os.ReadFile("testdata/" + tc.filename)
-               if err != nil {
-                       t.Errorf("%s: error reading file: %v", tc.filename, err)
-                       continue
-               }
-
-               r, err := NewReader(bytes.NewReader(content), tc.declared)
-               if err != nil {
-                       t.Errorf("%s: error creating reader: %v", tc.filename, err)
-                       continue
-               }
-
-               got, err := io.ReadAll(r)
-               if err != nil {
-                       t.Errorf("%s: error reading from charset.NewReader: %v", tc.filename, err)
-                       continue
-               }
-
-               e, _ := Lookup(tc.want)
-               want, err := io.ReadAll(transform.NewReader(bytes.NewReader(content), e.NewDecoder()))
-               if err != nil {
-                       t.Errorf("%s: error decoding with hard-coded charset name: %v", tc.filename, err)
-                       continue
-               }
-
-               if !bytes.Equal(got, want) {
-                       t.Errorf("%s: got %q, want %q", tc.filename, got, want)
-                       continue
-               }
-       }
-}
-
-var metaTestCases = []struct {
-       meta, want string
-}{
-       {"", ""},
-       {"text/html", ""},
-       {"text/html; charset utf-8", ""},
-       {"text/html; charset=latin-2", "latin-2"},
-       {"text/html; charset; charset = utf-8", "utf-8"},
-       {`charset="big5"`, "big5"},
-       {"charset='shift_jis'", "shift_jis"},
-}
-
-func TestFromMeta(t *testing.T) {
-       for _, tc := range metaTestCases {
-               got := fromMetaElement(tc.meta)
-               if got != tc.want {
-                       t.Errorf("%q: got %q, want %q", tc.meta, got, tc.want)
-               }
-       }
-}
-
-func TestXML(t *testing.T) {
-       const s = "<?xml version=\"1.0\" encoding=\"windows-1252\"?><a><Word>r\xe9sum\xe9</Word></a>"
-
-       d := xml.NewDecoder(strings.NewReader(s))
-       d.CharsetReader = NewReaderLabel
-
-       var a struct {
-               Word string
-       }
-       if err := d.Decode(&a); err != nil {
-               t.Fatalf("Decode: %v", err)
-       }
-
-       want := "résumé"
-       if a.Word != want {
-               t.Errorf("got %q, want %q", a.Word, want)
-       }
-}
diff --git a/html/charset/testdata/HTTP-charset.html b/html/charset/testdata/HTTP-charset.html
deleted file mode 100644 (file)
index 9915fa0..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-<!DOCTYPE html>
-<html  lang="en" >
-<head>
-  <title>HTTP charset</title>
-<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
-<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
-<link rel="stylesheet" type="text/css" href="./generatedtests.css">
-<script src="http://w3c-test.org/resources/testharness.js"></script>
-<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
-<meta name='flags' content='http'>
-<meta name="assert" content="The character encoding of a page can be set using the HTTP header charset declaration.">
-<style type='text/css'>
-.test div { width: 50px; }</style>
-<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
-</head>
-<body>
-<p class='title'>HTTP charset</p>
-
-
-<div id='log'></div>
-
-
-<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
-
-
-
-
-
-<div class='description'>
-<p class="assertion" title="Assertion">The character encoding of a page can be set using the HTTP header charset declaration.</p>
-<div class="notes"><p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p><p>The only character encoding declaration for this HTML file is in the HTTP header, which sets the encoding to ISO 8859-15.</p></p>
-</div>
-</div>
-<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-003">Next test</a></div><div class="doctype">HTML5</div>
-<p class="jump">the-input-byte-stream-001<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-001" target="_blank">Detailed results for this test</a><br/>       <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
-<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
-                               <li>The test is read from a server that supports HTTP.</li></ul></div>
-</div>
-<script>
-test(function() {
-assert_equals(document.getElementById('box').offsetWidth, 100);
-}, " ");
-</script>
-
-</body>
-</html>
-
-
diff --git a/html/charset/testdata/HTTP-vs-UTF-8-BOM.html b/html/charset/testdata/HTTP-vs-UTF-8-BOM.html
deleted file mode 100644 (file)
index 26e5d8b..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-<!DOCTYPE html>
-<html  lang="en" >
-<head>
-  <title>HTTP vs UTF-8 BOM</title>
-<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
-<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
-<link rel="stylesheet" type="text/css" href="./generatedtests.css">
-<script src="http://w3c-test.org/resources/testharness.js"></script>
-<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
-<meta name='flags' content='http'>
-<meta name="assert" content="A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.">
-<style type='text/css'>
-.test div { width: 50px; }</style>
-<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
-</head>
-<body>
-<p class='title'>HTTP vs UTF-8 BOM</p>
-
-
-<div id='log'></div>
-
-
-<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
-
-
-
-
-
-<div class='description'>
-<p class="assertion" title="Assertion">A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.</p>
-<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p><p>If the test is unsuccessful, the characters &#x00EF;&#x00BB;&#x00BF; should appear at the top of the page.  These represent the bytes that make up the UTF-8 signature when encountered in the ISO 8859-15 encoding.</p></p>
-</div>
-</div>
-<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-022">Next test</a></div><div class="doctype">HTML5</div>
-<p class="jump">the-input-byte-stream-034<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-034" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
-<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
-                               <li>The test is read from a server that supports HTTP.</li></ul></div>
-</div>
-<script>
-test(function() {
-assert_equals(document.getElementById('box').offsetWidth, 100);
-}, " ");
-</script>
-
-</body>
-</html>
-
-
diff --git a/html/charset/testdata/HTTP-vs-meta-charset.html b/html/charset/testdata/HTTP-vs-meta-charset.html
deleted file mode 100644 (file)
index 2f07e95..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-<!DOCTYPE html>
-<html  lang="en" >
-<head>
- <meta charset="iso-8859-1" > <title>HTTP vs meta charset</title>
-<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
-<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
-<link rel="stylesheet" type="text/css" href="./generatedtests.css">
-<script src="http://w3c-test.org/resources/testharness.js"></script>
-<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
-<meta name='flags' content='http'>
-<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.">
-<style type='text/css'>
-.test div { width: 50px; }.test div { width: 90px; }
-</style>
-<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
-</head>
-<body>
-<p class='title'>HTTP vs meta charset</p>
-
-
-<div id='log'></div>
-
-
-<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
-
-
-
-
-
-<div class='description'>
-<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.</p>
-<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
-</div>
-</div>
-<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-037">Next test</a></div><div class="doctype">HTML5</div>
-<p class="jump">the-input-byte-stream-018<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-018" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
-<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
-                               <li>The test is read from a server that supports HTTP.</li></ul></div>
-</div>
-<script>
-test(function() {
-assert_equals(document.getElementById('box').offsetWidth, 100);
-}, " ");
-</script>
-
-</body>
-</html>
-
-
diff --git a/html/charset/testdata/HTTP-vs-meta-content.html b/html/charset/testdata/HTTP-vs-meta-content.html
deleted file mode 100644 (file)
index 6853cdd..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-<!DOCTYPE html>
-<html  lang="en" >
-<head>
- <meta http-equiv="content-type" content="text/html;charset=iso-8859-1" > <title>HTTP vs meta content</title>
-<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
-<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
-<link rel="stylesheet" type="text/css" href="./generatedtests.css">
-<script src="http://w3c-test.org/resources/testharness.js"></script>
-<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
-<meta name='flags' content='http'>
-<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.">
-<style type='text/css'>
-.test div { width: 50px; }.test div { width: 90px; }
-</style>
-<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
-</head>
-<body>
-<p class='title'>HTTP vs meta content</p>
-
-
-<div id='log'></div>
-
-
-<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
-
-
-
-
-
-<div class='description'>
-<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.</p>
-<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
-</div>
-</div>
-<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-018">Next test</a></div><div class="doctype">HTML5</div>
-<p class="jump">the-input-byte-stream-016<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-016" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
-<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
-                               <li>The test is read from a server that supports HTTP.</li></ul></div>
-</div>
-<script>
-test(function() {
-assert_equals(document.getElementById('box').offsetWidth, 100);
-}, " ");
-</script>
-
-</body>
-</html>
-
-
diff --git a/html/charset/testdata/No-encoding-declaration.html b/html/charset/testdata/No-encoding-declaration.html
deleted file mode 100644 (file)
index 612e26c..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-<!DOCTYPE html>
-<html  lang="en" >
-<head>
-  <title>No encoding declaration</title>
-<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
-<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
-<link rel="stylesheet" type="text/css" href="./generatedtests.css">
-<script src="http://w3c-test.org/resources/testharness.js"></script>
-<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
-<meta name='flags' content='http'>
-<meta name="assert" content="A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.">
-<style type='text/css'>
-.test div { width: 50px; }</style>
-<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
-</head>
-<body>
-<p class='title'>No encoding declaration</p>
-
-
-<div id='log'></div>
-
-
-<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
-
-
-
-
-
-<div class='description'>
-<p class="assertion" title="Assertion">A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.</p>
-<div class="notes"><p><p>The test on this page contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
-</div>
-</div>
-<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-034">Next test</a></div><div class="doctype">HTML5</div>
-<p class="jump">the-input-byte-stream-015<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-015" target="_blank">Detailed results for this test</a><br/>       <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
-<div class='prereq'>Assumptions: <ul><li>The test is read from a server that supports HTTP.</li></ul></div>
-</div>
-<script>
-test(function() {
-assert_equals(document.getElementById('box').offsetWidth, 100);
-}, " ");
-</script>
-
-</body>
-</html>
-
-
diff --git a/html/charset/testdata/README b/html/charset/testdata/README
deleted file mode 100644 (file)
index 38ef0f9..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-These test cases come from
-http://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics
-
-Distributed under both the W3C Test Suite License
-(http://www.w3.org/Consortium/Legal/2008/04-testsuite-license)
-and the W3C 3-clause BSD License
-(http://www.w3.org/Consortium/Legal/2008/03-bsd-license).
-To contribute to a W3C Test Suite, see the policies and contribution
-forms (http://www.w3.org/2004/10/27-testcases).
diff --git a/html/charset/testdata/UTF-16BE-BOM.html b/html/charset/testdata/UTF-16BE-BOM.html
deleted file mode 100644 (file)
index 3abf7a9..0000000
Binary files a/html/charset/testdata/UTF-16BE-BOM.html and /dev/null differ
diff --git a/html/charset/testdata/UTF-16LE-BOM.html b/html/charset/testdata/UTF-16LE-BOM.html
deleted file mode 100644 (file)
index 76254c9..0000000
Binary files a/html/charset/testdata/UTF-16LE-BOM.html and /dev/null differ
diff --git a/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html b/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html
deleted file mode 100644 (file)
index 83de433..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-<!DOCTYPE html>
-<html  lang="en" >
-<head>
- <meta charset="iso-8859-15"> <title>UTF-8 BOM vs meta charset</title>
-<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
-<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
-<link rel="stylesheet" type="text/css" href="./generatedtests.css">
-<script src="http://w3c-test.org/resources/testharness.js"></script>
-<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
-<meta name='flags' content='http'>
-<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.">
-<style type='text/css'>
-.test div { width: 50px; }.test div { width: 90px; }
-</style>
-<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
-</head>
-<body>
-<p class='title'>UTF-8 BOM vs meta charset</p>
-
-
-<div id='log'></div>
-
-
-<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
-
-
-
-
-
-<div class='description'>
-<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.</p>
-<div class="notes"><p><p>The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
-</div>
-</div>
-<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-024">Next test</a></div><div class="doctype">HTML5</div>
-<p class="jump">the-input-byte-stream-038<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-038" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
-<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
-                               <li>The test is read from a server that supports HTTP.</li></ul></div>
-</div>
-<script>
-test(function() {
-assert_equals(document.getElementById('box').offsetWidth, 100);
-}, " ");
-</script>
-
-</body>
-</html>
-
-
diff --git a/html/charset/testdata/UTF-8-BOM-vs-meta-content.html b/html/charset/testdata/UTF-8-BOM-vs-meta-content.html
deleted file mode 100644 (file)
index 501aac2..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-<!DOCTYPE html>
-<html  lang="en" >
-<head>
- <meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>UTF-8 BOM vs meta content</title>
-<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
-<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
-<link rel="stylesheet" type="text/css" href="./generatedtests.css">
-<script src="http://w3c-test.org/resources/testharness.js"></script>
-<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
-<meta name='flags' content='http'>
-<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.">
-<style type='text/css'>
-.test div { width: 50px; }</style>
-<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
-</head>
-<body>
-<p class='title'>UTF-8 BOM vs meta content</p>
-
-
-<div id='log'></div>
-
-
-<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
-
-
-
-
-
-<div class='description'>
-<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.</p>
-<div class="notes"><p><p>The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
-</div>
-</div>
-<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-038">Next test</a></div><div class="doctype">HTML5</div>
-<p class="jump">the-input-byte-stream-037<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-037" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
-<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
-                               <li>The test is read from a server that supports HTTP.</li></ul></div>
-</div>
-<script>
-test(function() {
-assert_equals(document.getElementById('box').offsetWidth, 100);
-}, " ");
-</script>
-
-</body>
-</html>
-
-
diff --git a/html/charset/testdata/meta-charset-attribute.html b/html/charset/testdata/meta-charset-attribute.html
deleted file mode 100644 (file)
index 2d7d25a..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-<!DOCTYPE html>
-<html  lang="en" >
-<head>
- <meta charset="iso-8859-15"> <title>meta charset attribute</title>
-<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
-<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
-<link rel="stylesheet" type="text/css" href="./generatedtests.css">
-<script src="http://w3c-test.org/resources/testharness.js"></script>
-<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
-<meta name='flags' content='http'>
-<meta name="assert" content="The character encoding of the page can be set by a meta element with charset attribute.">
-<style type='text/css'>
-.test div { width: 50px; }</style>
-<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
-</head>
-<body>
-<p class='title'>meta charset attribute</p>
-
-
-<div id='log'></div>
-
-
-<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
-
-
-
-
-
-<div class='description'>
-<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with charset attribute.</p>
-<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
-</div>
-</div>
-<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-015">Next test</a></div><div class="doctype">HTML5</div>
-<p class="jump">the-input-byte-stream-009<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-009" target="_blank">Detailed results for this test</a><br/>       <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
-<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
-                               <li>The test is read from a server that supports HTTP.</li></ul></div>
-</div>
-<script>
-test(function() {
-assert_equals(document.getElementById('box').offsetWidth, 100);
-}, " ");
-</script>
-
-</body>
-</html>
-
-
diff --git a/html/charset/testdata/meta-content-attribute.html b/html/charset/testdata/meta-content-attribute.html
deleted file mode 100644 (file)
index 1c3f228..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-<!DOCTYPE html>
-<html  lang="en" >
-<head>
- <meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>meta content attribute</title>
-<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
-<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
-<link rel="stylesheet" type="text/css" href="./generatedtests.css">
-<script src="http://w3c-test.org/resources/testharness.js"></script>
-<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
-<meta name='flags' content='http'>
-<meta name="assert" content="The character encoding of the page can be set by a meta element with http-equiv and content attributes.">
-<style type='text/css'>
-.test div { width: 50px; }</style>
-<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
-</head>
-<body>
-<p class='title'>meta content attribute</p>
-
-
-<div id='log'></div>
-
-
-<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
-
-
-
-
-
-<div class='description'>
-<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with http-equiv and content attributes.</p>
-<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the content attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
-</div>
-</div>
-<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-009">Next test</a></div><div class="doctype">HTML5</div>
-<p class="jump">the-input-byte-stream-007<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-007" target="_blank">Detailed results for this test</a><br/>       <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
-<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
-                               <li>The test is read from a server that supports HTTP.</li></ul></div>
-</div>
-<script>
-test(function() {
-assert_equals(document.getElementById('box').offsetWidth, 100);
-}, " ");
-</script>
-
-</body>
-</html>
-
-
diff --git a/html/comment_test.go b/html/comment_test.go
deleted file mode 100644 (file)
index fd47de8..0000000
+++ /dev/null
@@ -1,291 +0,0 @@
-// Copyright 2023 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "bytes"
-       "strings"
-       "testing"
-)
-
-// TestComments exhaustively tests every 'interesting' N-byte string is
-// correctly parsed as a comment. N ranges from 4+1 to 4+maxSuffixLen
-// inclusive. 4 is the length of the "<!--" prefix that starts an HTML comment.
-//
-// 'Interesting' means that the N-4 byte suffix consists entirely of bytes
-// sampled from the interestingCommentBytes const string, below. These cover
-// all of the possible state transitions from comment-related parser states, as
-// listed in the HTML spec (https://html.spec.whatwg.org/#comment-start-state
-// and subsequent sections).
-//
-// The spec is written as an explicit state machine that, as a side effect,
-// accumulates "the comment token's data" to a separate buffer.
-// Tokenizer.readComment in this package does not have an explicit state
-// machine and usually returns the comment text as a sub-slice of the input,
-// between the opening '<' and closing '>' or EOF. This test confirms that the
-// two algorithms match.
-func TestComments(t *testing.T) {
-       const prefix = "<!--"
-       const maxSuffixLen = 6
-       buffer := make([]byte, 0, len(prefix)+maxSuffixLen)
-       testAllComments(t, append(buffer, prefix...))
-}
-
-// NUL isn't in this list, even though the HTML spec sections 13.2.5.43 -
-// 13.2.5.52 mentions it. It's not interesting in terms of state transitions.
-// It's equivalent to any other non-interesting byte (other than being replaced
-// by U+FFFD REPLACEMENT CHARACTER).
-//
-// EOF isn't in this list. The HTML spec treats EOF as "an input character" but
-// testOneComment below breaks the loop instead.
-//
-// 'x' represents all other "non-interesting" comment bytes.
-var interestingCommentBytes = [...]byte{
-       '!', '-', '<', '>', 'x',
-}
-
-// testAllComments recursively fills in buffer[len(buffer):cap(buffer)] with
-// interesting bytes and then tests that this package's tokenization matches
-// the HTML spec.
-//
-// Precondition: len(buffer) < cap(buffer)
-// Precondition: string(buffer[:4]) == "<!--"
-func testAllComments(t *testing.T, buffer []byte) {
-       for _, interesting := range interestingCommentBytes {
-               b := append(buffer, interesting)
-               testOneComment(t, b)
-               if len(b) < cap(b) {
-                       testAllComments(t, b)
-               }
-       }
-}
-
-func testOneComment(t *testing.T, b []byte) {
-       z := NewTokenizer(bytes.NewReader(b))
-       if next := z.Next(); next != CommentToken {
-               t.Fatalf("Next(%q): got %v, want %v", b, next, CommentToken)
-       }
-       gotRemainder := string(b[len(z.Raw()):])
-       gotComment := string(z.Text())
-
-       i := len("<!--")
-       wantBuffer := []byte(nil)
-loop:
-       for state := 43; ; {
-               // Consume the next input character, handling EOF.
-               if i >= len(b) {
-                       break
-               }
-               nextInputCharacter := b[i]
-               i++
-
-               switch state {
-               case 43: // 13.2.5.43 Comment start state.
-                       switch nextInputCharacter {
-                       case '-':
-                               state = 44
-                       case '>':
-                               break loop
-                       default:
-                               i-- // Reconsume.
-                               state = 45
-                       }
-
-               case 44: // 13.2.5.44 Comment start dash state.
-                       switch nextInputCharacter {
-                       case '-':
-                               state = 51
-                       case '>':
-                               break loop
-                       default:
-                               wantBuffer = append(wantBuffer, '-')
-                               i-- // Reconsume.
-                               state = 45
-                       }
-
-               case 45: // 13.2.5.45 Comment state.
-                       switch nextInputCharacter {
-                       case '-':
-                               state = 50
-                       case '<':
-                               wantBuffer = append(wantBuffer, '<')
-                               state = 46
-                       default:
-                               wantBuffer = append(wantBuffer, nextInputCharacter)
-                       }
-
-               case 46: // 13.2.5.46 Comment less-than sign state.
-                       switch nextInputCharacter {
-                       case '!':
-                               wantBuffer = append(wantBuffer, '!')
-                               state = 47
-                       case '<':
-                               wantBuffer = append(wantBuffer, '<')
-                               state = 46
-                       default:
-                               i-- // Reconsume.
-                               state = 45
-                       }
-
-               case 47: // 13.2.5.47 Comment less-than sign bang state.
-                       switch nextInputCharacter {
-                       case '-':
-                               state = 48
-                       default:
-                               i-- // Reconsume.
-                               state = 45
-                       }
-
-               case 48: // 13.2.5.48 Comment less-than sign bang dash state.
-                       switch nextInputCharacter {
-                       case '-':
-                               state = 49
-                       default:
-                               i-- // Reconsume.
-                               state = 50
-                       }
-
-               case 49: // 13.2.5.49 Comment less-than sign bang dash dash state.
-                       switch nextInputCharacter {
-                       case '>':
-                               break loop
-                       default:
-                               i-- // Reconsume.
-                               state = 51
-                       }
-
-               case 50: // 13.2.5.50 Comment end dash state.
-                       switch nextInputCharacter {
-                       case '-':
-                               state = 51
-                       default:
-                               wantBuffer = append(wantBuffer, '-')
-                               i-- // Reconsume.
-                               state = 45
-                       }
-
-               case 51: // 13.2.5.51 Comment end state.
-                       switch nextInputCharacter {
-                       case '!':
-                               state = 52
-                       case '-':
-                               wantBuffer = append(wantBuffer, '-')
-                       case '>':
-                               break loop
-                       default:
-                               wantBuffer = append(wantBuffer, "--"...)
-                               i-- // Reconsume.
-                               state = 45
-                       }
-
-               case 52: // 13.2.5.52 Comment end bang state.
-                       switch nextInputCharacter {
-                       case '-':
-                               wantBuffer = append(wantBuffer, "--!"...)
-                               state = 50
-                       case '>':
-                               break loop
-                       default:
-                               wantBuffer = append(wantBuffer, "--!"...)
-                               i-- // Reconsume.
-                               state = 45
-                       }
-
-               default:
-                       t.Fatalf("input=%q: unexpected state %d", b, state)
-               }
-       }
-
-       wantRemainder := ""
-       if i < len(b) {
-               wantRemainder = string(b[i:])
-       }
-       wantComment := string(wantBuffer)
-       if (gotComment != wantComment) || (gotRemainder != wantRemainder) {
-               t.Errorf("input=%q\ngot:  %q + %q\nwant: %q + %q",
-                       b, gotComment, gotRemainder, wantComment, wantRemainder)
-               return
-       }
-
-       // suffix is the "N-4 byte suffix" per the TestComments comment.
-       suffix := string(b[4:])
-
-       // Test that a round trip, rendering (escaped) and re-parsing, of a comment
-       // token (with that suffix as the Token.Data) preserves that string.
-       tok := Token{
-               Type: CommentToken,
-               Data: suffix,
-       }
-       z2 := NewTokenizer(strings.NewReader(tok.String()))
-       if next := z2.Next(); next != CommentToken {
-               t.Fatalf("round-trip Next(%q): got %v, want %v", suffix, next, CommentToken)
-       }
-       gotComment2 := string(z2.Text())
-       if gotComment2 != suffix {
-               t.Errorf("round-trip\ngot:  %q\nwant: %q", gotComment2, suffix)
-               return
-       }
-}
-
-// This table below summarizes the HTML-comment-related state machine from
-// 13.2.5.43 "Comment start state" and subsequent sections.
-// https://html.spec.whatwg.org/#comment-start-state
-//
-// Get to state 13.2.5.43 after seeing "<!--". Specifically, starting from the
-// initial 13.2.5.1 "Data state":
-//   - "<"  moves to 13.2.5.6  "Tag open state",
-//   - "!"  moves to 13.2.5.42 "Markup declaration open state",
-//   - "--" moves to 13.2.5.43 "Comment start state".
-// Each of these transitions are the only way to get to the 6/42/43 states.
-//
-// State   !         -         <         >         NUL       EOF       default   HTML spec section
-// 43      ...       s44       ...       s01.T.E0  ...       ...       r45       13.2.5.43 Comment start state
-// 44      ...       s51       ...       s01.T.E0  ...       T.Z.E1    r45.A-    13.2.5.44 Comment start dash state
-// 45      ...       s50       s46.A<    ...       t45.A?.E2 T.Z.E1    t45.Ax    13.2.5.45 Comment state
-// 46      s47.A!    ...       t46.A<    ...       ...       ...       r45       13.2.5.46 Comment less-than sign state
-// 47      ...       s48       ...       ...       ...       ...       r45       13.2.5.47 Comment less-than sign bang state
-// 48      ...       s49       ...       ...       ...       ...       r50       13.2.5.48 Comment less-than sign bang dash state
-// 49      ...       ...       ...       s01.T     ...       T.Z.E1    r51.E3    13.2.5.49 Comment less-than sign bang dash dash state
-// 50      ...       s51       ...       ...       ...       T.Z.E1    r45.A-    13.2.5.50 Comment end dash state
-// 51      s52       t51.A-    ...       s01.T     ...       T.Z.E1    r45.A--   13.2.5.51 Comment end state
-// 52      ...       s50.A--!  ...       s01.T.E4  ...       T.Z.E1    r45.A--!  13.2.5.52 Comment end bang state
-//
-// State 43 is the "Comment start state" meaning that we've only seen "<!--"
-// and nothing else. Similarly, state 44 means that we've only seen "<!---",
-// with three dashes, and nothing else. For the other states, we deduce
-// (working backwards) that the immediate prior input must be:
-//   - 45  something that's not '-'
-//   - 46  "<"
-//   - 47  "<!"
-//   - 48  "<!-"
-//   - 49  "<!--"  not including the opening "<!--"
-//   - 50  "-"     not including the opening "<!--" and also not "--"
-//   - 51  "--"    not including the opening "<!--"
-//   - 52  "--!"
-//
-// The table cell actions:
-//   - ...   do the default action
-//   - A!    append "!"      to the comment token's data.
-//   - A-    append "-"      to the comment token's data.
-//   - A--   append "--"     to the comment token's data.
-//   - A--!  append "--!"    to the comment token's data.
-//   - A<    append "<"      to the comment token's data.
-//   - A?    append "\uFFFD" to the comment token's data.
-//   - Ax    append the current input character to the comment token's data.
-//   - E0    parse error (abrupt-closing-of-empty-comment).
-//   - E1    parse error (eof-in-comment).
-//   - E2    parse error (unexpected-null-character).
-//   - E3    parse error (nested-comment).
-//   - E4    parse error (incorrectly-closed-comment).
-//   - T     emit the current comment token.
-//   - Z     emit an end-of-file token.
-//   - rNN   reconsume in the 13.2.5.NN     state (after any A* or E* operations).
-//   - s01   switch to the    13.2.5.1 Data state (after any A* or E* operations).
-//   - sNN   switch to the    13.2.5.NN     state (after any A* or E* operations).
-//   - tNN   stay in the      13.2.5.NN     state (after any A* or E* operations).
-//
-// The E* actions are called errors in the HTML spec but they are not fatal
-// (https://html.spec.whatwg.org/#parse-errors says "may [but not must] abort
-// the parser"). They are warnings that, in practice, browsers simply ignore.
diff --git a/html/const.go b/html/const.go
deleted file mode 100644 (file)
index ff7acf2..0000000
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-// Section 12.2.4.2 of the HTML5 specification says "The following elements
-// have varying levels of special parsing rules".
-// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
-var isSpecialElementMap = map[string]bool{
-       "address":    true,
-       "applet":     true,
-       "area":       true,
-       "article":    true,
-       "aside":      true,
-       "base":       true,
-       "basefont":   true,
-       "bgsound":    true,
-       "blockquote": true,
-       "body":       true,
-       "br":         true,
-       "button":     true,
-       "caption":    true,
-       "center":     true,
-       "col":        true,
-       "colgroup":   true,
-       "dd":         true,
-       "details":    true,
-       "dir":        true,
-       "div":        true,
-       "dl":         true,
-       "dt":         true,
-       "embed":      true,
-       "fieldset":   true,
-       "figcaption": true,
-       "figure":     true,
-       "footer":     true,
-       "form":       true,
-       "frame":      true,
-       "frameset":   true,
-       "h1":         true,
-       "h2":         true,
-       "h3":         true,
-       "h4":         true,
-       "h5":         true,
-       "h6":         true,
-       "head":       true,
-       "header":     true,
-       "hgroup":     true,
-       "hr":         true,
-       "html":       true,
-       "iframe":     true,
-       "img":        true,
-       "input":      true,
-       "keygen":     true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
-       "li":         true,
-       "link":       true,
-       "listing":    true,
-       "main":       true,
-       "marquee":    true,
-       "menu":       true,
-       "meta":       true,
-       "nav":        true,
-       "noembed":    true,
-       "noframes":   true,
-       "noscript":   true,
-       "object":     true,
-       "ol":         true,
-       "p":          true,
-       "param":      true,
-       "plaintext":  true,
-       "pre":        true,
-       "script":     true,
-       "section":    true,
-       "select":     true,
-       "source":     true,
-       "style":      true,
-       "summary":    true,
-       "table":      true,
-       "tbody":      true,
-       "td":         true,
-       "template":   true,
-       "textarea":   true,
-       "tfoot":      true,
-       "th":         true,
-       "thead":      true,
-       "title":      true,
-       "tr":         true,
-       "track":      true,
-       "ul":         true,
-       "wbr":        true,
-       "xmp":        true,
-}
-
-func isSpecialElement(element *Node) bool {
-       switch element.Namespace {
-       case "", "html":
-               return isSpecialElementMap[element.Data]
-       case "math":
-               switch element.Data {
-               case "mi", "mo", "mn", "ms", "mtext", "annotation-xml":
-                       return true
-               }
-       case "svg":
-               switch element.Data {
-               case "foreignObject", "desc", "title":
-                       return true
-               }
-       }
-       return false
-}
diff --git a/html/doc.go b/html/doc.go
deleted file mode 100644 (file)
index 23bfd3d..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
-Package html implements an HTML5-compliant tokenizer and parser.
-
-Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
-caller's responsibility to ensure that r provides UTF-8 encoded HTML.
-
-       z := html.NewTokenizer(r)
-
-Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
-which parses the next token and returns its type, or an error:
-
-       for {
-               tt := z.Next()
-               if tt == html.ErrorToken {
-                       // ...
-                       return ...
-               }
-               // Process the current token.
-       }
-
-There are two APIs for retrieving the current token. The high-level API is to
-call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
-allow optionally calling Raw after Next but before Token, Text, TagName, or
-TagAttr. In EBNF notation, the valid call sequence per token is:
-
-       Next {Raw} [ Token | Text | TagName {TagAttr} ]
-
-Token returns an independent data structure that completely describes a token.
-Entities (such as "&lt;") are unescaped, tag names and attribute keys are
-lower-cased, and attributes are collected into a []Attribute. For example:
-
-       for {
-               if z.Next() == html.ErrorToken {
-                       // Returning io.EOF indicates success.
-                       return z.Err()
-               }
-               emitToken(z.Token())
-       }
-
-The low-level API performs fewer allocations and copies, but the contents of
-the []byte values returned by Text, TagName and TagAttr may change on the next
-call to Next. For example, to extract an HTML page's anchor text:
-
-       depth := 0
-       for {
-               tt := z.Next()
-               switch tt {
-               case html.ErrorToken:
-                       return z.Err()
-               case html.TextToken:
-                       if depth > 0 {
-                               // emitBytes should copy the []byte it receives,
-                               // if it doesn't process it immediately.
-                               emitBytes(z.Text())
-                       }
-               case html.StartTagToken, html.EndTagToken:
-                       tn, _ := z.TagName()
-                       if len(tn) == 1 && tn[0] == 'a' {
-                               if tt == html.StartTagToken {
-                                       depth++
-                               } else {
-                                       depth--
-                               }
-                       }
-               }
-       }
-
-Parsing is done by calling Parse with an io.Reader, which returns the root of
-the parse tree (the document element) as a *Node. It is the caller's
-responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
-example, to process each anchor node in depth-first order:
-
-       doc, err := html.Parse(r)
-       if err != nil {
-               // ...
-       }
-       var f func(*html.Node)
-       f = func(n *html.Node) {
-               if n.Type == html.ElementNode && n.Data == "a" {
-                       // Do something with n...
-               }
-               for c := n.FirstChild; c != nil; c = c.NextSibling {
-                       f(c)
-               }
-       }
-       f(doc)
-
-The relevant specifications include:
-https://html.spec.whatwg.org/multipage/syntax.html and
-https://html.spec.whatwg.org/multipage/syntax.html#tokenization
-
-# Security Considerations
-
-Care should be taken when parsing and interpreting HTML, whether full documents
-or fragments, within the framework of the HTML specification, especially with
-regard to untrusted inputs.
-
-This package provides both a tokenizer and a parser, which implement the
-tokenization, and tokenization and tree construction stages of the WHATWG HTML
-parsing specification respectively. While the tokenizer parses and normalizes
-individual HTML tokens, only the parser constructs the DOM tree from the
-tokenized HTML, as described in the tree construction stage of the
-specification, dynamically modifying or extending the document's DOM tree.
-
-If your use case requires semantically well-formed HTML documents, as defined by
-the WHATWG specification, the parser should be used rather than the tokenizer.
-
-In security contexts, if trust decisions are being made using the tokenized or
-parsed content, the input must be re-serialized (for instance by using Render or
-Token.String) in order for those trust decisions to hold, as the process of
-tokenization or parsing may alter the content.
-*/
-package html // import "git.earlybird.gay/today-engine/html"
-
-// The tokenization algorithm implemented by this package is not a line-by-line
-// transliteration of the relatively verbose state-machine in the WHATWG
-// specification. A more direct approach is used instead, where the program
-// counter implies the state, such as whether it is tokenizing a tag or a text
-// node. Specification compliance is verified by checking expected and actual
-// outputs over a test suite rather than aiming for algorithmic fidelity.
-
-// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
-// TODO(nigeltao): How does parsing interact with a JavaScript engine?
diff --git a/html/doctype.go b/html/doctype.go
deleted file mode 100644 (file)
index c484e5a..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "strings"
-)
-
-// parseDoctype parses the data from a DoctypeToken into a name,
-// public identifier, and system identifier. It returns a Node whose Type
-// is DoctypeNode, whose Data is the name, and which has attributes
-// named "system" and "public" for the two identifiers if they were present.
-// quirks is whether the document should be parsed in "quirks mode".
-func parseDoctype(s string) (n *Node, quirks bool) {
-       n = &Node{Type: DoctypeNode}
-
-       // Find the name.
-       space := strings.IndexAny(s, whitespace)
-       if space == -1 {
-               space = len(s)
-       }
-       n.Data = s[:space]
-       // The comparison to "html" is case-sensitive.
-       if n.Data != "html" {
-               quirks = true
-       }
-       n.Data = strings.ToLower(n.Data)
-       s = strings.TrimLeft(s[space:], whitespace)
-
-       if len(s) < 6 {
-               // It can't start with "PUBLIC" or "SYSTEM".
-               // Ignore the rest of the string.
-               return n, quirks || s != ""
-       }
-
-       key := strings.ToLower(s[:6])
-       s = s[6:]
-       for key == "public" || key == "system" {
-               s = strings.TrimLeft(s, whitespace)
-               if s == "" {
-                       break
-               }
-               quote := s[0]
-               if quote != '"' && quote != '\'' {
-                       break
-               }
-               s = s[1:]
-               q := strings.IndexRune(s, rune(quote))
-               var id string
-               if q == -1 {
-                       id = s
-                       s = ""
-               } else {
-                       id = s[:q]
-                       s = s[q+1:]
-               }
-               n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
-               if key == "public" {
-                       key = "system"
-               } else {
-                       key = ""
-               }
-       }
-
-       if key != "" || s != "" {
-               quirks = true
-       } else if len(n.Attr) > 0 {
-               if n.Attr[0].Key == "public" {
-                       public := strings.ToLower(n.Attr[0].Val)
-                       switch public {
-                       case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
-                               quirks = true
-                       default:
-                               for _, q := range quirkyIDs {
-                                       if strings.HasPrefix(public, q) {
-                                               quirks = true
-                                               break
-                                       }
-                               }
-                       }
-                       // The following two public IDs only cause quirks mode if there is no system ID.
-                       if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
-                               strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
-                               quirks = true
-                       }
-               }
-               if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
-                       strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
-                       quirks = true
-               }
-       }
-
-       return n, quirks
-}
-
-// quirkyIDs is a list of public doctype identifiers that cause a document
-// to be interpreted in quirks mode. The identifiers should be in lower case.
-var quirkyIDs = []string{
-       "+//silmaril//dtd html pro v0r11 19970101//",
-       "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
-       "-//as//dtd html 3.0 aswedit + extensions//",
-       "-//ietf//dtd html 2.0 level 1//",
-       "-//ietf//dtd html 2.0 level 2//",
-       "-//ietf//dtd html 2.0 strict level 1//",
-       "-//ietf//dtd html 2.0 strict level 2//",
-       "-//ietf//dtd html 2.0 strict//",
-       "-//ietf//dtd html 2.0//",
-       "-//ietf//dtd html 2.1e//",
-       "-//ietf//dtd html 3.0//",
-       "-//ietf//dtd html 3.2 final//",
-       "-//ietf//dtd html 3.2//",
-       "-//ietf//dtd html 3//",
-       "-//ietf//dtd html level 0//",
-       "-//ietf//dtd html level 1//",
-       "-//ietf//dtd html level 2//",
-       "-//ietf//dtd html level 3//",
-       "-//ietf//dtd html strict level 0//",
-       "-//ietf//dtd html strict level 1//",
-       "-//ietf//dtd html strict level 2//",
-       "-//ietf//dtd html strict level 3//",
-       "-//ietf//dtd html strict//",
-       "-//ietf//dtd html//",
-       "-//metrius//dtd metrius presentational//",
-       "-//microsoft//dtd internet explorer 2.0 html strict//",
-       "-//microsoft//dtd internet explorer 2.0 html//",
-       "-//microsoft//dtd internet explorer 2.0 tables//",
-       "-//microsoft//dtd internet explorer 3.0 html strict//",
-       "-//microsoft//dtd internet explorer 3.0 html//",
-       "-//microsoft//dtd internet explorer 3.0 tables//",
-       "-//netscape comm. corp.//dtd html//",
-       "-//netscape comm. corp.//dtd strict html//",
-       "-//o'reilly and associates//dtd html 2.0//",
-       "-//o'reilly and associates//dtd html extended 1.0//",
-       "-//o'reilly and associates//dtd html extended relaxed 1.0//",
-       "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
-       "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
-       "-//spyglass//dtd html 2.0 extended//",
-       "-//sq//dtd html 2.0 hotmetal + extensions//",
-       "-//sun microsystems corp.//dtd hotjava html//",
-       "-//sun microsystems corp.//dtd hotjava strict html//",
-       "-//w3c//dtd html 3 1995-03-24//",
-       "-//w3c//dtd html 3.2 draft//",
-       "-//w3c//dtd html 3.2 final//",
-       "-//w3c//dtd html 3.2//",
-       "-//w3c//dtd html 3.2s draft//",
-       "-//w3c//dtd html 4.0 frameset//",
-       "-//w3c//dtd html 4.0 transitional//",
-       "-//w3c//dtd html experimental 19960712//",
-       "-//w3c//dtd html experimental 970421//",
-       "-//w3c//dtd w3 html//",
-       "-//w3o//dtd w3 html 3.0//",
-       "-//webtechs//dtd mozilla html 2.0//",
-       "-//webtechs//dtd mozilla html//",
-}
diff --git a/html/entity.go b/html/entity.go
deleted file mode 100644 (file)
index b628880..0000000
+++ /dev/null
@@ -1,2253 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-// All entities that do not end with ';' are 6 or fewer bytes long.
-const longestEntityWithoutSemicolon = 6
-
-// entity is a map from HTML entity names to their values. The semicolon matters:
-// https://html.spec.whatwg.org/multipage/syntax.html#named-character-references
-// lists both "amp" and "amp;" as two separate entries.
-//
-// Note that the HTML5 list is larger than the HTML4 list at
-// http://www.w3.org/TR/html4/sgml/entities.html
-var entity = map[string]rune{
-       "AElig;":                           '\U000000C6',
-       "AMP;":                             '\U00000026',
-       "Aacute;":                          '\U000000C1',
-       "Abreve;":                          '\U00000102',
-       "Acirc;":                           '\U000000C2',
-       "Acy;":                             '\U00000410',
-       "Afr;":                             '\U0001D504',
-       "Agrave;":                          '\U000000C0',
-       "Alpha;":                           '\U00000391',
-       "Amacr;":                           '\U00000100',
-       "And;":                             '\U00002A53',
-       "Aogon;":                           '\U00000104',
-       "Aopf;":                            '\U0001D538',
-       "ApplyFunction;":                   '\U00002061',
-       "Aring;":                           '\U000000C5',
-       "Ascr;":                            '\U0001D49C',
-       "Assign;":                          '\U00002254',
-       "Atilde;":                          '\U000000C3',
-       "Auml;":                            '\U000000C4',
-       "Backslash;":                       '\U00002216',
-       "Barv;":                            '\U00002AE7',
-       "Barwed;":                          '\U00002306',
-       "Bcy;":                             '\U00000411',
-       "Because;":                         '\U00002235',
-       "Bernoullis;":                      '\U0000212C',
-       "Beta;":                            '\U00000392',
-       "Bfr;":                             '\U0001D505',
-       "Bopf;":                            '\U0001D539',
-       "Breve;":                           '\U000002D8',
-       "Bscr;":                            '\U0000212C',
-       "Bumpeq;":                          '\U0000224E',
-       "CHcy;":                            '\U00000427',
-       "COPY;":                            '\U000000A9',
-       "Cacute;":                          '\U00000106',
-       "Cap;":                             '\U000022D2',
-       "CapitalDifferentialD;":            '\U00002145',
-       "Cayleys;":                         '\U0000212D',
-       "Ccaron;":                          '\U0000010C',
-       "Ccedil;":                          '\U000000C7',
-       "Ccirc;":                           '\U00000108',
-       "Cconint;":                         '\U00002230',
-       "Cdot;":                            '\U0000010A',
-       "Cedilla;":                         '\U000000B8',
-       "CenterDot;":                       '\U000000B7',
-       "Cfr;":                             '\U0000212D',
-       "Chi;":                             '\U000003A7',
-       "CircleDot;":                       '\U00002299',
-       "CircleMinus;":                     '\U00002296',
-       "CirclePlus;":                      '\U00002295',
-       "CircleTimes;":                     '\U00002297',
-       "ClockwiseContourIntegral;":        '\U00002232',
-       "CloseCurlyDoubleQuote;":           '\U0000201D',
-       "CloseCurlyQuote;":                 '\U00002019',
-       "Colon;":                           '\U00002237',
-       "Colone;":                          '\U00002A74',
-       "Congruent;":                       '\U00002261',
-       "Conint;":                          '\U0000222F',
-       "ContourIntegral;":                 '\U0000222E',
-       "Copf;":                            '\U00002102',
-       "Coproduct;":                       '\U00002210',
-       "CounterClockwiseContourIntegral;": '\U00002233',
-       "Cross;":                           '\U00002A2F',
-       "Cscr;":                            '\U0001D49E',
-       "Cup;":                             '\U000022D3',
-       "CupCap;":                          '\U0000224D',
-       "DD;":                              '\U00002145',
-       "DDotrahd;":                        '\U00002911',
-       "DJcy;":                            '\U00000402',
-       "DScy;":                            '\U00000405',
-       "DZcy;":                            '\U0000040F',
-       "Dagger;":                          '\U00002021',
-       "Darr;":                            '\U000021A1',
-       "Dashv;":                           '\U00002AE4',
-       "Dcaron;":                          '\U0000010E',
-       "Dcy;":                             '\U00000414',
-       "Del;":                             '\U00002207',
-       "Delta;":                           '\U00000394',
-       "Dfr;":                             '\U0001D507',
-       "DiacriticalAcute;":                '\U000000B4',
-       "DiacriticalDot;":                  '\U000002D9',
-       "DiacriticalDoubleAcute;":          '\U000002DD',
-       "DiacriticalGrave;":                '\U00000060',
-       "DiacriticalTilde;":                '\U000002DC',
-       "Diamond;":                         '\U000022C4',
-       "DifferentialD;":                   '\U00002146',
-       "Dopf;":                            '\U0001D53B',
-       "Dot;":                             '\U000000A8',
-       "DotDot;":                          '\U000020DC',
-       "DotEqual;":                        '\U00002250',
-       "DoubleContourIntegral;":           '\U0000222F',
-       "DoubleDot;":                       '\U000000A8',
-       "DoubleDownArrow;":                 '\U000021D3',
-       "DoubleLeftArrow;":                 '\U000021D0',
-       "DoubleLeftRightArrow;":            '\U000021D4',
-       "DoubleLeftTee;":                   '\U00002AE4',
-       "DoubleLongLeftArrow;":             '\U000027F8',
-       "DoubleLongLeftRightArrow;":        '\U000027FA',
-       "DoubleLongRightArrow;":            '\U000027F9',
-       "DoubleRightArrow;":                '\U000021D2',
-       "DoubleRightTee;":                  '\U000022A8',
-       "DoubleUpArrow;":                   '\U000021D1',
-       "DoubleUpDownArrow;":               '\U000021D5',
-       "DoubleVerticalBar;":               '\U00002225',
-       "DownArrow;":                       '\U00002193',
-       "DownArrowBar;":                    '\U00002913',
-       "DownArrowUpArrow;":                '\U000021F5',
-       "DownBreve;":                       '\U00000311',
-       "DownLeftRightVector;":             '\U00002950',
-       "DownLeftTeeVector;":               '\U0000295E',
-       "DownLeftVector;":                  '\U000021BD',
-       "DownLeftVectorBar;":               '\U00002956',
-       "DownRightTeeVector;":              '\U0000295F',
-       "DownRightVector;":                 '\U000021C1',
-       "DownRightVectorBar;":              '\U00002957',
-       "DownTee;":                         '\U000022A4',
-       "DownTeeArrow;":                    '\U000021A7',
-       "Downarrow;":                       '\U000021D3',
-       "Dscr;":                            '\U0001D49F',
-       "Dstrok;":                          '\U00000110',
-       "ENG;":                             '\U0000014A',
-       "ETH;":                             '\U000000D0',
-       "Eacute;":                          '\U000000C9',
-       "Ecaron;":                          '\U0000011A',
-       "Ecirc;":                           '\U000000CA',
-       "Ecy;":                             '\U0000042D',
-       "Edot;":                            '\U00000116',
-       "Efr;":                             '\U0001D508',
-       "Egrave;":                          '\U000000C8',
-       "Element;":                         '\U00002208',
-       "Emacr;":                           '\U00000112',
-       "EmptySmallSquare;":                '\U000025FB',
-       "EmptyVerySmallSquare;":            '\U000025AB',
-       "Eogon;":                           '\U00000118',
-       "Eopf;":                            '\U0001D53C',
-       "Epsilon;":                         '\U00000395',
-       "Equal;":                           '\U00002A75',
-       "EqualTilde;":                      '\U00002242',
-       "Equilibrium;":                     '\U000021CC',
-       "Escr;":                            '\U00002130',
-       "Esim;":                            '\U00002A73',
-       "Eta;":                             '\U00000397',
-       "Euml;":                            '\U000000CB',
-       "Exists;":                          '\U00002203',
-       "ExponentialE;":                    '\U00002147',
-       "Fcy;":                             '\U00000424',
-       "Ffr;":                             '\U0001D509',
-       "FilledSmallSquare;":               '\U000025FC',
-       "FilledVerySmallSquare;":           '\U000025AA',
-       "Fopf;":                            '\U0001D53D',
-       "ForAll;":                          '\U00002200',
-       "Fouriertrf;":                      '\U00002131',
-       "Fscr;":                            '\U00002131',
-       "GJcy;":                            '\U00000403',
-       "GT;":                              '\U0000003E',
-       "Gamma;":                           '\U00000393',
-       "Gammad;":                          '\U000003DC',
-       "Gbreve;":                          '\U0000011E',
-       "Gcedil;":                          '\U00000122',
-       "Gcirc;":                           '\U0000011C',
-       "Gcy;":                             '\U00000413',
-       "Gdot;":                            '\U00000120',
-       "Gfr;":                             '\U0001D50A',
-       "Gg;":                              '\U000022D9',
-       "Gopf;":                            '\U0001D53E',
-       "GreaterEqual;":                    '\U00002265',
-       "GreaterEqualLess;":                '\U000022DB',
-       "GreaterFullEqual;":                '\U00002267',
-       "GreaterGreater;":                  '\U00002AA2',
-       "GreaterLess;":                     '\U00002277',
-       "GreaterSlantEqual;":               '\U00002A7E',
-       "GreaterTilde;":                    '\U00002273',
-       "Gscr;":                            '\U0001D4A2',
-       "Gt;":                              '\U0000226B',
-       "HARDcy;":                          '\U0000042A',
-       "Hacek;":                           '\U000002C7',
-       "Hat;":                             '\U0000005E',
-       "Hcirc;":                           '\U00000124',
-       "Hfr;":                             '\U0000210C',
-       "HilbertSpace;":                    '\U0000210B',
-       "Hopf;":                            '\U0000210D',
-       "HorizontalLine;":                  '\U00002500',
-       "Hscr;":                            '\U0000210B',
-       "Hstrok;":                          '\U00000126',
-       "HumpDownHump;":                    '\U0000224E',
-       "HumpEqual;":                       '\U0000224F',
-       "IEcy;":                            '\U00000415',
-       "IJlig;":                           '\U00000132',
-       "IOcy;":                            '\U00000401',
-       "Iacute;":                          '\U000000CD',
-       "Icirc;":                           '\U000000CE',
-       "Icy;":                             '\U00000418',
-       "Idot;":                            '\U00000130',
-       "Ifr;":                             '\U00002111',
-       "Igrave;":                          '\U000000CC',
-       "Im;":                              '\U00002111',
-       "Imacr;":                           '\U0000012A',
-       "ImaginaryI;":                      '\U00002148',
-       "Implies;":                         '\U000021D2',
-       "Int;":                             '\U0000222C',
-       "Integral;":                        '\U0000222B',
-       "Intersection;":                    '\U000022C2',
-       "InvisibleComma;":                  '\U00002063',
-       "InvisibleTimes;":                  '\U00002062',
-       "Iogon;":                           '\U0000012E',
-       "Iopf;":                            '\U0001D540',
-       "Iota;":                            '\U00000399',
-       "Iscr;":                            '\U00002110',
-       "Itilde;":                          '\U00000128',
-       "Iukcy;":                           '\U00000406',
-       "Iuml;":                            '\U000000CF',
-       "Jcirc;":                           '\U00000134',
-       "Jcy;":                             '\U00000419',
-       "Jfr;":                             '\U0001D50D',
-       "Jopf;":                            '\U0001D541',
-       "Jscr;":                            '\U0001D4A5',
-       "Jsercy;":                          '\U00000408',
-       "Jukcy;":                           '\U00000404',
-       "KHcy;":                            '\U00000425',
-       "KJcy;":                            '\U0000040C',
-       "Kappa;":                           '\U0000039A',
-       "Kcedil;":                          '\U00000136',
-       "Kcy;":                             '\U0000041A',
-       "Kfr;":                             '\U0001D50E',
-       "Kopf;":                            '\U0001D542',
-       "Kscr;":                            '\U0001D4A6',
-       "LJcy;":                            '\U00000409',
-       "LT;":                              '\U0000003C',
-       "Lacute;":                          '\U00000139',
-       "Lambda;":                          '\U0000039B',
-       "Lang;":                            '\U000027EA',
-       "Laplacetrf;":                      '\U00002112',
-       "Larr;":                            '\U0000219E',
-       "Lcaron;":                          '\U0000013D',
-       "Lcedil;":                          '\U0000013B',
-       "Lcy;":                             '\U0000041B',
-       "LeftAngleBracket;":                '\U000027E8',
-       "LeftArrow;":                       '\U00002190',
-       "LeftArrowBar;":                    '\U000021E4',
-       "LeftArrowRightArrow;":             '\U000021C6',
-       "LeftCeiling;":                     '\U00002308',
-       "LeftDoubleBracket;":               '\U000027E6',
-       "LeftDownTeeVector;":               '\U00002961',
-       "LeftDownVector;":                  '\U000021C3',
-       "LeftDownVectorBar;":               '\U00002959',
-       "LeftFloor;":                       '\U0000230A',
-       "LeftRightArrow;":                  '\U00002194',
-       "LeftRightVector;":                 '\U0000294E',
-       "LeftTee;":                         '\U000022A3',
-       "LeftTeeArrow;":                    '\U000021A4',
-       "LeftTeeVector;":                   '\U0000295A',
-       "LeftTriangle;":                    '\U000022B2',
-       "LeftTriangleBar;":                 '\U000029CF',
-       "LeftTriangleEqual;":               '\U000022B4',
-       "LeftUpDownVector;":                '\U00002951',
-       "LeftUpTeeVector;":                 '\U00002960',
-       "LeftUpVector;":                    '\U000021BF',
-       "LeftUpVectorBar;":                 '\U00002958',
-       "LeftVector;":                      '\U000021BC',
-       "LeftVectorBar;":                   '\U00002952',
-       "Leftarrow;":                       '\U000021D0',
-       "Leftrightarrow;":                  '\U000021D4',
-       "LessEqualGreater;":                '\U000022DA',
-       "LessFullEqual;":                   '\U00002266',
-       "LessGreater;":                     '\U00002276',
-       "LessLess;":                        '\U00002AA1',
-       "LessSlantEqual;":                  '\U00002A7D',
-       "LessTilde;":                       '\U00002272',
-       "Lfr;":                             '\U0001D50F',
-       "Ll;":                              '\U000022D8',
-       "Lleftarrow;":                      '\U000021DA',
-       "Lmidot;":                          '\U0000013F',
-       "LongLeftArrow;":                   '\U000027F5',
-       "LongLeftRightArrow;":              '\U000027F7',
-       "LongRightArrow;":                  '\U000027F6',
-       "Longleftarrow;":                   '\U000027F8',
-       "Longleftrightarrow;":              '\U000027FA',
-       "Longrightarrow;":                  '\U000027F9',
-       "Lopf;":                            '\U0001D543',
-       "LowerLeftArrow;":                  '\U00002199',
-       "LowerRightArrow;":                 '\U00002198',
-       "Lscr;":                            '\U00002112',
-       "Lsh;":                             '\U000021B0',
-       "Lstrok;":                          '\U00000141',
-       "Lt;":                              '\U0000226A',
-       "Map;":                             '\U00002905',
-       "Mcy;":                             '\U0000041C',
-       "MediumSpace;":                     '\U0000205F',
-       "Mellintrf;":                       '\U00002133',
-       "Mfr;":                             '\U0001D510',
-       "MinusPlus;":                       '\U00002213',
-       "Mopf;":                            '\U0001D544',
-       "Mscr;":                            '\U00002133',
-       "Mu;":                              '\U0000039C',
-       "NJcy;":                            '\U0000040A',
-       "Nacute;":                          '\U00000143',
-       "Ncaron;":                          '\U00000147',
-       "Ncedil;":                          '\U00000145',
-       "Ncy;":                             '\U0000041D',
-       "NegativeMediumSpace;":             '\U0000200B',
-       "NegativeThickSpace;":              '\U0000200B',
-       "NegativeThinSpace;":               '\U0000200B',
-       "NegativeVeryThinSpace;":           '\U0000200B',
-       "NestedGreaterGreater;":            '\U0000226B',
-       "NestedLessLess;":                  '\U0000226A',
-       "NewLine;":                         '\U0000000A',
-       "Nfr;":                             '\U0001D511',
-       "NoBreak;":                         '\U00002060',
-       "NonBreakingSpace;":                '\U000000A0',
-       "Nopf;":                            '\U00002115',
-       "Not;":                             '\U00002AEC',
-       "NotCongruent;":                    '\U00002262',
-       "NotCupCap;":                       '\U0000226D',
-       "NotDoubleVerticalBar;":            '\U00002226',
-       "NotElement;":                      '\U00002209',
-       "NotEqual;":                        '\U00002260',
-       "NotExists;":                       '\U00002204',
-       "NotGreater;":                      '\U0000226F',
-       "NotGreaterEqual;":                 '\U00002271',
-       "NotGreaterLess;":                  '\U00002279',
-       "NotGreaterTilde;":                 '\U00002275',
-       "NotLeftTriangle;":                 '\U000022EA',
-       "NotLeftTriangleEqual;":            '\U000022EC',
-       "NotLess;":                         '\U0000226E',
-       "NotLessEqual;":                    '\U00002270',
-       "NotLessGreater;":                  '\U00002278',
-       "NotLessTilde;":                    '\U00002274',
-       "NotPrecedes;":                     '\U00002280',
-       "NotPrecedesSlantEqual;":           '\U000022E0',
-       "NotReverseElement;":               '\U0000220C',
-       "NotRightTriangle;":                '\U000022EB',
-       "NotRightTriangleEqual;":           '\U000022ED',
-       "NotSquareSubsetEqual;":            '\U000022E2',
-       "NotSquareSupersetEqual;":          '\U000022E3',
-       "NotSubsetEqual;":                  '\U00002288',
-       "NotSucceeds;":                     '\U00002281',
-       "NotSucceedsSlantEqual;":           '\U000022E1',
-       "NotSupersetEqual;":                '\U00002289',
-       "NotTilde;":                        '\U00002241',
-       "NotTildeEqual;":                   '\U00002244',
-       "NotTildeFullEqual;":               '\U00002247',
-       "NotTildeTilde;":                   '\U00002249',
-       "NotVerticalBar;":                  '\U00002224',
-       "Nscr;":                            '\U0001D4A9',
-       "Ntilde;":                          '\U000000D1',
-       "Nu;":                              '\U0000039D',
-       "OElig;":                           '\U00000152',
-       "Oacute;":                          '\U000000D3',
-       "Ocirc;":                           '\U000000D4',
-       "Ocy;":                             '\U0000041E',
-       "Odblac;":                          '\U00000150',
-       "Ofr;":                             '\U0001D512',
-       "Ograve;":                          '\U000000D2',
-       "Omacr;":                           '\U0000014C',
-       "Omega;":                           '\U000003A9',
-       "Omicron;":                         '\U0000039F',
-       "Oopf;":                            '\U0001D546',
-       "OpenCurlyDoubleQuote;":            '\U0000201C',
-       "OpenCurlyQuote;":                  '\U00002018',
-       "Or;":                              '\U00002A54',
-       "Oscr;":                            '\U0001D4AA',
-       "Oslash;":                          '\U000000D8',
-       "Otilde;":                          '\U000000D5',
-       "Otimes;":                          '\U00002A37',
-       "Ouml;":                            '\U000000D6',
-       "OverBar;":                         '\U0000203E',
-       "OverBrace;":                       '\U000023DE',
-       "OverBracket;":                     '\U000023B4',
-       "OverParenthesis;":                 '\U000023DC',
-       "PartialD;":                        '\U00002202',
-       "Pcy;":                             '\U0000041F',
-       "Pfr;":                             '\U0001D513',
-       "Phi;":                             '\U000003A6',
-       "Pi;":                              '\U000003A0',
-       "PlusMinus;":                       '\U000000B1',
-       "Poincareplane;":                   '\U0000210C',
-       "Popf;":                            '\U00002119',
-       "Pr;":                              '\U00002ABB',
-       "Precedes;":                        '\U0000227A',
-       "PrecedesEqual;":                   '\U00002AAF',
-       "PrecedesSlantEqual;":              '\U0000227C',
-       "PrecedesTilde;":                   '\U0000227E',
-       "Prime;":                           '\U00002033',
-       "Product;":                         '\U0000220F',
-       "Proportion;":                      '\U00002237',
-       "Proportional;":                    '\U0000221D',
-       "Pscr;":                            '\U0001D4AB',
-       "Psi;":                             '\U000003A8',
-       "QUOT;":                            '\U00000022',
-       "Qfr;":                             '\U0001D514',
-       "Qopf;":                            '\U0000211A',
-       "Qscr;":                            '\U0001D4AC',
-       "RBarr;":                           '\U00002910',
-       "REG;":                             '\U000000AE',
-       "Racute;":                          '\U00000154',
-       "Rang;":                            '\U000027EB',
-       "Rarr;":                            '\U000021A0',
-       "Rarrtl;":                          '\U00002916',
-       "Rcaron;":                          '\U00000158',
-       "Rcedil;":                          '\U00000156',
-       "Rcy;":                             '\U00000420',
-       "Re;":                              '\U0000211C',
-       "ReverseElement;":                  '\U0000220B',
-       "ReverseEquilibrium;":              '\U000021CB',
-       "ReverseUpEquilibrium;":            '\U0000296F',
-       "Rfr;":                             '\U0000211C',
-       "Rho;":                             '\U000003A1',
-       "RightAngleBracket;":               '\U000027E9',
-       "RightArrow;":                      '\U00002192',
-       "RightArrowBar;":                   '\U000021E5',
-       "RightArrowLeftArrow;":             '\U000021C4',
-       "RightCeiling;":                    '\U00002309',
-       "RightDoubleBracket;":              '\U000027E7',
-       "RightDownTeeVector;":              '\U0000295D',
-       "RightDownVector;":                 '\U000021C2',
-       "RightDownVectorBar;":              '\U00002955',
-       "RightFloor;":                      '\U0000230B',
-       "RightTee;":                        '\U000022A2',
-       "RightTeeArrow;":                   '\U000021A6',
-       "RightTeeVector;":                  '\U0000295B',
-       "RightTriangle;":                   '\U000022B3',
-       "RightTriangleBar;":                '\U000029D0',
-       "RightTriangleEqual;":              '\U000022B5',
-       "RightUpDownVector;":               '\U0000294F',
-       "RightUpTeeVector;":                '\U0000295C',
-       "RightUpVector;":                   '\U000021BE',
-       "RightUpVectorBar;":                '\U00002954',
-       "RightVector;":                     '\U000021C0',
-       "RightVectorBar;":                  '\U00002953',
-       "Rightarrow;":                      '\U000021D2',
-       "Ropf;":                            '\U0000211D',
-       "RoundImplies;":                    '\U00002970',
-       "Rrightarrow;":                     '\U000021DB',
-       "Rscr;":                            '\U0000211B',
-       "Rsh;":                             '\U000021B1',
-       "RuleDelayed;":                     '\U000029F4',
-       "SHCHcy;":                          '\U00000429',
-       "SHcy;":                            '\U00000428',
-       "SOFTcy;":                          '\U0000042C',
-       "Sacute;":                          '\U0000015A',
-       "Sc;":                              '\U00002ABC',
-       "Scaron;":                          '\U00000160',
-       "Scedil;":                          '\U0000015E',
-       "Scirc;":                           '\U0000015C',
-       "Scy;":                             '\U00000421',
-       "Sfr;":                             '\U0001D516',
-       "ShortDownArrow;":                  '\U00002193',
-       "ShortLeftArrow;":                  '\U00002190',
-       "ShortRightArrow;":                 '\U00002192',
-       "ShortUpArrow;":                    '\U00002191',
-       "Sigma;":                           '\U000003A3',
-       "SmallCircle;":                     '\U00002218',
-       "Sopf;":                            '\U0001D54A',
-       "Sqrt;":                            '\U0000221A',
-       "Square;":                          '\U000025A1',
-       "SquareIntersection;":              '\U00002293',
-       "SquareSubset;":                    '\U0000228F',
-       "SquareSubsetEqual;":               '\U00002291',
-       "SquareSuperset;":                  '\U00002290',
-       "SquareSupersetEqual;":             '\U00002292',
-       "SquareUnion;":                     '\U00002294',
-       "Sscr;":                            '\U0001D4AE',
-       "Star;":                            '\U000022C6',
-       "Sub;":                             '\U000022D0',
-       "Subset;":                          '\U000022D0',
-       "SubsetEqual;":                     '\U00002286',
-       "Succeeds;":                        '\U0000227B',
-       "SucceedsEqual;":                   '\U00002AB0',
-       "SucceedsSlantEqual;":              '\U0000227D',
-       "SucceedsTilde;":                   '\U0000227F',
-       "SuchThat;":                        '\U0000220B',
-       "Sum;":                             '\U00002211',
-       "Sup;":                             '\U000022D1',
-       "Superset;":                        '\U00002283',
-       "SupersetEqual;":                   '\U00002287',
-       "Supset;":                          '\U000022D1',
-       "THORN;":                           '\U000000DE',
-       "TRADE;":                           '\U00002122',
-       "TSHcy;":                           '\U0000040B',
-       "TScy;":                            '\U00000426',
-       "Tab;":                             '\U00000009',
-       "Tau;":                             '\U000003A4',
-       "Tcaron;":                          '\U00000164',
-       "Tcedil;":                          '\U00000162',
-       "Tcy;":                             '\U00000422',
-       "Tfr;":                             '\U0001D517',
-       "Therefore;":                       '\U00002234',
-       "Theta;":                           '\U00000398',
-       "ThinSpace;":                       '\U00002009',
-       "Tilde;":                           '\U0000223C',
-       "TildeEqual;":                      '\U00002243',
-       "TildeFullEqual;":                  '\U00002245',
-       "TildeTilde;":                      '\U00002248',
-       "Topf;":                            '\U0001D54B',
-       "TripleDot;":                       '\U000020DB',
-       "Tscr;":                            '\U0001D4AF',
-       "Tstrok;":                          '\U00000166',
-       "Uacute;":                          '\U000000DA',
-       "Uarr;":                            '\U0000219F',
-       "Uarrocir;":                        '\U00002949',
-       "Ubrcy;":                           '\U0000040E',
-       "Ubreve;":                          '\U0000016C',
-       "Ucirc;":                           '\U000000DB',
-       "Ucy;":                             '\U00000423',
-       "Udblac;":                          '\U00000170',
-       "Ufr;":                             '\U0001D518',
-       "Ugrave;":                          '\U000000D9',
-       "Umacr;":                           '\U0000016A',
-       "UnderBar;":                        '\U0000005F',
-       "UnderBrace;":                      '\U000023DF',
-       "UnderBracket;":                    '\U000023B5',
-       "UnderParenthesis;":                '\U000023DD',
-       "Union;":                           '\U000022C3',
-       "UnionPlus;":                       '\U0000228E',
-       "Uogon;":                           '\U00000172',
-       "Uopf;":                            '\U0001D54C',
-       "UpArrow;":                         '\U00002191',
-       "UpArrowBar;":                      '\U00002912',
-       "UpArrowDownArrow;":                '\U000021C5',
-       "UpDownArrow;":                     '\U00002195',
-       "UpEquilibrium;":                   '\U0000296E',
-       "UpTee;":                           '\U000022A5',
-       "UpTeeArrow;":                      '\U000021A5',
-       "Uparrow;":                         '\U000021D1',
-       "Updownarrow;":                     '\U000021D5',
-       "UpperLeftArrow;":                  '\U00002196',
-       "UpperRightArrow;":                 '\U00002197',
-       "Upsi;":                            '\U000003D2',
-       "Upsilon;":                         '\U000003A5',
-       "Uring;":                           '\U0000016E',
-       "Uscr;":                            '\U0001D4B0',
-       "Utilde;":                          '\U00000168',
-       "Uuml;":                            '\U000000DC',
-       "VDash;":                           '\U000022AB',
-       "Vbar;":                            '\U00002AEB',
-       "Vcy;":                             '\U00000412',
-       "Vdash;":                           '\U000022A9',
-       "Vdashl;":                          '\U00002AE6',
-       "Vee;":                             '\U000022C1',
-       "Verbar;":                          '\U00002016',
-       "Vert;":                            '\U00002016',
-       "VerticalBar;":                     '\U00002223',
-       "VerticalLine;":                    '\U0000007C',
-       "VerticalSeparator;":               '\U00002758',
-       "VerticalTilde;":                   '\U00002240',
-       "VeryThinSpace;":                   '\U0000200A',
-       "Vfr;":                             '\U0001D519',
-       "Vopf;":                            '\U0001D54D',
-       "Vscr;":                            '\U0001D4B1',
-       "Vvdash;":                          '\U000022AA',
-       "Wcirc;":                           '\U00000174',
-       "Wedge;":                           '\U000022C0',
-       "Wfr;":                             '\U0001D51A',
-       "Wopf;":                            '\U0001D54E',
-       "Wscr;":                            '\U0001D4B2',
-       "Xfr;":                             '\U0001D51B',
-       "Xi;":                              '\U0000039E',
-       "Xopf;":                            '\U0001D54F',
-       "Xscr;":                            '\U0001D4B3',
-       "YAcy;":                            '\U0000042F',
-       "YIcy;":                            '\U00000407',
-       "YUcy;":                            '\U0000042E',
-       "Yacute;":                          '\U000000DD',
-       "Ycirc;":                           '\U00000176',
-       "Ycy;":                             '\U0000042B',
-       "Yfr;":                             '\U0001D51C',
-       "Yopf;":                            '\U0001D550',
-       "Yscr;":                            '\U0001D4B4',
-       "Yuml;":                            '\U00000178',
-       "ZHcy;":                            '\U00000416',
-       "Zacute;":                          '\U00000179',
-       "Zcaron;":                          '\U0000017D',
-       "Zcy;":                             '\U00000417',
-       "Zdot;":                            '\U0000017B',
-       "ZeroWidthSpace;":                  '\U0000200B',
-       "Zeta;":                            '\U00000396',
-       "Zfr;":                             '\U00002128',
-       "Zopf;":                            '\U00002124',
-       "Zscr;":                            '\U0001D4B5',
-       "aacute;":                          '\U000000E1',
-       "abreve;":                          '\U00000103',
-       "ac;":                              '\U0000223E',
-       "acd;":                             '\U0000223F',
-       "acirc;":                           '\U000000E2',
-       "acute;":                           '\U000000B4',
-       "acy;":                             '\U00000430',
-       "aelig;":                           '\U000000E6',
-       "af;":                              '\U00002061',
-       "afr;":                             '\U0001D51E',
-       "agrave;":                          '\U000000E0',
-       "alefsym;":                         '\U00002135',
-       "aleph;":                           '\U00002135',
-       "alpha;":                           '\U000003B1',
-       "amacr;":                           '\U00000101',
-       "amalg;":                           '\U00002A3F',
-       "amp;":                             '\U00000026',
-       "and;":                             '\U00002227',
-       "andand;":                          '\U00002A55',
-       "andd;":                            '\U00002A5C',
-       "andslope;":                        '\U00002A58',
-       "andv;":                            '\U00002A5A',
-       "ang;":                             '\U00002220',
-       "ange;":                            '\U000029A4',
-       "angle;":                           '\U00002220',
-       "angmsd;":                          '\U00002221',
-       "angmsdaa;":                        '\U000029A8',
-       "angmsdab;":                        '\U000029A9',
-       "angmsdac;":                        '\U000029AA',
-       "angmsdad;":                        '\U000029AB',
-       "angmsdae;":                        '\U000029AC',
-       "angmsdaf;":                        '\U000029AD',
-       "angmsdag;":                        '\U000029AE',
-       "angmsdah;":                        '\U000029AF',
-       "angrt;":                           '\U0000221F',
-       "angrtvb;":                         '\U000022BE',
-       "angrtvbd;":                        '\U0000299D',
-       "angsph;":                          '\U00002222',
-       "angst;":                           '\U000000C5',
-       "angzarr;":                         '\U0000237C',
-       "aogon;":                           '\U00000105',
-       "aopf;":                            '\U0001D552',
-       "ap;":                              '\U00002248',
-       "apE;":                             '\U00002A70',
-       "apacir;":                          '\U00002A6F',
-       "ape;":                             '\U0000224A',
-       "apid;":                            '\U0000224B',
-       "apos;":                            '\U00000027',
-       "approx;":                          '\U00002248',
-       "approxeq;":                        '\U0000224A',
-       "aring;":                           '\U000000E5',
-       "ascr;":                            '\U0001D4B6',
-       "ast;":                             '\U0000002A',
-       "asymp;":                           '\U00002248',
-       "asympeq;":                         '\U0000224D',
-       "atilde;":                          '\U000000E3',
-       "auml;":                            '\U000000E4',
-       "awconint;":                        '\U00002233',
-       "awint;":                           '\U00002A11',
-       "bNot;":                            '\U00002AED',
-       "backcong;":                        '\U0000224C',
-       "backepsilon;":                     '\U000003F6',
-       "backprime;":                       '\U00002035',
-       "backsim;":                         '\U0000223D',
-       "backsimeq;":                       '\U000022CD',
-       "barvee;":                          '\U000022BD',
-       "barwed;":                          '\U00002305',
-       "barwedge;":                        '\U00002305',
-       "bbrk;":                            '\U000023B5',
-       "bbrktbrk;":                        '\U000023B6',
-       "bcong;":                           '\U0000224C',
-       "bcy;":                             '\U00000431',
-       "bdquo;":                           '\U0000201E',
-       "becaus;":                          '\U00002235',
-       "because;":                         '\U00002235',
-       "bemptyv;":                         '\U000029B0',
-       "bepsi;":                           '\U000003F6',
-       "bernou;":                          '\U0000212C',
-       "beta;":                            '\U000003B2',
-       "beth;":                            '\U00002136',
-       "between;":                         '\U0000226C',
-       "bfr;":                             '\U0001D51F',
-       "bigcap;":                          '\U000022C2',
-       "bigcirc;":                         '\U000025EF',
-       "bigcup;":                          '\U000022C3',
-       "bigodot;":                         '\U00002A00',
-       "bigoplus;":                        '\U00002A01',
-       "bigotimes;":                       '\U00002A02',
-       "bigsqcup;":                        '\U00002A06',
-       "bigstar;":                         '\U00002605',
-       "bigtriangledown;":                 '\U000025BD',
-       "bigtriangleup;":                   '\U000025B3',
-       "biguplus;":                        '\U00002A04',
-       "bigvee;":                          '\U000022C1',
-       "bigwedge;":                        '\U000022C0',
-       "bkarow;":                          '\U0000290D',
-       "blacklozenge;":                    '\U000029EB',
-       "blacksquare;":                     '\U000025AA',
-       "blacktriangle;":                   '\U000025B4',
-       "blacktriangledown;":               '\U000025BE',
-       "blacktriangleleft;":               '\U000025C2',
-       "blacktriangleright;":              '\U000025B8',
-       "blank;":                           '\U00002423',
-       "blk12;":                           '\U00002592',
-       "blk14;":                           '\U00002591',
-       "blk34;":                           '\U00002593',
-       "block;":                           '\U00002588',
-       "bnot;":                            '\U00002310',
-       "bopf;":                            '\U0001D553',
-       "bot;":                             '\U000022A5',
-       "bottom;":                          '\U000022A5',
-       "bowtie;":                          '\U000022C8',
-       "boxDL;":                           '\U00002557',
-       "boxDR;":                           '\U00002554',
-       "boxDl;":                           '\U00002556',
-       "boxDr;":                           '\U00002553',
-       "boxH;":                            '\U00002550',
-       "boxHD;":                           '\U00002566',
-       "boxHU;":                           '\U00002569',
-       "boxHd;":                           '\U00002564',
-       "boxHu;":                           '\U00002567',
-       "boxUL;":                           '\U0000255D',
-       "boxUR;":                           '\U0000255A',
-       "boxUl;":                           '\U0000255C',
-       "boxUr;":                           '\U00002559',
-       "boxV;":                            '\U00002551',
-       "boxVH;":                           '\U0000256C',
-       "boxVL;":                           '\U00002563',
-       "boxVR;":                           '\U00002560',
-       "boxVh;":                           '\U0000256B',
-       "boxVl;":                           '\U00002562',
-       "boxVr;":                           '\U0000255F',
-       "boxbox;":                          '\U000029C9',
-       "boxdL;":                           '\U00002555',
-       "boxdR;":                           '\U00002552',
-       "boxdl;":                           '\U00002510',
-       "boxdr;":                           '\U0000250C',
-       "boxh;":                            '\U00002500',
-       "boxhD;":                           '\U00002565',
-       "boxhU;":                           '\U00002568',
-       "boxhd;":                           '\U0000252C',
-       "boxhu;":                           '\U00002534',
-       "boxminus;":                        '\U0000229F',
-       "boxplus;":                         '\U0000229E',
-       "boxtimes;":                        '\U000022A0',
-       "boxuL;":                           '\U0000255B',
-       "boxuR;":                           '\U00002558',
-       "boxul;":                           '\U00002518',
-       "boxur;":                           '\U00002514',
-       "boxv;":                            '\U00002502',
-       "boxvH;":                           '\U0000256A',
-       "boxvL;":                           '\U00002561',
-       "boxvR;":                           '\U0000255E',
-       "boxvh;":                           '\U0000253C',
-       "boxvl;":                           '\U00002524',
-       "boxvr;":                           '\U0000251C',
-       "bprime;":                          '\U00002035',
-       "breve;":                           '\U000002D8',
-       "brvbar;":                          '\U000000A6',
-       "bscr;":                            '\U0001D4B7',
-       "bsemi;":                           '\U0000204F',
-       "bsim;":                            '\U0000223D',
-       "bsime;":                           '\U000022CD',
-       "bsol;":                            '\U0000005C',
-       "bsolb;":                           '\U000029C5',
-       "bsolhsub;":                        '\U000027C8',
-       "bull;":                            '\U00002022',
-       "bullet;":                          '\U00002022',
-       "bump;":                            '\U0000224E',
-       "bumpE;":                           '\U00002AAE',
-       "bumpe;":                           '\U0000224F',
-       "bumpeq;":                          '\U0000224F',
-       "cacute;":                          '\U00000107',
-       "cap;":                             '\U00002229',
-       "capand;":                          '\U00002A44',
-       "capbrcup;":                        '\U00002A49',
-       "capcap;":                          '\U00002A4B',
-       "capcup;":                          '\U00002A47',
-       "capdot;":                          '\U00002A40',
-       "caret;":                           '\U00002041',
-       "caron;":                           '\U000002C7',
-       "ccaps;":                           '\U00002A4D',
-       "ccaron;":                          '\U0000010D',
-       "ccedil;":                          '\U000000E7',
-       "ccirc;":                           '\U00000109',
-       "ccups;":                           '\U00002A4C',
-       "ccupssm;":                         '\U00002A50',
-       "cdot;":                            '\U0000010B',
-       "cedil;":                           '\U000000B8',
-       "cemptyv;":                         '\U000029B2',
-       "cent;":                            '\U000000A2',
-       "centerdot;":                       '\U000000B7',
-       "cfr;":                             '\U0001D520',
-       "chcy;":                            '\U00000447',
-       "check;":                           '\U00002713',
-       "checkmark;":                       '\U00002713',
-       "chi;":                             '\U000003C7',
-       "cir;":                             '\U000025CB',
-       "cirE;":                            '\U000029C3',
-       "circ;":                            '\U000002C6',
-       "circeq;":                          '\U00002257',
-       "circlearrowleft;":                 '\U000021BA',
-       "circlearrowright;":                '\U000021BB',
-       "circledR;":                        '\U000000AE',
-       "circledS;":                        '\U000024C8',
-       "circledast;":                      '\U0000229B',
-       "circledcirc;":                     '\U0000229A',
-       "circleddash;":                     '\U0000229D',
-       "cire;":                            '\U00002257',
-       "cirfnint;":                        '\U00002A10',
-       "cirmid;":                          '\U00002AEF',
-       "cirscir;":                         '\U000029C2',
-       "clubs;":                           '\U00002663',
-       "clubsuit;":                        '\U00002663',
-       "colon;":                           '\U0000003A',
-       "colone;":                          '\U00002254',
-       "coloneq;":                         '\U00002254',
-       "comma;":                           '\U0000002C',
-       "commat;":                          '\U00000040',
-       "comp;":                            '\U00002201',
-       "compfn;":                          '\U00002218',
-       "complement;":                      '\U00002201',
-       "complexes;":                       '\U00002102',
-       "cong;":                            '\U00002245',
-       "congdot;":                         '\U00002A6D',
-       "conint;":                          '\U0000222E',
-       "copf;":                            '\U0001D554',
-       "coprod;":                          '\U00002210',
-       "copy;":                            '\U000000A9',
-       "copysr;":                          '\U00002117',
-       "crarr;":                           '\U000021B5',
-       "cross;":                           '\U00002717',
-       "cscr;":                            '\U0001D4B8',
-       "csub;":                            '\U00002ACF',
-       "csube;":                           '\U00002AD1',
-       "csup;":                            '\U00002AD0',
-       "csupe;":                           '\U00002AD2',
-       "ctdot;":                           '\U000022EF',
-       "cudarrl;":                         '\U00002938',
-       "cudarrr;":                         '\U00002935',
-       "cuepr;":                           '\U000022DE',
-       "cuesc;":                           '\U000022DF',
-       "cularr;":                          '\U000021B6',
-       "cularrp;":                         '\U0000293D',
-       "cup;":                             '\U0000222A',
-       "cupbrcap;":                        '\U00002A48',
-       "cupcap;":                          '\U00002A46',
-       "cupcup;":                          '\U00002A4A',
-       "cupdot;":                          '\U0000228D',
-       "cupor;":                           '\U00002A45',
-       "curarr;":                          '\U000021B7',
-       "curarrm;":                         '\U0000293C',
-       "curlyeqprec;":                     '\U000022DE',
-       "curlyeqsucc;":                     '\U000022DF',
-       "curlyvee;":                        '\U000022CE',
-       "curlywedge;":                      '\U000022CF',
-       "curren;":                          '\U000000A4',
-       "curvearrowleft;":                  '\U000021B6',
-       "curvearrowright;":                 '\U000021B7',
-       "cuvee;":                           '\U000022CE',
-       "cuwed;":                           '\U000022CF',
-       "cwconint;":                        '\U00002232',
-       "cwint;":                           '\U00002231',
-       "cylcty;":                          '\U0000232D',
-       "dArr;":                            '\U000021D3',
-       "dHar;":                            '\U00002965',
-       "dagger;":                          '\U00002020',
-       "daleth;":                          '\U00002138',
-       "darr;":                            '\U00002193',
-       "dash;":                            '\U00002010',
-       "dashv;":                           '\U000022A3',
-       "dbkarow;":                         '\U0000290F',
-       "dblac;":                           '\U000002DD',
-       "dcaron;":                          '\U0000010F',
-       "dcy;":                             '\U00000434',
-       "dd;":                              '\U00002146',
-       "ddagger;":                         '\U00002021',
-       "ddarr;":                           '\U000021CA',
-       "ddotseq;":                         '\U00002A77',
-       "deg;":                             '\U000000B0',
-       "delta;":                           '\U000003B4',
-       "demptyv;":                         '\U000029B1',
-       "dfisht;":                          '\U0000297F',
-       "dfr;":                             '\U0001D521',
-       "dharl;":                           '\U000021C3',
-       "dharr;":                           '\U000021C2',
-       "diam;":                            '\U000022C4',
-       "diamond;":                         '\U000022C4',
-       "diamondsuit;":                     '\U00002666',
-       "diams;":                           '\U00002666',
-       "die;":                             '\U000000A8',
-       "digamma;":                         '\U000003DD',
-       "disin;":                           '\U000022F2',
-       "div;":                             '\U000000F7',
-       "divide;":                          '\U000000F7',
-       "divideontimes;":                   '\U000022C7',
-       "divonx;":                          '\U000022C7',
-       "djcy;":                            '\U00000452',
-       "dlcorn;":                          '\U0000231E',
-       "dlcrop;":                          '\U0000230D',
-       "dollar;":                          '\U00000024',
-       "dopf;":                            '\U0001D555',
-       "dot;":                             '\U000002D9',
-       "doteq;":                           '\U00002250',
-       "doteqdot;":                        '\U00002251',
-       "dotminus;":                        '\U00002238',
-       "dotplus;":                         '\U00002214',
-       "dotsquare;":                       '\U000022A1',
-       "doublebarwedge;":                  '\U00002306',
-       "downarrow;":                       '\U00002193',
-       "downdownarrows;":                  '\U000021CA',
-       "downharpoonleft;":                 '\U000021C3',
-       "downharpoonright;":                '\U000021C2',
-       "drbkarow;":                        '\U00002910',
-       "drcorn;":                          '\U0000231F',
-       "drcrop;":                          '\U0000230C',
-       "dscr;":                            '\U0001D4B9',
-       "dscy;":                            '\U00000455',
-       "dsol;":                            '\U000029F6',
-       "dstrok;":                          '\U00000111',
-       "dtdot;":                           '\U000022F1',
-       "dtri;":                            '\U000025BF',
-       "dtrif;":                           '\U000025BE',
-       "duarr;":                           '\U000021F5',
-       "duhar;":                           '\U0000296F',
-       "dwangle;":                         '\U000029A6',
-       "dzcy;":                            '\U0000045F',
-       "dzigrarr;":                        '\U000027FF',
-       "eDDot;":                           '\U00002A77',
-       "eDot;":                            '\U00002251',
-       "eacute;":                          '\U000000E9',
-       "easter;":                          '\U00002A6E',
-       "ecaron;":                          '\U0000011B',
-       "ecir;":                            '\U00002256',
-       "ecirc;":                           '\U000000EA',
-       "ecolon;":                          '\U00002255',
-       "ecy;":                             '\U0000044D',
-       "edot;":                            '\U00000117',
-       "ee;":                              '\U00002147',
-       "efDot;":                           '\U00002252',
-       "efr;":                             '\U0001D522',
-       "eg;":                              '\U00002A9A',
-       "egrave;":                          '\U000000E8',
-       "egs;":                             '\U00002A96',
-       "egsdot;":                          '\U00002A98',
-       "el;":                              '\U00002A99',
-       "elinters;":                        '\U000023E7',
-       "ell;":                             '\U00002113',
-       "els;":                             '\U00002A95',
-       "elsdot;":                          '\U00002A97',
-       "emacr;":                           '\U00000113',
-       "empty;":                           '\U00002205',
-       "emptyset;":                        '\U00002205',
-       "emptyv;":                          '\U00002205',
-       "emsp;":                            '\U00002003',
-       "emsp13;":                          '\U00002004',
-       "emsp14;":                          '\U00002005',
-       "eng;":                             '\U0000014B',
-       "ensp;":                            '\U00002002',
-       "eogon;":                           '\U00000119',
-       "eopf;":                            '\U0001D556',
-       "epar;":                            '\U000022D5',
-       "eparsl;":                          '\U000029E3',
-       "eplus;":                           '\U00002A71',
-       "epsi;":                            '\U000003B5',
-       "epsilon;":                         '\U000003B5',
-       "epsiv;":                           '\U000003F5',
-       "eqcirc;":                          '\U00002256',
-       "eqcolon;":                         '\U00002255',
-       "eqsim;":                           '\U00002242',
-       "eqslantgtr;":                      '\U00002A96',
-       "eqslantless;":                     '\U00002A95',
-       "equals;":                          '\U0000003D',
-       "equest;":                          '\U0000225F',
-       "equiv;":                           '\U00002261',
-       "equivDD;":                         '\U00002A78',
-       "eqvparsl;":                        '\U000029E5',
-       "erDot;":                           '\U00002253',
-       "erarr;":                           '\U00002971',
-       "escr;":                            '\U0000212F',
-       "esdot;":                           '\U00002250',
-       "esim;":                            '\U00002242',
-       "eta;":                             '\U000003B7',
-       "eth;":                             '\U000000F0',
-       "euml;":                            '\U000000EB',
-       "euro;":                            '\U000020AC',
-       "excl;":                            '\U00000021',
-       "exist;":                           '\U00002203',
-       "expectation;":                     '\U00002130',
-       "exponentiale;":                    '\U00002147',
-       "fallingdotseq;":                   '\U00002252',
-       "fcy;":                             '\U00000444',
-       "female;":                          '\U00002640',
-       "ffilig;":                          '\U0000FB03',
-       "fflig;":                           '\U0000FB00',
-       "ffllig;":                          '\U0000FB04',
-       "ffr;":                             '\U0001D523',
-       "filig;":                           '\U0000FB01',
-       "flat;":                            '\U0000266D',
-       "fllig;":                           '\U0000FB02',
-       "fltns;":                           '\U000025B1',
-       "fnof;":                            '\U00000192',
-       "fopf;":                            '\U0001D557',
-       "forall;":                          '\U00002200',
-       "fork;":                            '\U000022D4',
-       "forkv;":                           '\U00002AD9',
-       "fpartint;":                        '\U00002A0D',
-       "frac12;":                          '\U000000BD',
-       "frac13;":                          '\U00002153',
-       "frac14;":                          '\U000000BC',
-       "frac15;":                          '\U00002155',
-       "frac16;":                          '\U00002159',
-       "frac18;":                          '\U0000215B',
-       "frac23;":                          '\U00002154',
-       "frac25;":                          '\U00002156',
-       "frac34;":                          '\U000000BE',
-       "frac35;":                          '\U00002157',
-       "frac38;":                          '\U0000215C',
-       "frac45;":                          '\U00002158',
-       "frac56;":                          '\U0000215A',
-       "frac58;":                          '\U0000215D',
-       "frac78;":                          '\U0000215E',
-       "frasl;":                           '\U00002044',
-       "frown;":                           '\U00002322',
-       "fscr;":                            '\U0001D4BB',
-       "gE;":                              '\U00002267',
-       "gEl;":                             '\U00002A8C',
-       "gacute;":                          '\U000001F5',
-       "gamma;":                           '\U000003B3',
-       "gammad;":                          '\U000003DD',
-       "gap;":                             '\U00002A86',
-       "gbreve;":                          '\U0000011F',
-       "gcirc;":                           '\U0000011D',
-       "gcy;":                             '\U00000433',
-       "gdot;":                            '\U00000121',
-       "ge;":                              '\U00002265',
-       "gel;":                             '\U000022DB',
-       "geq;":                             '\U00002265',
-       "geqq;":                            '\U00002267',
-       "geqslant;":                        '\U00002A7E',
-       "ges;":                             '\U00002A7E',
-       "gescc;":                           '\U00002AA9',
-       "gesdot;":                          '\U00002A80',
-       "gesdoto;":                         '\U00002A82',
-       "gesdotol;":                        '\U00002A84',
-       "gesles;":                          '\U00002A94',
-       "gfr;":                             '\U0001D524',
-       "gg;":                              '\U0000226B',
-       "ggg;":                             '\U000022D9',
-       "gimel;":                           '\U00002137',
-       "gjcy;":                            '\U00000453',
-       "gl;":                              '\U00002277',
-       "glE;":                             '\U00002A92',
-       "gla;":                             '\U00002AA5',
-       "glj;":                             '\U00002AA4',
-       "gnE;":                             '\U00002269',
-       "gnap;":                            '\U00002A8A',
-       "gnapprox;":                        '\U00002A8A',
-       "gne;":                             '\U00002A88',
-       "gneq;":                            '\U00002A88',
-       "gneqq;":                           '\U00002269',
-       "gnsim;":                           '\U000022E7',
-       "gopf;":                            '\U0001D558',
-       "grave;":                           '\U00000060',
-       "gscr;":                            '\U0000210A',
-       "gsim;":                            '\U00002273',
-       "gsime;":                           '\U00002A8E',
-       "gsiml;":                           '\U00002A90',
-       "gt;":                              '\U0000003E',
-       "gtcc;":                            '\U00002AA7',
-       "gtcir;":                           '\U00002A7A',
-       "gtdot;":                           '\U000022D7',
-       "gtlPar;":                          '\U00002995',
-       "gtquest;":                         '\U00002A7C',
-       "gtrapprox;":                       '\U00002A86',
-       "gtrarr;":                          '\U00002978',
-       "gtrdot;":                          '\U000022D7',
-       "gtreqless;":                       '\U000022DB',
-       "gtreqqless;":                      '\U00002A8C',
-       "gtrless;":                         '\U00002277',
-       "gtrsim;":                          '\U00002273',
-       "hArr;":                            '\U000021D4',
-       "hairsp;":                          '\U0000200A',
-       "half;":                            '\U000000BD',
-       "hamilt;":                          '\U0000210B',
-       "hardcy;":                          '\U0000044A',
-       "harr;":                            '\U00002194',
-       "harrcir;":                         '\U00002948',
-       "harrw;":                           '\U000021AD',
-       "hbar;":                            '\U0000210F',
-       "hcirc;":                           '\U00000125',
-       "hearts;":                          '\U00002665',
-       "heartsuit;":                       '\U00002665',
-       "hellip;":                          '\U00002026',
-       "hercon;":                          '\U000022B9',
-       "hfr;":                             '\U0001D525',
-       "hksearow;":                        '\U00002925',
-       "hkswarow;":                        '\U00002926',
-       "hoarr;":                           '\U000021FF',
-       "homtht;":                          '\U0000223B',
-       "hookleftarrow;":                   '\U000021A9',
-       "hookrightarrow;":                  '\U000021AA',
-       "hopf;":                            '\U0001D559',
-       "horbar;":                          '\U00002015',
-       "hscr;":                            '\U0001D4BD',
-       "hslash;":                          '\U0000210F',
-       "hstrok;":                          '\U00000127',
-       "hybull;":                          '\U00002043',
-       "hyphen;":                          '\U00002010',
-       "iacute;":                          '\U000000ED',
-       "ic;":                              '\U00002063',
-       "icirc;":                           '\U000000EE',
-       "icy;":                             '\U00000438',
-       "iecy;":                            '\U00000435',
-       "iexcl;":                           '\U000000A1',
-       "iff;":                             '\U000021D4',
-       "ifr;":                             '\U0001D526',
-       "igrave;":                          '\U000000EC',
-       "ii;":                              '\U00002148',
-       "iiiint;":                          '\U00002A0C',
-       "iiint;":                           '\U0000222D',
-       "iinfin;":                          '\U000029DC',
-       "iiota;":                           '\U00002129',
-       "ijlig;":                           '\U00000133',
-       "imacr;":                           '\U0000012B',
-       "image;":                           '\U00002111',
-       "imagline;":                        '\U00002110',
-       "imagpart;":                        '\U00002111',
-       "imath;":                           '\U00000131',
-       "imof;":                            '\U000022B7',
-       "imped;":                           '\U000001B5',
-       "in;":                              '\U00002208',
-       "incare;":                          '\U00002105',
-       "infin;":                           '\U0000221E',
-       "infintie;":                        '\U000029DD',
-       "inodot;":                          '\U00000131',
-       "int;":                             '\U0000222B',
-       "intcal;":                          '\U000022BA',
-       "integers;":                        '\U00002124',
-       "intercal;":                        '\U000022BA',
-       "intlarhk;":                        '\U00002A17',
-       "intprod;":                         '\U00002A3C',
-       "iocy;":                            '\U00000451',
-       "iogon;":                           '\U0000012F',
-       "iopf;":                            '\U0001D55A',
-       "iota;":                            '\U000003B9',
-       "iprod;":                           '\U00002A3C',
-       "iquest;":                          '\U000000BF',
-       "iscr;":                            '\U0001D4BE',
-       "isin;":                            '\U00002208',
-       "isinE;":                           '\U000022F9',
-       "isindot;":                         '\U000022F5',
-       "isins;":                           '\U000022F4',
-       "isinsv;":                          '\U000022F3',
-       "isinv;":                           '\U00002208',
-       "it;":                              '\U00002062',
-       "itilde;":                          '\U00000129',
-       "iukcy;":                           '\U00000456',
-       "iuml;":                            '\U000000EF',
-       "jcirc;":                           '\U00000135',
-       "jcy;":                             '\U00000439',
-       "jfr;":                             '\U0001D527',
-       "jmath;":                           '\U00000237',
-       "jopf;":                            '\U0001D55B',
-       "jscr;":                            '\U0001D4BF',
-       "jsercy;":                          '\U00000458',
-       "jukcy;":                           '\U00000454',
-       "kappa;":                           '\U000003BA',
-       "kappav;":                          '\U000003F0',
-       "kcedil;":                          '\U00000137',
-       "kcy;":                             '\U0000043A',
-       "kfr;":                             '\U0001D528',
-       "kgreen;":                          '\U00000138',
-       "khcy;":                            '\U00000445',
-       "kjcy;":                            '\U0000045C',
-       "kopf;":                            '\U0001D55C',
-       "kscr;":                            '\U0001D4C0',
-       "lAarr;":                           '\U000021DA',
-       "lArr;":                            '\U000021D0',
-       "lAtail;":                          '\U0000291B',
-       "lBarr;":                           '\U0000290E',
-       "lE;":                              '\U00002266',
-       "lEg;":                             '\U00002A8B',
-       "lHar;":                            '\U00002962',
-       "lacute;":                          '\U0000013A',
-       "laemptyv;":                        '\U000029B4',
-       "lagran;":                          '\U00002112',
-       "lambda;":                          '\U000003BB',
-       "lang;":                            '\U000027E8',
-       "langd;":                           '\U00002991',
-       "langle;":                          '\U000027E8',
-       "lap;":                             '\U00002A85',
-       "laquo;":                           '\U000000AB',
-       "larr;":                            '\U00002190',
-       "larrb;":                           '\U000021E4',
-       "larrbfs;":                         '\U0000291F',
-       "larrfs;":                          '\U0000291D',
-       "larrhk;":                          '\U000021A9',
-       "larrlp;":                          '\U000021AB',
-       "larrpl;":                          '\U00002939',
-       "larrsim;":                         '\U00002973',
-       "larrtl;":                          '\U000021A2',
-       "lat;":                             '\U00002AAB',
-       "latail;":                          '\U00002919',
-       "late;":                            '\U00002AAD',
-       "lbarr;":                           '\U0000290C',
-       "lbbrk;":                           '\U00002772',
-       "lbrace;":                          '\U0000007B',
-       "lbrack;":                          '\U0000005B',
-       "lbrke;":                           '\U0000298B',
-       "lbrksld;":                         '\U0000298F',
-       "lbrkslu;":                         '\U0000298D',
-       "lcaron;":                          '\U0000013E',
-       "lcedil;":                          '\U0000013C',
-       "lceil;":                           '\U00002308',
-       "lcub;":                            '\U0000007B',
-       "lcy;":                             '\U0000043B',
-       "ldca;":                            '\U00002936',
-       "ldquo;":                           '\U0000201C',
-       "ldquor;":                          '\U0000201E',
-       "ldrdhar;":                         '\U00002967',
-       "ldrushar;":                        '\U0000294B',
-       "ldsh;":                            '\U000021B2',
-       "le;":                              '\U00002264',
-       "leftarrow;":                       '\U00002190',
-       "leftarrowtail;":                   '\U000021A2',
-       "leftharpoondown;":                 '\U000021BD',
-       "leftharpoonup;":                   '\U000021BC',
-       "leftleftarrows;":                  '\U000021C7',
-       "leftrightarrow;":                  '\U00002194',
-       "leftrightarrows;":                 '\U000021C6',
-       "leftrightharpoons;":               '\U000021CB',
-       "leftrightsquigarrow;":             '\U000021AD',
-       "leftthreetimes;":                  '\U000022CB',
-       "leg;":                             '\U000022DA',
-       "leq;":                             '\U00002264',
-       "leqq;":                            '\U00002266',
-       "leqslant;":                        '\U00002A7D',
-       "les;":                             '\U00002A7D',
-       "lescc;":                           '\U00002AA8',
-       "lesdot;":                          '\U00002A7F',
-       "lesdoto;":                         '\U00002A81',
-       "lesdotor;":                        '\U00002A83',
-       "lesges;":                          '\U00002A93',
-       "lessapprox;":                      '\U00002A85',
-       "lessdot;":                         '\U000022D6',
-       "lesseqgtr;":                       '\U000022DA',
-       "lesseqqgtr;":                      '\U00002A8B',
-       "lessgtr;":                         '\U00002276',
-       "lesssim;":                         '\U00002272',
-       "lfisht;":                          '\U0000297C',
-       "lfloor;":                          '\U0000230A',
-       "lfr;":                             '\U0001D529',
-       "lg;":                              '\U00002276',
-       "lgE;":                             '\U00002A91',
-       "lhard;":                           '\U000021BD',
-       "lharu;":                           '\U000021BC',
-       "lharul;":                          '\U0000296A',
-       "lhblk;":                           '\U00002584',
-       "ljcy;":                            '\U00000459',
-       "ll;":                              '\U0000226A',
-       "llarr;":                           '\U000021C7',
-       "llcorner;":                        '\U0000231E',
-       "llhard;":                          '\U0000296B',
-       "lltri;":                           '\U000025FA',
-       "lmidot;":                          '\U00000140',
-       "lmoust;":                          '\U000023B0',
-       "lmoustache;":                      '\U000023B0',
-       "lnE;":                             '\U00002268',
-       "lnap;":                            '\U00002A89',
-       "lnapprox;":                        '\U00002A89',
-       "lne;":                             '\U00002A87',
-       "lneq;":                            '\U00002A87',
-       "lneqq;":                           '\U00002268',
-       "lnsim;":                           '\U000022E6',
-       "loang;":                           '\U000027EC',
-       "loarr;":                           '\U000021FD',
-       "lobrk;":                           '\U000027E6',
-       "longleftarrow;":                   '\U000027F5',
-       "longleftrightarrow;":              '\U000027F7',
-       "longmapsto;":                      '\U000027FC',
-       "longrightarrow;":                  '\U000027F6',
-       "looparrowleft;":                   '\U000021AB',
-       "looparrowright;":                  '\U000021AC',
-       "lopar;":                           '\U00002985',
-       "lopf;":                            '\U0001D55D',
-       "loplus;":                          '\U00002A2D',
-       "lotimes;":                         '\U00002A34',
-       "lowast;":                          '\U00002217',
-       "lowbar;":                          '\U0000005F',
-       "loz;":                             '\U000025CA',
-       "lozenge;":                         '\U000025CA',
-       "lozf;":                            '\U000029EB',
-       "lpar;":                            '\U00000028',
-       "lparlt;":                          '\U00002993',
-       "lrarr;":                           '\U000021C6',
-       "lrcorner;":                        '\U0000231F',
-       "lrhar;":                           '\U000021CB',
-       "lrhard;":                          '\U0000296D',
-       "lrm;":                             '\U0000200E',
-       "lrtri;":                           '\U000022BF',
-       "lsaquo;":                          '\U00002039',
-       "lscr;":                            '\U0001D4C1',
-       "lsh;":                             '\U000021B0',
-       "lsim;":                            '\U00002272',
-       "lsime;":                           '\U00002A8D',
-       "lsimg;":                           '\U00002A8F',
-       "lsqb;":                            '\U0000005B',
-       "lsquo;":                           '\U00002018',
-       "lsquor;":                          '\U0000201A',
-       "lstrok;":                          '\U00000142',
-       "lt;":                              '\U0000003C',
-       "ltcc;":                            '\U00002AA6',
-       "ltcir;":                           '\U00002A79',
-       "ltdot;":                           '\U000022D6',
-       "lthree;":                          '\U000022CB',
-       "ltimes;":                          '\U000022C9',
-       "ltlarr;":                          '\U00002976',
-       "ltquest;":                         '\U00002A7B',
-       "ltrPar;":                          '\U00002996',
-       "ltri;":                            '\U000025C3',
-       "ltrie;":                           '\U000022B4',
-       "ltrif;":                           '\U000025C2',
-       "lurdshar;":                        '\U0000294A',
-       "luruhar;":                         '\U00002966',
-       "mDDot;":                           '\U0000223A',
-       "macr;":                            '\U000000AF',
-       "male;":                            '\U00002642',
-       "malt;":                            '\U00002720',
-       "maltese;":                         '\U00002720',
-       "map;":                             '\U000021A6',
-       "mapsto;":                          '\U000021A6',
-       "mapstodown;":                      '\U000021A7',
-       "mapstoleft;":                      '\U000021A4',
-       "mapstoup;":                        '\U000021A5',
-       "marker;":                          '\U000025AE',
-       "mcomma;":                          '\U00002A29',
-       "mcy;":                             '\U0000043C',
-       "mdash;":                           '\U00002014',
-       "measuredangle;":                   '\U00002221',
-       "mfr;":                             '\U0001D52A',
-       "mho;":                             '\U00002127',
-       "micro;":                           '\U000000B5',
-       "mid;":                             '\U00002223',
-       "midast;":                          '\U0000002A',
-       "midcir;":                          '\U00002AF0',
-       "middot;":                          '\U000000B7',
-       "minus;":                           '\U00002212',
-       "minusb;":                          '\U0000229F',
-       "minusd;":                          '\U00002238',
-       "minusdu;":                         '\U00002A2A',
-       "mlcp;":                            '\U00002ADB',
-       "mldr;":                            '\U00002026',
-       "mnplus;":                          '\U00002213',
-       "models;":                          '\U000022A7',
-       "mopf;":                            '\U0001D55E',
-       "mp;":                              '\U00002213',
-       "mscr;":                            '\U0001D4C2',
-       "mstpos;":                          '\U0000223E',
-       "mu;":                              '\U000003BC',
-       "multimap;":                        '\U000022B8',
-       "mumap;":                           '\U000022B8',
-       "nLeftarrow;":                      '\U000021CD',
-       "nLeftrightarrow;":                 '\U000021CE',
-       "nRightarrow;":                     '\U000021CF',
-       "nVDash;":                          '\U000022AF',
-       "nVdash;":                          '\U000022AE',
-       "nabla;":                           '\U00002207',
-       "nacute;":                          '\U00000144',
-       "nap;":                             '\U00002249',
-       "napos;":                           '\U00000149',
-       "napprox;":                         '\U00002249',
-       "natur;":                           '\U0000266E',
-       "natural;":                         '\U0000266E',
-       "naturals;":                        '\U00002115',
-       "nbsp;":                            '\U000000A0',
-       "ncap;":                            '\U00002A43',
-       "ncaron;":                          '\U00000148',
-       "ncedil;":                          '\U00000146',
-       "ncong;":                           '\U00002247',
-       "ncup;":                            '\U00002A42',
-       "ncy;":                             '\U0000043D',
-       "ndash;":                           '\U00002013',
-       "ne;":                              '\U00002260',
-       "neArr;":                           '\U000021D7',
-       "nearhk;":                          '\U00002924',
-       "nearr;":                           '\U00002197',
-       "nearrow;":                         '\U00002197',
-       "nequiv;":                          '\U00002262',
-       "nesear;":                          '\U00002928',
-       "nexist;":                          '\U00002204',
-       "nexists;":                         '\U00002204',
-       "nfr;":                             '\U0001D52B',
-       "nge;":                             '\U00002271',
-       "ngeq;":                            '\U00002271',
-       "ngsim;":                           '\U00002275',
-       "ngt;":                             '\U0000226F',
-       "ngtr;":                            '\U0000226F',
-       "nhArr;":                           '\U000021CE',
-       "nharr;":                           '\U000021AE',
-       "nhpar;":                           '\U00002AF2',
-       "ni;":                              '\U0000220B',
-       "nis;":                             '\U000022FC',
-       "nisd;":                            '\U000022FA',
-       "niv;":                             '\U0000220B',
-       "njcy;":                            '\U0000045A',
-       "nlArr;":                           '\U000021CD',
-       "nlarr;":                           '\U0000219A',
-       "nldr;":                            '\U00002025',
-       "nle;":                             '\U00002270',
-       "nleftarrow;":                      '\U0000219A',
-       "nleftrightarrow;":                 '\U000021AE',
-       "nleq;":                            '\U00002270',
-       "nless;":                           '\U0000226E',
-       "nlsim;":                           '\U00002274',
-       "nlt;":                             '\U0000226E',
-       "nltri;":                           '\U000022EA',
-       "nltrie;":                          '\U000022EC',
-       "nmid;":                            '\U00002224',
-       "nopf;":                            '\U0001D55F',
-       "not;":                             '\U000000AC',
-       "notin;":                           '\U00002209',
-       "notinva;":                         '\U00002209',
-       "notinvb;":                         '\U000022F7',
-       "notinvc;":                         '\U000022F6',
-       "notni;":                           '\U0000220C',
-       "notniva;":                         '\U0000220C',
-       "notnivb;":                         '\U000022FE',
-       "notnivc;":                         '\U000022FD',
-       "npar;":                            '\U00002226',
-       "nparallel;":                       '\U00002226',
-       "npolint;":                         '\U00002A14',
-       "npr;":                             '\U00002280',
-       "nprcue;":                          '\U000022E0',
-       "nprec;":                           '\U00002280',
-       "nrArr;":                           '\U000021CF',
-       "nrarr;":                           '\U0000219B',
-       "nrightarrow;":                     '\U0000219B',
-       "nrtri;":                           '\U000022EB',
-       "nrtrie;":                          '\U000022ED',
-       "nsc;":                             '\U00002281',
-       "nsccue;":                          '\U000022E1',
-       "nscr;":                            '\U0001D4C3',
-       "nshortmid;":                       '\U00002224',
-       "nshortparallel;":                  '\U00002226',
-       "nsim;":                            '\U00002241',
-       "nsime;":                           '\U00002244',
-       "nsimeq;":                          '\U00002244',
-       "nsmid;":                           '\U00002224',
-       "nspar;":                           '\U00002226',
-       "nsqsube;":                         '\U000022E2',
-       "nsqsupe;":                         '\U000022E3',
-       "nsub;":                            '\U00002284',
-       "nsube;":                           '\U00002288',
-       "nsubseteq;":                       '\U00002288',
-       "nsucc;":                           '\U00002281',
-       "nsup;":                            '\U00002285',
-       "nsupe;":                           '\U00002289',
-       "nsupseteq;":                       '\U00002289',
-       "ntgl;":                            '\U00002279',
-       "ntilde;":                          '\U000000F1',
-       "ntlg;":                            '\U00002278',
-       "ntriangleleft;":                   '\U000022EA',
-       "ntrianglelefteq;":                 '\U000022EC',
-       "ntriangleright;":                  '\U000022EB',
-       "ntrianglerighteq;":                '\U000022ED',
-       "nu;":                              '\U000003BD',
-       "num;":                             '\U00000023',
-       "numero;":                          '\U00002116',
-       "numsp;":                           '\U00002007',
-       "nvDash;":                          '\U000022AD',
-       "nvHarr;":                          '\U00002904',
-       "nvdash;":                          '\U000022AC',
-       "nvinfin;":                         '\U000029DE',
-       "nvlArr;":                          '\U00002902',
-       "nvrArr;":                          '\U00002903',
-       "nwArr;":                           '\U000021D6',
-       "nwarhk;":                          '\U00002923',
-       "nwarr;":                           '\U00002196',
-       "nwarrow;":                         '\U00002196',
-       "nwnear;":                          '\U00002927',
-       "oS;":                              '\U000024C8',
-       "oacute;":                          '\U000000F3',
-       "oast;":                            '\U0000229B',
-       "ocir;":                            '\U0000229A',
-       "ocirc;":                           '\U000000F4',
-       "ocy;":                             '\U0000043E',
-       "odash;":                           '\U0000229D',
-       "odblac;":                          '\U00000151',
-       "odiv;":                            '\U00002A38',
-       "odot;":                            '\U00002299',
-       "odsold;":                          '\U000029BC',
-       "oelig;":                           '\U00000153',
-       "ofcir;":                           '\U000029BF',
-       "ofr;":                             '\U0001D52C',
-       "ogon;":                            '\U000002DB',
-       "ograve;":                          '\U000000F2',
-       "ogt;":                             '\U000029C1',
-       "ohbar;":                           '\U000029B5',
-       "ohm;":                             '\U000003A9',
-       "oint;":                            '\U0000222E',
-       "olarr;":                           '\U000021BA',
-       "olcir;":                           '\U000029BE',
-       "olcross;":                         '\U000029BB',
-       "oline;":                           '\U0000203E',
-       "olt;":                             '\U000029C0',
-       "omacr;":                           '\U0000014D',
-       "omega;":                           '\U000003C9',
-       "omicron;":                         '\U000003BF',
-       "omid;":                            '\U000029B6',
-       "ominus;":                          '\U00002296',
-       "oopf;":                            '\U0001D560',
-       "opar;":                            '\U000029B7',
-       "operp;":                           '\U000029B9',
-       "oplus;":                           '\U00002295',
-       "or;":                              '\U00002228',
-       "orarr;":                           '\U000021BB',
-       "ord;":                             '\U00002A5D',
-       "order;":                           '\U00002134',
-       "orderof;":                         '\U00002134',
-       "ordf;":                            '\U000000AA',
-       "ordm;":                            '\U000000BA',
-       "origof;":                          '\U000022B6',
-       "oror;":                            '\U00002A56',
-       "orslope;":                         '\U00002A57',
-       "orv;":                             '\U00002A5B',
-       "oscr;":                            '\U00002134',
-       "oslash;":                          '\U000000F8',
-       "osol;":                            '\U00002298',
-       "otilde;":                          '\U000000F5',
-       "otimes;":                          '\U00002297',
-       "otimesas;":                        '\U00002A36',
-       "ouml;":                            '\U000000F6',
-       "ovbar;":                           '\U0000233D',
-       "par;":                             '\U00002225',
-       "para;":                            '\U000000B6',
-       "parallel;":                        '\U00002225',
-       "parsim;":                          '\U00002AF3',
-       "parsl;":                           '\U00002AFD',
-       "part;":                            '\U00002202',
-       "pcy;":                             '\U0000043F',
-       "percnt;":                          '\U00000025',
-       "period;":                          '\U0000002E',
-       "permil;":                          '\U00002030',
-       "perp;":                            '\U000022A5',
-       "pertenk;":                         '\U00002031',
-       "pfr;":                             '\U0001D52D',
-       "phi;":                             '\U000003C6',
-       "phiv;":                            '\U000003D5',
-       "phmmat;":                          '\U00002133',
-       "phone;":                           '\U0000260E',
-       "pi;":                              '\U000003C0',
-       "pitchfork;":                       '\U000022D4',
-       "piv;":                             '\U000003D6',
-       "planck;":                          '\U0000210F',
-       "planckh;":                         '\U0000210E',
-       "plankv;":                          '\U0000210F',
-       "plus;":                            '\U0000002B',
-       "plusacir;":                        '\U00002A23',
-       "plusb;":                           '\U0000229E',
-       "pluscir;":                         '\U00002A22',
-       "plusdo;":                          '\U00002214',
-       "plusdu;":                          '\U00002A25',
-       "pluse;":                           '\U00002A72',
-       "plusmn;":                          '\U000000B1',
-       "plussim;":                         '\U00002A26',
-       "plustwo;":                         '\U00002A27',
-       "pm;":                              '\U000000B1',
-       "pointint;":                        '\U00002A15',
-       "popf;":                            '\U0001D561',
-       "pound;":                           '\U000000A3',
-       "pr;":                              '\U0000227A',
-       "prE;":                             '\U00002AB3',
-       "prap;":                            '\U00002AB7',
-       "prcue;":                           '\U0000227C',
-       "pre;":                             '\U00002AAF',
-       "prec;":                            '\U0000227A',
-       "precapprox;":                      '\U00002AB7',
-       "preccurlyeq;":                     '\U0000227C',
-       "preceq;":                          '\U00002AAF',
-       "precnapprox;":                     '\U00002AB9',
-       "precneqq;":                        '\U00002AB5',
-       "precnsim;":                        '\U000022E8',
-       "precsim;":                         '\U0000227E',
-       "prime;":                           '\U00002032',
-       "primes;":                          '\U00002119',
-       "prnE;":                            '\U00002AB5',
-       "prnap;":                           '\U00002AB9',
-       "prnsim;":                          '\U000022E8',
-       "prod;":                            '\U0000220F',
-       "profalar;":                        '\U0000232E',
-       "profline;":                        '\U00002312',
-       "profsurf;":                        '\U00002313',
-       "prop;":                            '\U0000221D',
-       "propto;":                          '\U0000221D',
-       "prsim;":                           '\U0000227E',
-       "prurel;":                          '\U000022B0',
-       "pscr;":                            '\U0001D4C5',
-       "psi;":                             '\U000003C8',
-       "puncsp;":                          '\U00002008',
-       "qfr;":                             '\U0001D52E',
-       "qint;":                            '\U00002A0C',
-       "qopf;":                            '\U0001D562',
-       "qprime;":                          '\U00002057',
-       "qscr;":                            '\U0001D4C6',
-       "quaternions;":                     '\U0000210D',
-       "quatint;":                         '\U00002A16',
-       "quest;":                           '\U0000003F',
-       "questeq;":                         '\U0000225F',
-       "quot;":                            '\U00000022',
-       "rAarr;":                           '\U000021DB',
-       "rArr;":                            '\U000021D2',
-       "rAtail;":                          '\U0000291C',
-       "rBarr;":                           '\U0000290F',
-       "rHar;":                            '\U00002964',
-       "racute;":                          '\U00000155',
-       "radic;":                           '\U0000221A',
-       "raemptyv;":                        '\U000029B3',
-       "rang;":                            '\U000027E9',
-       "rangd;":                           '\U00002992',
-       "range;":                           '\U000029A5',
-       "rangle;":                          '\U000027E9',
-       "raquo;":                           '\U000000BB',
-       "rarr;":                            '\U00002192',
-       "rarrap;":                          '\U00002975',
-       "rarrb;":                           '\U000021E5',
-       "rarrbfs;":                         '\U00002920',
-       "rarrc;":                           '\U00002933',
-       "rarrfs;":                          '\U0000291E',
-       "rarrhk;":                          '\U000021AA',
-       "rarrlp;":                          '\U000021AC',
-       "rarrpl;":                          '\U00002945',
-       "rarrsim;":                         '\U00002974',
-       "rarrtl;":                          '\U000021A3',
-       "rarrw;":                           '\U0000219D',
-       "ratail;":                          '\U0000291A',
-       "ratio;":                           '\U00002236',
-       "rationals;":                       '\U0000211A',
-       "rbarr;":                           '\U0000290D',
-       "rbbrk;":                           '\U00002773',
-       "rbrace;":                          '\U0000007D',
-       "rbrack;":                          '\U0000005D',
-       "rbrke;":                           '\U0000298C',
-       "rbrksld;":                         '\U0000298E',
-       "rbrkslu;":                         '\U00002990',
-       "rcaron;":                          '\U00000159',
-       "rcedil;":                          '\U00000157',
-       "rceil;":                           '\U00002309',
-       "rcub;":                            '\U0000007D',
-       "rcy;":                             '\U00000440',
-       "rdca;":                            '\U00002937',
-       "rdldhar;":                         '\U00002969',
-       "rdquo;":                           '\U0000201D',
-       "rdquor;":                          '\U0000201D',
-       "rdsh;":                            '\U000021B3',
-       "real;":                            '\U0000211C',
-       "realine;":                         '\U0000211B',
-       "realpart;":                        '\U0000211C',
-       "reals;":                           '\U0000211D',
-       "rect;":                            '\U000025AD',
-       "reg;":                             '\U000000AE',
-       "rfisht;":                          '\U0000297D',
-       "rfloor;":                          '\U0000230B',
-       "rfr;":                             '\U0001D52F',
-       "rhard;":                           '\U000021C1',
-       "rharu;":                           '\U000021C0',
-       "rharul;":                          '\U0000296C',
-       "rho;":                             '\U000003C1',
-       "rhov;":                            '\U000003F1',
-       "rightarrow;":                      '\U00002192',
-       "rightarrowtail;":                  '\U000021A3',
-       "rightharpoondown;":                '\U000021C1',
-       "rightharpoonup;":                  '\U000021C0',
-       "rightleftarrows;":                 '\U000021C4',
-       "rightleftharpoons;":               '\U000021CC',
-       "rightrightarrows;":                '\U000021C9',
-       "rightsquigarrow;":                 '\U0000219D',
-       "rightthreetimes;":                 '\U000022CC',
-       "ring;":                            '\U000002DA',
-       "risingdotseq;":                    '\U00002253',
-       "rlarr;":                           '\U000021C4',
-       "rlhar;":                           '\U000021CC',
-       "rlm;":                             '\U0000200F',
-       "rmoust;":                          '\U000023B1',
-       "rmoustache;":                      '\U000023B1',
-       "rnmid;":                           '\U00002AEE',
-       "roang;":                           '\U000027ED',
-       "roarr;":                           '\U000021FE',
-       "robrk;":                           '\U000027E7',
-       "ropar;":                           '\U00002986',
-       "ropf;":                            '\U0001D563',
-       "roplus;":                          '\U00002A2E',
-       "rotimes;":                         '\U00002A35',
-       "rpar;":                            '\U00000029',
-       "rpargt;":                          '\U00002994',
-       "rppolint;":                        '\U00002A12',
-       "rrarr;":                           '\U000021C9',
-       "rsaquo;":                          '\U0000203A',
-       "rscr;":                            '\U0001D4C7',
-       "rsh;":                             '\U000021B1',
-       "rsqb;":                            '\U0000005D',
-       "rsquo;":                           '\U00002019',
-       "rsquor;":                          '\U00002019',
-       "rthree;":                          '\U000022CC',
-       "rtimes;":                          '\U000022CA',
-       "rtri;":                            '\U000025B9',
-       "rtrie;":                           '\U000022B5',
-       "rtrif;":                           '\U000025B8',
-       "rtriltri;":                        '\U000029CE',
-       "ruluhar;":                         '\U00002968',
-       "rx;":                              '\U0000211E',
-       "sacute;":                          '\U0000015B',
-       "sbquo;":                           '\U0000201A',
-       "sc;":                              '\U0000227B',
-       "scE;":                             '\U00002AB4',
-       "scap;":                            '\U00002AB8',
-       "scaron;":                          '\U00000161',
-       "sccue;":                           '\U0000227D',
-       "sce;":                             '\U00002AB0',
-       "scedil;":                          '\U0000015F',
-       "scirc;":                           '\U0000015D',
-       "scnE;":                            '\U00002AB6',
-       "scnap;":                           '\U00002ABA',
-       "scnsim;":                          '\U000022E9',
-       "scpolint;":                        '\U00002A13',
-       "scsim;":                           '\U0000227F',
-       "scy;":                             '\U00000441',
-       "sdot;":                            '\U000022C5',
-       "sdotb;":                           '\U000022A1',
-       "sdote;":                           '\U00002A66',
-       "seArr;":                           '\U000021D8',
-       "searhk;":                          '\U00002925',
-       "searr;":                           '\U00002198',
-       "searrow;":                         '\U00002198',
-       "sect;":                            '\U000000A7',
-       "semi;":                            '\U0000003B',
-       "seswar;":                          '\U00002929',
-       "setminus;":                        '\U00002216',
-       "setmn;":                           '\U00002216',
-       "sext;":                            '\U00002736',
-       "sfr;":                             '\U0001D530',
-       "sfrown;":                          '\U00002322',
-       "sharp;":                           '\U0000266F',
-       "shchcy;":                          '\U00000449',
-       "shcy;":                            '\U00000448',
-       "shortmid;":                        '\U00002223',
-       "shortparallel;":                   '\U00002225',
-       "shy;":                             '\U000000AD',
-       "sigma;":                           '\U000003C3',
-       "sigmaf;":                          '\U000003C2',
-       "sigmav;":                          '\U000003C2',
-       "sim;":                             '\U0000223C',
-       "simdot;":                          '\U00002A6A',
-       "sime;":                            '\U00002243',
-       "simeq;":                           '\U00002243',
-       "simg;":                            '\U00002A9E',
-       "simgE;":                           '\U00002AA0',
-       "siml;":                            '\U00002A9D',
-       "simlE;":                           '\U00002A9F',
-       "simne;":                           '\U00002246',
-       "simplus;":                         '\U00002A24',
-       "simrarr;":                         '\U00002972',
-       "slarr;":                           '\U00002190',
-       "smallsetminus;":                   '\U00002216',
-       "smashp;":                          '\U00002A33',
-       "smeparsl;":                        '\U000029E4',
-       "smid;":                            '\U00002223',
-       "smile;":                           '\U00002323',
-       "smt;":                             '\U00002AAA',
-       "smte;":                            '\U00002AAC',
-       "softcy;":                          '\U0000044C',
-       "sol;":                             '\U0000002F',
-       "solb;":                            '\U000029C4',
-       "solbar;":                          '\U0000233F',
-       "sopf;":                            '\U0001D564',
-       "spades;":                          '\U00002660',
-       "spadesuit;":                       '\U00002660',
-       "spar;":                            '\U00002225',
-       "sqcap;":                           '\U00002293',
-       "sqcup;":                           '\U00002294',
-       "sqsub;":                           '\U0000228F',
-       "sqsube;":                          '\U00002291',
-       "sqsubset;":                        '\U0000228F',
-       "sqsubseteq;":                      '\U00002291',
-       "sqsup;":                           '\U00002290',
-       "sqsupe;":                          '\U00002292',
-       "sqsupset;":                        '\U00002290',
-       "sqsupseteq;":                      '\U00002292',
-       "squ;":                             '\U000025A1',
-       "square;":                          '\U000025A1',
-       "squarf;":                          '\U000025AA',
-       "squf;":                            '\U000025AA',
-       "srarr;":                           '\U00002192',
-       "sscr;":                            '\U0001D4C8',
-       "ssetmn;":                          '\U00002216',
-       "ssmile;":                          '\U00002323',
-       "sstarf;":                          '\U000022C6',
-       "star;":                            '\U00002606',
-       "starf;":                           '\U00002605',
-       "straightepsilon;":                 '\U000003F5',
-       "straightphi;":                     '\U000003D5',
-       "strns;":                           '\U000000AF',
-       "sub;":                             '\U00002282',
-       "subE;":                            '\U00002AC5',
-       "subdot;":                          '\U00002ABD',
-       "sube;":                            '\U00002286',
-       "subedot;":                         '\U00002AC3',
-       "submult;":                         '\U00002AC1',
-       "subnE;":                           '\U00002ACB',
-       "subne;":                           '\U0000228A',
-       "subplus;":                         '\U00002ABF',
-       "subrarr;":                         '\U00002979',
-       "subset;":                          '\U00002282',
-       "subseteq;":                        '\U00002286',
-       "subseteqq;":                       '\U00002AC5',
-       "subsetneq;":                       '\U0000228A',
-       "subsetneqq;":                      '\U00002ACB',
-       "subsim;":                          '\U00002AC7',
-       "subsub;":                          '\U00002AD5',
-       "subsup;":                          '\U00002AD3',
-       "succ;":                            '\U0000227B',
-       "succapprox;":                      '\U00002AB8',
-       "succcurlyeq;":                     '\U0000227D',
-       "succeq;":                          '\U00002AB0',
-       "succnapprox;":                     '\U00002ABA',
-       "succneqq;":                        '\U00002AB6',
-       "succnsim;":                        '\U000022E9',
-       "succsim;":                         '\U0000227F',
-       "sum;":                             '\U00002211',
-       "sung;":                            '\U0000266A',
-       "sup;":                             '\U00002283',
-       "sup1;":                            '\U000000B9',
-       "sup2;":                            '\U000000B2',
-       "sup3;":                            '\U000000B3',
-       "supE;":                            '\U00002AC6',
-       "supdot;":                          '\U00002ABE',
-       "supdsub;":                         '\U00002AD8',
-       "supe;":                            '\U00002287',
-       "supedot;":                         '\U00002AC4',
-       "suphsol;":                         '\U000027C9',
-       "suphsub;":                         '\U00002AD7',
-       "suplarr;":                         '\U0000297B',
-       "supmult;":                         '\U00002AC2',
-       "supnE;":                           '\U00002ACC',
-       "supne;":                           '\U0000228B',
-       "supplus;":                         '\U00002AC0',
-       "supset;":                          '\U00002283',
-       "supseteq;":                        '\U00002287',
-       "supseteqq;":                       '\U00002AC6',
-       "supsetneq;":                       '\U0000228B',
-       "supsetneqq;":                      '\U00002ACC',
-       "supsim;":                          '\U00002AC8',
-       "supsub;":                          '\U00002AD4',
-       "supsup;":                          '\U00002AD6',
-       "swArr;":                           '\U000021D9',
-       "swarhk;":                          '\U00002926',
-       "swarr;":                           '\U00002199',
-       "swarrow;":                         '\U00002199',
-       "swnwar;":                          '\U0000292A',
-       "szlig;":                           '\U000000DF',
-       "target;":                          '\U00002316',
-       "tau;":                             '\U000003C4',
-       "tbrk;":                            '\U000023B4',
-       "tcaron;":                          '\U00000165',
-       "tcedil;":                          '\U00000163',
-       "tcy;":                             '\U00000442',
-       "tdot;":                            '\U000020DB',
-       "telrec;":                          '\U00002315',
-       "tfr;":                             '\U0001D531',
-       "there4;":                          '\U00002234',
-       "therefore;":                       '\U00002234',
-       "theta;":                           '\U000003B8',
-       "thetasym;":                        '\U000003D1',
-       "thetav;":                          '\U000003D1',
-       "thickapprox;":                     '\U00002248',
-       "thicksim;":                        '\U0000223C',
-       "thinsp;":                          '\U00002009',
-       "thkap;":                           '\U00002248',
-       "thksim;":                          '\U0000223C',
-       "thorn;":                           '\U000000FE',
-       "tilde;":                           '\U000002DC',
-       "times;":                           '\U000000D7',
-       "timesb;":                          '\U000022A0',
-       "timesbar;":                        '\U00002A31',
-       "timesd;":                          '\U00002A30',
-       "tint;":                            '\U0000222D',
-       "toea;":                            '\U00002928',
-       "top;":                             '\U000022A4',
-       "topbot;":                          '\U00002336',
-       "topcir;":                          '\U00002AF1',
-       "topf;":                            '\U0001D565',
-       "topfork;":                         '\U00002ADA',
-       "tosa;":                            '\U00002929',
-       "tprime;":                          '\U00002034',
-       "trade;":                           '\U00002122',
-       "triangle;":                        '\U000025B5',
-       "triangledown;":                    '\U000025BF',
-       "triangleleft;":                    '\U000025C3',
-       "trianglelefteq;":                  '\U000022B4',
-       "triangleq;":                       '\U0000225C',
-       "triangleright;":                   '\U000025B9',
-       "trianglerighteq;":                 '\U000022B5',
-       "tridot;":                          '\U000025EC',
-       "trie;":                            '\U0000225C',
-       "triminus;":                        '\U00002A3A',
-       "triplus;":                         '\U00002A39',
-       "trisb;":                           '\U000029CD',
-       "tritime;":                         '\U00002A3B',
-       "trpezium;":                        '\U000023E2',
-       "tscr;":                            '\U0001D4C9',
-       "tscy;":                            '\U00000446',
-       "tshcy;":                           '\U0000045B',
-       "tstrok;":                          '\U00000167',
-       "twixt;":                           '\U0000226C',
-       "twoheadleftarrow;":                '\U0000219E',
-       "twoheadrightarrow;":               '\U000021A0',
-       "uArr;":                            '\U000021D1',
-       "uHar;":                            '\U00002963',
-       "uacute;":                          '\U000000FA',
-       "uarr;":                            '\U00002191',
-       "ubrcy;":                           '\U0000045E',
-       "ubreve;":                          '\U0000016D',
-       "ucirc;":                           '\U000000FB',
-       "ucy;":                             '\U00000443',
-       "udarr;":                           '\U000021C5',
-       "udblac;":                          '\U00000171',
-       "udhar;":                           '\U0000296E',
-       "ufisht;":                          '\U0000297E',
-       "ufr;":                             '\U0001D532',
-       "ugrave;":                          '\U000000F9',
-       "uharl;":                           '\U000021BF',
-       "uharr;":                           '\U000021BE',
-       "uhblk;":                           '\U00002580',
-       "ulcorn;":                          '\U0000231C',
-       "ulcorner;":                        '\U0000231C',
-       "ulcrop;":                          '\U0000230F',
-       "ultri;":                           '\U000025F8',
-       "umacr;":                           '\U0000016B',
-       "uml;":                             '\U000000A8',
-       "uogon;":                           '\U00000173',
-       "uopf;":                            '\U0001D566',
-       "uparrow;":                         '\U00002191',
-       "updownarrow;":                     '\U00002195',
-       "upharpoonleft;":                   '\U000021BF',
-       "upharpoonright;":                  '\U000021BE',
-       "uplus;":                           '\U0000228E',
-       "upsi;":                            '\U000003C5',
-       "upsih;":                           '\U000003D2',
-       "upsilon;":                         '\U000003C5',
-       "upuparrows;":                      '\U000021C8',
-       "urcorn;":                          '\U0000231D',
-       "urcorner;":                        '\U0000231D',
-       "urcrop;":                          '\U0000230E',
-       "uring;":                           '\U0000016F',
-       "urtri;":                           '\U000025F9',
-       "uscr;":                            '\U0001D4CA',
-       "utdot;":                           '\U000022F0',
-       "utilde;":                          '\U00000169',
-       "utri;":                            '\U000025B5',
-       "utrif;":                           '\U000025B4',
-       "uuarr;":                           '\U000021C8',
-       "uuml;":                            '\U000000FC',
-       "uwangle;":                         '\U000029A7',
-       "vArr;":                            '\U000021D5',
-       "vBar;":                            '\U00002AE8',
-       "vBarv;":                           '\U00002AE9',
-       "vDash;":                           '\U000022A8',
-       "vangrt;":                          '\U0000299C',
-       "varepsilon;":                      '\U000003F5',
-       "varkappa;":                        '\U000003F0',
-       "varnothing;":                      '\U00002205',
-       "varphi;":                          '\U000003D5',
-       "varpi;":                           '\U000003D6',
-       "varpropto;":                       '\U0000221D',
-       "varr;":                            '\U00002195',
-       "varrho;":                          '\U000003F1',
-       "varsigma;":                        '\U000003C2',
-       "vartheta;":                        '\U000003D1',
-       "vartriangleleft;":                 '\U000022B2',
-       "vartriangleright;":                '\U000022B3',
-       "vcy;":                             '\U00000432',
-       "vdash;":                           '\U000022A2',
-       "vee;":                             '\U00002228',
-       "veebar;":                          '\U000022BB',
-       "veeeq;":                           '\U0000225A',
-       "vellip;":                          '\U000022EE',
-       "verbar;":                          '\U0000007C',
-       "vert;":                            '\U0000007C',
-       "vfr;":                             '\U0001D533',
-       "vltri;":                           '\U000022B2',
-       "vopf;":                            '\U0001D567',
-       "vprop;":                           '\U0000221D',
-       "vrtri;":                           '\U000022B3',
-       "vscr;":                            '\U0001D4CB',
-       "vzigzag;":                         '\U0000299A',
-       "wcirc;":                           '\U00000175',
-       "wedbar;":                          '\U00002A5F',
-       "wedge;":                           '\U00002227',
-       "wedgeq;":                          '\U00002259',
-       "weierp;":                          '\U00002118',
-       "wfr;":                             '\U0001D534',
-       "wopf;":                            '\U0001D568',
-       "wp;":                              '\U00002118',
-       "wr;":                              '\U00002240',
-       "wreath;":                          '\U00002240',
-       "wscr;":                            '\U0001D4CC',
-       "xcap;":                            '\U000022C2',
-       "xcirc;":                           '\U000025EF',
-       "xcup;":                            '\U000022C3',
-       "xdtri;":                           '\U000025BD',
-       "xfr;":                             '\U0001D535',
-       "xhArr;":                           '\U000027FA',
-       "xharr;":                           '\U000027F7',
-       "xi;":                              '\U000003BE',
-       "xlArr;":                           '\U000027F8',
-       "xlarr;":                           '\U000027F5',
-       "xmap;":                            '\U000027FC',
-       "xnis;":                            '\U000022FB',
-       "xodot;":                           '\U00002A00',
-       "xopf;":                            '\U0001D569',
-       "xoplus;":                          '\U00002A01',
-       "xotime;":                          '\U00002A02',
-       "xrArr;":                           '\U000027F9',
-       "xrarr;":                           '\U000027F6',
-       "xscr;":                            '\U0001D4CD',
-       "xsqcup;":                          '\U00002A06',
-       "xuplus;":                          '\U00002A04',
-       "xutri;":                           '\U000025B3',
-       "xvee;":                            '\U000022C1',
-       "xwedge;":                          '\U000022C0',
-       "yacute;":                          '\U000000FD',
-       "yacy;":                            '\U0000044F',
-       "ycirc;":                           '\U00000177',
-       "ycy;":                             '\U0000044B',
-       "yen;":                             '\U000000A5',
-       "yfr;":                             '\U0001D536',
-       "yicy;":                            '\U00000457',
-       "yopf;":                            '\U0001D56A',
-       "yscr;":                            '\U0001D4CE',
-       "yucy;":                            '\U0000044E',
-       "yuml;":                            '\U000000FF',
-       "zacute;":                          '\U0000017A',
-       "zcaron;":                          '\U0000017E',
-       "zcy;":                             '\U00000437',
-       "zdot;":                            '\U0000017C',
-       "zeetrf;":                          '\U00002128',
-       "zeta;":                            '\U000003B6',
-       "zfr;":                             '\U0001D537',
-       "zhcy;":                            '\U00000436',
-       "zigrarr;":                         '\U000021DD',
-       "zopf;":                            '\U0001D56B',
-       "zscr;":                            '\U0001D4CF',
-       "zwj;":                             '\U0000200D',
-       "zwnj;":                            '\U0000200C',
-       "AElig":                            '\U000000C6',
-       "AMP":                              '\U00000026',
-       "Aacute":                           '\U000000C1',
-       "Acirc":                            '\U000000C2',
-       "Agrave":                           '\U000000C0',
-       "Aring":                            '\U000000C5',
-       "Atilde":                           '\U000000C3',
-       "Auml":                             '\U000000C4',
-       "COPY":                             '\U000000A9',
-       "Ccedil":                           '\U000000C7',
-       "ETH":                              '\U000000D0',
-       "Eacute":                           '\U000000C9',
-       "Ecirc":                            '\U000000CA',
-       "Egrave":                           '\U000000C8',
-       "Euml":                             '\U000000CB',
-       "GT":                               '\U0000003E',
-       "Iacute":                           '\U000000CD',
-       "Icirc":                            '\U000000CE',
-       "Igrave":                           '\U000000CC',
-       "Iuml":                             '\U000000CF',
-       "LT":                               '\U0000003C',
-       "Ntilde":                           '\U000000D1',
-       "Oacute":                           '\U000000D3',
-       "Ocirc":                            '\U000000D4',
-       "Ograve":                           '\U000000D2',
-       "Oslash":                           '\U000000D8',
-       "Otilde":                           '\U000000D5',
-       "Ouml":                             '\U000000D6',
-       "QUOT":                             '\U00000022',
-       "REG":                              '\U000000AE',
-       "THORN":                            '\U000000DE',
-       "Uacute":                           '\U000000DA',
-       "Ucirc":                            '\U000000DB',
-       "Ugrave":                           '\U000000D9',
-       "Uuml":                             '\U000000DC',
-       "Yacute":                           '\U000000DD',
-       "aacute":                           '\U000000E1',
-       "acirc":                            '\U000000E2',
-       "acute":                            '\U000000B4',
-       "aelig":                            '\U000000E6',
-       "agrave":                           '\U000000E0',
-       "amp":                              '\U00000026',
-       "aring":                            '\U000000E5',
-       "atilde":                           '\U000000E3',
-       "auml":                             '\U000000E4',
-       "brvbar":                           '\U000000A6',
-       "ccedil":                           '\U000000E7',
-       "cedil":                            '\U000000B8',
-       "cent":                             '\U000000A2',
-       "copy":                             '\U000000A9',
-       "curren":                           '\U000000A4',
-       "deg":                              '\U000000B0',
-       "divide":                           '\U000000F7',
-       "eacute":                           '\U000000E9',
-       "ecirc":                            '\U000000EA',
-       "egrave":                           '\U000000E8',
-       "eth":                              '\U000000F0',
-       "euml":                             '\U000000EB',
-       "frac12":                           '\U000000BD',
-       "frac14":                           '\U000000BC',
-       "frac34":                           '\U000000BE',
-       "gt":                               '\U0000003E',
-       "iacute":                           '\U000000ED',
-       "icirc":                            '\U000000EE',
-       "iexcl":                            '\U000000A1',
-       "igrave":                           '\U000000EC',
-       "iquest":                           '\U000000BF',
-       "iuml":                             '\U000000EF',
-       "laquo":                            '\U000000AB',
-       "lt":                               '\U0000003C',
-       "macr":                             '\U000000AF',
-       "micro":                            '\U000000B5',
-       "middot":                           '\U000000B7',
-       "nbsp":                             '\U000000A0',
-       "not":                              '\U000000AC',
-       "ntilde":                           '\U000000F1',
-       "oacute":                           '\U000000F3',
-       "ocirc":                            '\U000000F4',
-       "ograve":                           '\U000000F2',
-       "ordf":                             '\U000000AA',
-       "ordm":                             '\U000000BA',
-       "oslash":                           '\U000000F8',
-       "otilde":                           '\U000000F5',
-       "ouml":                             '\U000000F6',
-       "para":                             '\U000000B6',
-       "plusmn":                           '\U000000B1',
-       "pound":                            '\U000000A3',
-       "quot":                             '\U00000022',
-       "raquo":                            '\U000000BB',
-       "reg":                              '\U000000AE',
-       "sect":                             '\U000000A7',
-       "shy":                              '\U000000AD',
-       "sup1":                             '\U000000B9',
-       "sup2":                             '\U000000B2',
-       "sup3":                             '\U000000B3',
-       "szlig":                            '\U000000DF',
-       "thorn":                            '\U000000FE',
-       "times":                            '\U000000D7',
-       "uacute":                           '\U000000FA',
-       "ucirc":                            '\U000000FB',
-       "ugrave":                           '\U000000F9',
-       "uml":                              '\U000000A8',
-       "uuml":                             '\U000000FC',
-       "yacute":                           '\U000000FD',
-       "yen":                              '\U000000A5',
-       "yuml":                             '\U000000FF',
-}
-
-// HTML entities that are two unicode codepoints.
-var entity2 = map[string][2]rune{
-       // TODO(nigeltao): Handle replacements that are wider than their names.
-       // "nLt;":                     {'\u226A', '\u20D2'},
-       // "nGt;":                     {'\u226B', '\u20D2'},
-       "NotEqualTilde;":           {'\u2242', '\u0338'},
-       "NotGreaterFullEqual;":     {'\u2267', '\u0338'},
-       "NotGreaterGreater;":       {'\u226B', '\u0338'},
-       "NotGreaterSlantEqual;":    {'\u2A7E', '\u0338'},
-       "NotHumpDownHump;":         {'\u224E', '\u0338'},
-       "NotHumpEqual;":            {'\u224F', '\u0338'},
-       "NotLeftTriangleBar;":      {'\u29CF', '\u0338'},
-       "NotLessLess;":             {'\u226A', '\u0338'},
-       "NotLessSlantEqual;":       {'\u2A7D', '\u0338'},
-       "NotNestedGreaterGreater;": {'\u2AA2', '\u0338'},
-       "NotNestedLessLess;":       {'\u2AA1', '\u0338'},
-       "NotPrecedesEqual;":        {'\u2AAF', '\u0338'},
-       "NotRightTriangleBar;":     {'\u29D0', '\u0338'},
-       "NotSquareSubset;":         {'\u228F', '\u0338'},
-       "NotSquareSuperset;":       {'\u2290', '\u0338'},
-       "NotSubset;":               {'\u2282', '\u20D2'},
-       "NotSucceedsEqual;":        {'\u2AB0', '\u0338'},
-       "NotSucceedsTilde;":        {'\u227F', '\u0338'},
-       "NotSuperset;":             {'\u2283', '\u20D2'},
-       "ThickSpace;":              {'\u205F', '\u200A'},
-       "acE;":                     {'\u223E', '\u0333'},
-       "bne;":                     {'\u003D', '\u20E5'},
-       "bnequiv;":                 {'\u2261', '\u20E5'},
-       "caps;":                    {'\u2229', '\uFE00'},
-       "cups;":                    {'\u222A', '\uFE00'},
-       "fjlig;":                   {'\u0066', '\u006A'},
-       "gesl;":                    {'\u22DB', '\uFE00'},
-       "gvertneqq;":               {'\u2269', '\uFE00'},
-       "gvnE;":                    {'\u2269', '\uFE00'},
-       "lates;":                   {'\u2AAD', '\uFE00'},
-       "lesg;":                    {'\u22DA', '\uFE00'},
-       "lvertneqq;":               {'\u2268', '\uFE00'},
-       "lvnE;":                    {'\u2268', '\uFE00'},
-       "nGg;":                     {'\u22D9', '\u0338'},
-       "nGtv;":                    {'\u226B', '\u0338'},
-       "nLl;":                     {'\u22D8', '\u0338'},
-       "nLtv;":                    {'\u226A', '\u0338'},
-       "nang;":                    {'\u2220', '\u20D2'},
-       "napE;":                    {'\u2A70', '\u0338'},
-       "napid;":                   {'\u224B', '\u0338'},
-       "nbump;":                   {'\u224E', '\u0338'},
-       "nbumpe;":                  {'\u224F', '\u0338'},
-       "ncongdot;":                {'\u2A6D', '\u0338'},
-       "nedot;":                   {'\u2250', '\u0338'},
-       "nesim;":                   {'\u2242', '\u0338'},
-       "ngE;":                     {'\u2267', '\u0338'},
-       "ngeqq;":                   {'\u2267', '\u0338'},
-       "ngeqslant;":               {'\u2A7E', '\u0338'},
-       "nges;":                    {'\u2A7E', '\u0338'},
-       "nlE;":                     {'\u2266', '\u0338'},
-       "nleqq;":                   {'\u2266', '\u0338'},
-       "nleqslant;":               {'\u2A7D', '\u0338'},
-       "nles;":                    {'\u2A7D', '\u0338'},
-       "notinE;":                  {'\u22F9', '\u0338'},
-       "notindot;":                {'\u22F5', '\u0338'},
-       "nparsl;":                  {'\u2AFD', '\u20E5'},
-       "npart;":                   {'\u2202', '\u0338'},
-       "npre;":                    {'\u2AAF', '\u0338'},
-       "npreceq;":                 {'\u2AAF', '\u0338'},
-       "nrarrc;":                  {'\u2933', '\u0338'},
-       "nrarrw;":                  {'\u219D', '\u0338'},
-       "nsce;":                    {'\u2AB0', '\u0338'},
-       "nsubE;":                   {'\u2AC5', '\u0338'},
-       "nsubset;":                 {'\u2282', '\u20D2'},
-       "nsubseteqq;":              {'\u2AC5', '\u0338'},
-       "nsucceq;":                 {'\u2AB0', '\u0338'},
-       "nsupE;":                   {'\u2AC6', '\u0338'},
-       "nsupset;":                 {'\u2283', '\u20D2'},
-       "nsupseteqq;":              {'\u2AC6', '\u0338'},
-       "nvap;":                    {'\u224D', '\u20D2'},
-       "nvge;":                    {'\u2265', '\u20D2'},
-       "nvgt;":                    {'\u003E', '\u20D2'},
-       "nvle;":                    {'\u2264', '\u20D2'},
-       "nvlt;":                    {'\u003C', '\u20D2'},
-       "nvltrie;":                 {'\u22B4', '\u20D2'},
-       "nvrtrie;":                 {'\u22B5', '\u20D2'},
-       "nvsim;":                   {'\u223C', '\u20D2'},
-       "race;":                    {'\u223D', '\u0331'},
-       "smtes;":                   {'\u2AAC', '\uFE00'},
-       "sqcaps;":                  {'\u2293', '\uFE00'},
-       "sqcups;":                  {'\u2294', '\uFE00'},
-       "varsubsetneq;":            {'\u228A', '\uFE00'},
-       "varsubsetneqq;":           {'\u2ACB', '\uFE00'},
-       "varsupsetneq;":            {'\u228B', '\uFE00'},
-       "varsupsetneqq;":           {'\u2ACC', '\uFE00'},
-       "vnsub;":                   {'\u2282', '\u20D2'},
-       "vnsup;":                   {'\u2283', '\u20D2'},
-       "vsubnE;":                  {'\u2ACB', '\uFE00'},
-       "vsubne;":                  {'\u228A', '\uFE00'},
-       "vsupnE;":                  {'\u2ACC', '\uFE00'},
-       "vsupne;":                  {'\u228B', '\uFE00'},
-}
diff --git a/html/entity_test.go b/html/entity_test.go
deleted file mode 100644 (file)
index b53f866..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "testing"
-       "unicode/utf8"
-)
-
-func TestEntityLength(t *testing.T) {
-       // We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
-       // The +1 comes from the leading "&". This property implies that the length of
-       // unescaped text is <= the length of escaped text.
-       for k, v := range entity {
-               if 1+len(k) < utf8.RuneLen(v) {
-                       t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v))
-               }
-               if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
-                       t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
-               }
-       }
-       for k, v := range entity2 {
-               if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) {
-                       t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1]))
-               }
-       }
-}
diff --git a/html/escape.go b/html/escape.go
deleted file mode 100644 (file)
index 04c6bec..0000000
+++ /dev/null
@@ -1,339 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "bytes"
-       "strings"
-       "unicode/utf8"
-)
-
-// These replacements permit compatibility with old numeric entities that
-// assumed Windows-1252 encoding.
-// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
-var replacementTable = [...]rune{
-       '\u20AC', // First entry is what 0x80 should be replaced with.
-       '\u0081',
-       '\u201A',
-       '\u0192',
-       '\u201E',
-       '\u2026',
-       '\u2020',
-       '\u2021',
-       '\u02C6',
-       '\u2030',
-       '\u0160',
-       '\u2039',
-       '\u0152',
-       '\u008D',
-       '\u017D',
-       '\u008F',
-       '\u0090',
-       '\u2018',
-       '\u2019',
-       '\u201C',
-       '\u201D',
-       '\u2022',
-       '\u2013',
-       '\u2014',
-       '\u02DC',
-       '\u2122',
-       '\u0161',
-       '\u203A',
-       '\u0153',
-       '\u009D',
-       '\u017E',
-       '\u0178', // Last entry is 0x9F.
-       // 0x00->'\uFFFD' is handled programmatically.
-       // 0x0D->'\u000D' is a no-op.
-}
-
-// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
-// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
-// Precondition: b[src] == '&' && dst <= src.
-// attribute should be true if parsing an attribute value.
-func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
-       // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
-
-       // i starts at 1 because we already know that s[0] == '&'.
-       i, s := 1, b[src:]
-
-       if len(s) <= 1 {
-               b[dst] = b[src]
-               return dst + 1, src + 1
-       }
-
-       if s[i] == '#' {
-               if len(s) <= 3 { // We need to have at least "&#.".
-                       b[dst] = b[src]
-                       return dst + 1, src + 1
-               }
-               i++
-               c := s[i]
-               hex := false
-               if c == 'x' || c == 'X' {
-                       hex = true
-                       i++
-               }
-
-               x := '\x00'
-               for i < len(s) {
-                       c = s[i]
-                       i++
-                       if hex {
-                               if '0' <= c && c <= '9' {
-                                       x = 16*x + rune(c) - '0'
-                                       continue
-                               } else if 'a' <= c && c <= 'f' {
-                                       x = 16*x + rune(c) - 'a' + 10
-                                       continue
-                               } else if 'A' <= c && c <= 'F' {
-                                       x = 16*x + rune(c) - 'A' + 10
-                                       continue
-                               }
-                       } else if '0' <= c && c <= '9' {
-                               x = 10*x + rune(c) - '0'
-                               continue
-                       }
-                       if c != ';' {
-                               i--
-                       }
-                       break
-               }
-
-               if i <= 3 { // No characters matched.
-                       b[dst] = b[src]
-                       return dst + 1, src + 1
-               }
-
-               if 0x80 <= x && x <= 0x9F {
-                       // Replace characters from Windows-1252 with UTF-8 equivalents.
-                       x = replacementTable[x-0x80]
-               } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
-                       // Replace invalid characters with the replacement character.
-                       x = '\uFFFD'
-               }
-
-               return dst + utf8.EncodeRune(b[dst:], x), src + i
-       }
-
-       // Consume the maximum number of characters possible, with the
-       // consumed characters matching one of the named references.
-
-       for i < len(s) {
-               c := s[i]
-               i++
-               // Lower-cased characters are more common in entities, so we check for them first.
-               if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
-                       continue
-               }
-               if c != ';' {
-                       i--
-               }
-               break
-       }
-
-       entityName := string(s[1:i])
-       if entityName == "" {
-               // No-op.
-       } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
-               // No-op.
-       } else if x := entity[entityName]; x != 0 {
-               return dst + utf8.EncodeRune(b[dst:], x), src + i
-       } else if x := entity2[entityName]; x[0] != 0 {
-               dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
-               return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
-       } else if !attribute {
-               maxLen := len(entityName) - 1
-               if maxLen > longestEntityWithoutSemicolon {
-                       maxLen = longestEntityWithoutSemicolon
-               }
-               for j := maxLen; j > 1; j-- {
-                       if x := entity[entityName[:j]]; x != 0 {
-                               return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
-                       }
-               }
-       }
-
-       dst1, src1 = dst+i, src+i
-       copy(b[dst:dst1], b[src:src1])
-       return dst1, src1
-}
-
-// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
-// attribute should be true if parsing an attribute value.
-func unescape(b []byte, attribute bool) []byte {
-       for i, c := range b {
-               if c == '&' {
-                       dst, src := unescapeEntity(b, i, i, attribute)
-                       for src < len(b) {
-                               c := b[src]
-                               if c == '&' {
-                                       dst, src = unescapeEntity(b, dst, src, attribute)
-                               } else {
-                                       b[dst] = c
-                                       dst, src = dst+1, src+1
-                               }
-                       }
-                       return b[0:dst]
-               }
-       }
-       return b
-}
-
-// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
-func lower(b []byte) []byte {
-       for i, c := range b {
-               if 'A' <= c && c <= 'Z' {
-                       b[i] = c + 'a' - 'A'
-               }
-       }
-       return b
-}
-
-// escapeComment is like func escape but escapes its input bytes less often.
-// Per https://github.com/golang/go/issues/58246 some HTML comments are (1)
-// meaningful and (2) contain angle brackets that we'd like to avoid escaping
-// unless we have to.
-//
-// "We have to" includes the '&' byte, since that introduces other escapes.
-//
-// It also includes those bytes (not including EOF) that would otherwise end
-// the comment. Per the summary table at the bottom of comment_test.go, this is
-// the '>' byte that, per above, we'd like to avoid escaping unless we have to.
-//
-// Studying the summary table (and T actions in its '>' column) closely, we
-// only need to escape in states 43, 44, 49, 51 and 52. State 43 is at the
-// start of the comment data. State 52 is after a '!'. The other three states
-// are after a '-'.
-//
-// Our algorithm is thus to escape every '&' and to escape '>' if and only if:
-//   - The '>' is after a '!' or '-' (in the unescaped data) or
-//   - The '>' is at the start of the comment data (after the opening "<!--").
-func escapeComment(w writer, s string) error {
-       // When modifying this function, consider manually increasing the
-       // maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
-       // That increase should only be temporary, not committed, as it
-       // exponentially affects the test running time.
-
-       if len(s) == 0 {
-               return nil
-       }
-
-       // Loop:
-       //   - Grow j such that s[i:j] does not need escaping.
-       //   - If s[j] does need escaping, output s[i:j] and an escaped s[j],
-       //     resetting i and j to point past that s[j] byte.
-       i := 0
-       for j := 0; j < len(s); j++ {
-               escaped := ""
-               switch s[j] {
-               case '&':
-                       escaped = "&amp;"
-
-               case '>':
-                       if j > 0 {
-                               if prev := s[j-1]; (prev != '!') && (prev != '-') {
-                                       continue
-                               }
-                       }
-                       escaped = "&gt;"
-
-               default:
-                       continue
-               }
-
-               if i < j {
-                       if _, err := w.WriteString(s[i:j]); err != nil {
-                               return err
-                       }
-               }
-               if _, err := w.WriteString(escaped); err != nil {
-                       return err
-               }
-               i = j + 1
-       }
-
-       if i < len(s) {
-               if _, err := w.WriteString(s[i:]); err != nil {
-                       return err
-               }
-       }
-       return nil
-}
-
-// escapeCommentString is to EscapeString as escapeComment is to escape.
-func escapeCommentString(s string) string {
-       if strings.IndexAny(s, "&>") == -1 {
-               return s
-       }
-       var buf bytes.Buffer
-       escapeComment(&buf, s)
-       return buf.String()
-}
-
-const escapedChars = "&'<>\"\r"
-
-func escape(w writer, s string) error {
-       i := strings.IndexAny(s, escapedChars)
-       for i != -1 {
-               if _, err := w.WriteString(s[:i]); err != nil {
-                       return err
-               }
-               var esc string
-               switch s[i] {
-               case '&':
-                       esc = "&amp;"
-               case '\'':
-                       // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
-                       esc = "&#39;"
-               case '<':
-                       esc = "&lt;"
-               case '>':
-                       esc = "&gt;"
-               case '"':
-                       // "&#34;" is shorter than "&quot;".
-                       esc = "&#34;"
-               case '\r':
-                       esc = "&#13;"
-               default:
-                       panic("unrecognized escape character")
-               }
-               s = s[i+1:]
-               if _, err := w.WriteString(esc); err != nil {
-                       return err
-               }
-               i = strings.IndexAny(s, escapedChars)
-       }
-       _, err := w.WriteString(s)
-       return err
-}
-
-// EscapeString escapes special characters like "<" to become "&lt;". It
-// escapes only six such characters: <, >, &, ', " and carriage return ("\r").
-// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
-// always true.
-func EscapeString(s string) string {
-       if strings.IndexAny(s, escapedChars) == -1 {
-               return s
-       }
-       var buf bytes.Buffer
-       escape(&buf, s)
-       return buf.String()
-}
-
-// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
-// larger range of entities than EscapeString escapes. For example, "&aacute;"
-// unescapes to "á", as does "&#225;" and "&#xE1;".
-// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
-// always true.
-func UnescapeString(s string) string {
-       for _, c := range s {
-               if c == '&' {
-                       return string(unescape([]byte(s), false))
-               }
-       }
-       return s
-}
diff --git a/html/escape_test.go b/html/escape_test.go
deleted file mode 100644 (file)
index b405d4b..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import "testing"
-
-type unescapeTest struct {
-       // A short description of the test case.
-       desc string
-       // The HTML text.
-       html string
-       // The unescaped text.
-       unescaped string
-}
-
-var unescapeTests = []unescapeTest{
-       // Handle no entities.
-       {
-               "copy",
-               "A\ttext\nstring",
-               "A\ttext\nstring",
-       },
-       // Handle simple named entities.
-       {
-               "simple",
-               "&amp; &gt; &lt;",
-               "& > <",
-       },
-       // Handle hitting the end of the string.
-       {
-               "stringEnd",
-               "&amp &amp",
-               "& &",
-       },
-       // Handle entities with two codepoints.
-       {
-               "multiCodepoint",
-               "text &gesl; blah",
-               "text \u22db\ufe00 blah",
-       },
-       // Handle decimal numeric entities.
-       {
-               "decimalEntity",
-               "Delta = &#916; ",
-               "Delta = Δ ",
-       },
-       // Handle hexadecimal numeric entities.
-       {
-               "hexadecimalEntity",
-               "Lambda = &#x3bb; = &#X3Bb ",
-               "Lambda = λ = λ ",
-       },
-       // Handle numeric early termination.
-       {
-               "numericEnds",
-               "&# &#x &#128;43 &copy = &#169f = &#xa9",
-               "&# &#x €43 © = ©f = ©",
-       },
-       // Handle numeric Windows-1252 entity replacements.
-       {
-               "numericReplacements",
-               "Footnote&#x87;",
-               "Footnote‡",
-       },
-}
-
-func TestUnescape(t *testing.T) {
-       for _, tt := range unescapeTests {
-               unescaped := UnescapeString(tt.html)
-               if unescaped != tt.unescaped {
-                       t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
-               }
-       }
-}
-
-func TestUnescapeEscape(t *testing.T) {
-       ss := []string{
-               ``,
-               `abc def`,
-               `a & b`,
-               `a&amp;b`,
-               `a &amp b`,
-               `&quot;`,
-               `"`,
-               `"<&>"`,
-               `&quot;&lt;&amp;&gt;&quot;`,
-               `3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
-               `The special characters are: <, >, &, ' and "`,
-       }
-       for _, s := range ss {
-               if got := UnescapeString(EscapeString(s)); got != s {
-                       t.Errorf("got %q want %q", got, s)
-               }
-       }
-}
diff --git a/html/example_test.go b/html/example_test.go
deleted file mode 100644 (file)
index bb428d1..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This example demonstrates parsing HTML data and walking the resulting tree.
-package html_test
-
-import (
-       "fmt"
-       "log"
-       "strings"
-
-       "git.earlybird.gay/today-engine/html"
-)
-
-func ExampleParse() {
-       s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`
-       doc, err := html.Parse(strings.NewReader(s))
-       if err != nil {
-               log.Fatal(err)
-       }
-       var f func(*html.Node)
-       f = func(n *html.Node) {
-               if n.Type == html.ElementNode && n.Data == "a" {
-                       for _, a := range n.Attr {
-                               if a.Key == "href" {
-                                       fmt.Println(a.Val)
-                                       break
-                               }
-                       }
-               }
-               for c := n.FirstChild; c != nil; c = c.NextSibling {
-                       f(c)
-               }
-       }
-       f(doc)
-       // Output:
-       // foo
-       // /bar/baz
-}
diff --git a/html/foreign.go b/html/foreign.go
deleted file mode 100644 (file)
index 9da9e9d..0000000
+++ /dev/null
@@ -1,222 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "strings"
-)
-
-func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
-       for i := range aa {
-               if newName, ok := nameMap[aa[i].Key]; ok {
-                       aa[i].Key = newName
-               }
-       }
-}
-
-func adjustForeignAttributes(aa []Attribute) {
-       for i, a := range aa {
-               if a.Key == "" || a.Key[0] != 'x' {
-                       continue
-               }
-               switch a.Key {
-               case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
-                       "xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
-                       j := strings.Index(a.Key, ":")
-                       aa[i].Namespace = a.Key[:j]
-                       aa[i].Key = a.Key[j+1:]
-               }
-       }
-}
-
-func htmlIntegrationPoint(n *Node) bool {
-       if n.Type != ElementNode {
-               return false
-       }
-       switch n.Namespace {
-       case "math":
-               if n.Data == "annotation-xml" {
-                       for _, a := range n.Attr {
-                               if a.Key == "encoding" {
-                                       val := strings.ToLower(a.Val)
-                                       if val == "text/html" || val == "application/xhtml+xml" {
-                                               return true
-                                       }
-                               }
-                       }
-               }
-       case "svg":
-               switch n.Data {
-               case "desc", "foreignObject", "title":
-                       return true
-               }
-       }
-       return false
-}
-
-func mathMLTextIntegrationPoint(n *Node) bool {
-       if n.Namespace != "math" {
-               return false
-       }
-       switch n.Data {
-       case "mi", "mo", "mn", "ms", "mtext":
-               return true
-       }
-       return false
-}
-
-// Section 12.2.6.5.
-var breakout = map[string]bool{
-       "b":          true,
-       "big":        true,
-       "blockquote": true,
-       "body":       true,
-       "br":         true,
-       "center":     true,
-       "code":       true,
-       "dd":         true,
-       "div":        true,
-       "dl":         true,
-       "dt":         true,
-       "em":         true,
-       "embed":      true,
-       "h1":         true,
-       "h2":         true,
-       "h3":         true,
-       "h4":         true,
-       "h5":         true,
-       "h6":         true,
-       "head":       true,
-       "hr":         true,
-       "i":          true,
-       "img":        true,
-       "li":         true,
-       "listing":    true,
-       "menu":       true,
-       "meta":       true,
-       "nobr":       true,
-       "ol":         true,
-       "p":          true,
-       "pre":        true,
-       "ruby":       true,
-       "s":          true,
-       "small":      true,
-       "span":       true,
-       "strong":     true,
-       "strike":     true,
-       "sub":        true,
-       "sup":        true,
-       "table":      true,
-       "tt":         true,
-       "u":          true,
-       "ul":         true,
-       "var":        true,
-}
-
-// Section 12.2.6.5.
-var svgTagNameAdjustments = map[string]string{
-       "altglyph":            "altGlyph",
-       "altglyphdef":         "altGlyphDef",
-       "altglyphitem":        "altGlyphItem",
-       "animatecolor":        "animateColor",
-       "animatemotion":       "animateMotion",
-       "animatetransform":    "animateTransform",
-       "clippath":            "clipPath",
-       "feblend":             "feBlend",
-       "fecolormatrix":       "feColorMatrix",
-       "fecomponenttransfer": "feComponentTransfer",
-       "fecomposite":         "feComposite",
-       "feconvolvematrix":    "feConvolveMatrix",
-       "fediffuselighting":   "feDiffuseLighting",
-       "fedisplacementmap":   "feDisplacementMap",
-       "fedistantlight":      "feDistantLight",
-       "feflood":             "feFlood",
-       "fefunca":             "feFuncA",
-       "fefuncb":             "feFuncB",
-       "fefuncg":             "feFuncG",
-       "fefuncr":             "feFuncR",
-       "fegaussianblur":      "feGaussianBlur",
-       "feimage":             "feImage",
-       "femerge":             "feMerge",
-       "femergenode":         "feMergeNode",
-       "femorphology":        "feMorphology",
-       "feoffset":            "feOffset",
-       "fepointlight":        "fePointLight",
-       "fespecularlighting":  "feSpecularLighting",
-       "fespotlight":         "feSpotLight",
-       "fetile":              "feTile",
-       "feturbulence":        "feTurbulence",
-       "foreignobject":       "foreignObject",
-       "glyphref":            "glyphRef",
-       "lineargradient":      "linearGradient",
-       "radialgradient":      "radialGradient",
-       "textpath":            "textPath",
-}
-
-// Section 12.2.6.1
-var mathMLAttributeAdjustments = map[string]string{
-       "definitionurl": "definitionURL",
-}
-
-var svgAttributeAdjustments = map[string]string{
-       "attributename":       "attributeName",
-       "attributetype":       "attributeType",
-       "basefrequency":       "baseFrequency",
-       "baseprofile":         "baseProfile",
-       "calcmode":            "calcMode",
-       "clippathunits":       "clipPathUnits",
-       "diffuseconstant":     "diffuseConstant",
-       "edgemode":            "edgeMode",
-       "filterunits":         "filterUnits",
-       "glyphref":            "glyphRef",
-       "gradienttransform":   "gradientTransform",
-       "gradientunits":       "gradientUnits",
-       "kernelmatrix":        "kernelMatrix",
-       "kernelunitlength":    "kernelUnitLength",
-       "keypoints":           "keyPoints",
-       "keysplines":          "keySplines",
-       "keytimes":            "keyTimes",
-       "lengthadjust":        "lengthAdjust",
-       "limitingconeangle":   "limitingConeAngle",
-       "markerheight":        "markerHeight",
-       "markerunits":         "markerUnits",
-       "markerwidth":         "markerWidth",
-       "maskcontentunits":    "maskContentUnits",
-       "maskunits":           "maskUnits",
-       "numoctaves":          "numOctaves",
-       "pathlength":          "pathLength",
-       "patterncontentunits": "patternContentUnits",
-       "patterntransform":    "patternTransform",
-       "patternunits":        "patternUnits",
-       "pointsatx":           "pointsAtX",
-       "pointsaty":           "pointsAtY",
-       "pointsatz":           "pointsAtZ",
-       "preservealpha":       "preserveAlpha",
-       "preserveaspectratio": "preserveAspectRatio",
-       "primitiveunits":      "primitiveUnits",
-       "refx":                "refX",
-       "refy":                "refY",
-       "repeatcount":         "repeatCount",
-       "repeatdur":           "repeatDur",
-       "requiredextensions":  "requiredExtensions",
-       "requiredfeatures":    "requiredFeatures",
-       "specularconstant":    "specularConstant",
-       "specularexponent":    "specularExponent",
-       "spreadmethod":        "spreadMethod",
-       "startoffset":         "startOffset",
-       "stddeviation":        "stdDeviation",
-       "stitchtiles":         "stitchTiles",
-       "surfacescale":        "surfaceScale",
-       "systemlanguage":      "systemLanguage",
-       "tablevalues":         "tableValues",
-       "targetx":             "targetX",
-       "targety":             "targetY",
-       "textlength":          "textLength",
-       "viewbox":             "viewBox",
-       "viewtarget":          "viewTarget",
-       "xchannelselector":    "xChannelSelector",
-       "ychannelselector":    "yChannelSelector",
-       "zoomandpan":          "zoomAndPan",
-}
diff --git a/html/node.go b/html/node.go
deleted file mode 100644 (file)
index 4a110ab..0000000
+++ /dev/null
@@ -1,225 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "git.earlybird.gay/today-engine/html/atom"
-)
-
-// A NodeType is the type of a Node.
-type NodeType uint32
-
-const (
-       ErrorNode NodeType = iota
-       TextNode
-       DocumentNode
-       ElementNode
-       CommentNode
-       DoctypeNode
-       // RawNode nodes are not returned by the parser, but can be part of the
-       // Node tree passed to func Render to insert raw HTML (without escaping).
-       // If so, this package makes no guarantee that the rendered HTML is secure
-       // (from e.g. Cross Site Scripting attacks) or well-formed.
-       RawNode
-       scopeMarkerNode
-)
-
-// Section 12.2.4.3 says "The markers are inserted when entering applet,
-// object, marquee, template, td, th, and caption elements, and are used
-// to prevent formatting from "leaking" into applet, object, marquee,
-// template, td, th, and caption elements".
-var scopeMarker = Node{Type: scopeMarkerNode}
-
-// A Node consists of a NodeType and some Data (tag name for element nodes,
-// content for text) and is part of a tree of Nodes. Element nodes may also
-// have a Namespace and contain a slice of Attributes. Data is unescaped, so
-// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
-// is the atom for Data, or zero if Data is not a known tag name.
-//
-// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
-// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
-// "svg" is short for "http://www.w3.org/2000/svg".
-type Node struct {
-       Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
-
-       Type      NodeType
-       DataAtom  atom.Atom
-       Data      string
-       Namespace string
-       Attr      []Attribute
-}
-
-// InsertBefore inserts newChild as a child of n, immediately before oldChild
-// in the sequence of n's children. oldChild may be nil, in which case newChild
-// is appended to the end of n's children.
-//
-// It will panic if newChild already has a parent or siblings.
-func (n *Node) InsertBefore(newChild, oldChild *Node) {
-       if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
-               panic("html: InsertBefore called for an attached child Node")
-       }
-       var prev, next *Node
-       if oldChild != nil {
-               prev, next = oldChild.PrevSibling, oldChild
-       } else {
-               prev = n.LastChild
-       }
-       if prev != nil {
-               prev.NextSibling = newChild
-       } else {
-               n.FirstChild = newChild
-       }
-       if next != nil {
-               next.PrevSibling = newChild
-       } else {
-               n.LastChild = newChild
-       }
-       newChild.Parent = n
-       newChild.PrevSibling = prev
-       newChild.NextSibling = next
-}
-
-// AppendChild adds a node c as a child of n.
-//
-// It will panic if c already has a parent or siblings.
-func (n *Node) AppendChild(c *Node) {
-       if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
-               panic("html: AppendChild called for an attached child Node")
-       }
-       last := n.LastChild
-       if last != nil {
-               last.NextSibling = c
-       } else {
-               n.FirstChild = c
-       }
-       n.LastChild = c
-       c.Parent = n
-       c.PrevSibling = last
-}
-
-// RemoveChild removes a node c that is a child of n. Afterwards, c will have
-// no parent and no siblings.
-//
-// It will panic if c's parent is not n.
-func (n *Node) RemoveChild(c *Node) {
-       if c.Parent != n {
-               panic("html: RemoveChild called for a non-child Node")
-       }
-       if n.FirstChild == c {
-               n.FirstChild = c.NextSibling
-       }
-       if c.NextSibling != nil {
-               c.NextSibling.PrevSibling = c.PrevSibling
-       }
-       if n.LastChild == c {
-               n.LastChild = c.PrevSibling
-       }
-       if c.PrevSibling != nil {
-               c.PrevSibling.NextSibling = c.NextSibling
-       }
-       c.Parent = nil
-       c.PrevSibling = nil
-       c.NextSibling = nil
-}
-
-// reparentChildren reparents all of src's child nodes to dst.
-func reparentChildren(dst, src *Node) {
-       for {
-               child := src.FirstChild
-               if child == nil {
-                       break
-               }
-               src.RemoveChild(child)
-               dst.AppendChild(child)
-       }
-}
-
-// clone returns a new node with the same type, data and attributes.
-// The clone has no parent, no siblings and no children.
-func (n *Node) clone() *Node {
-       m := &Node{
-               Type:     n.Type,
-               DataAtom: n.DataAtom,
-               Data:     n.Data,
-               Attr:     make([]Attribute, len(n.Attr)),
-       }
-       copy(m.Attr, n.Attr)
-       return m
-}
-
-// nodeStack is a stack of nodes.
-type nodeStack []*Node
-
-// pop pops the stack. It will panic if s is empty.
-func (s *nodeStack) pop() *Node {
-       i := len(*s)
-       n := (*s)[i-1]
-       *s = (*s)[:i-1]
-       return n
-}
-
-// top returns the most recently pushed node, or nil if s is empty.
-func (s *nodeStack) top() *Node {
-       if i := len(*s); i > 0 {
-               return (*s)[i-1]
-       }
-       return nil
-}
-
-// index returns the index of the top-most occurrence of n in the stack, or -1
-// if n is not present.
-func (s *nodeStack) index(n *Node) int {
-       for i := len(*s) - 1; i >= 0; i-- {
-               if (*s)[i] == n {
-                       return i
-               }
-       }
-       return -1
-}
-
-// contains reports whether s holds an HTML-namespace node whose DataAtom is a.
-func (s *nodeStack) contains(a atom.Atom) bool {
-       for _, n := range *s {
-               if n.DataAtom == a && n.Namespace == "" {
-                       return true
-               }
-       }
-       return false
-}
-
-// insert inserts a node at the given index.
-func (s *nodeStack) insert(i int, n *Node) {
-       (*s) = append(*s, nil)
-       copy((*s)[i+1:], (*s)[i:])
-       (*s)[i] = n
-}
-
-// remove removes a node from the stack. It is a no-op if n is not present.
-func (s *nodeStack) remove(n *Node) {
-       i := s.index(n)
-       if i == -1 {
-               return
-       }
-       copy((*s)[i:], (*s)[i+1:])
-       j := len(*s) - 1
-       (*s)[j] = nil
-       *s = (*s)[:j]
-}
-
-type insertionModeStack []insertionMode
-
-func (s *insertionModeStack) pop() (im insertionMode) {
-       i := len(*s)
-       im = (*s)[i-1]
-       *s = (*s)[:i-1]
-       return im
-}
-
-func (s *insertionModeStack) top() insertionMode {
-       if i := len(*s); i > 0 {
-               return (*s)[i-1]
-       }
-       return nil
-}
diff --git a/html/node_test.go b/html/node_test.go
deleted file mode 100644 (file)
index 471102f..0000000
+++ /dev/null
@@ -1,146 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "fmt"
-)
-
-// checkTreeConsistency checks that a node and its descendants are all
-// consistent in their parent/child/sibling relationships.
-func checkTreeConsistency(n *Node) error {
-       return checkTreeConsistency1(n, 0)
-}
-
-func checkTreeConsistency1(n *Node, depth int) error {
-       if depth == 1e4 {
-               return fmt.Errorf("html: tree looks like it contains a cycle")
-       }
-       if err := checkNodeConsistency(n); err != nil {
-               return err
-       }
-       for c := n.FirstChild; c != nil; c = c.NextSibling {
-               if err := checkTreeConsistency1(c, depth+1); err != nil {
-                       return err
-               }
-       }
-       return nil
-}
-
-// checkNodeConsistency checks that a node's parent/child/sibling relationships
-// are consistent.
-func checkNodeConsistency(n *Node) error {
-       if n == nil {
-               return nil
-       }
-
-       nParent := 0
-       for p := n.Parent; p != nil; p = p.Parent {
-               nParent++
-               if nParent == 1e4 {
-                       return fmt.Errorf("html: parent list looks like an infinite loop")
-               }
-       }
-
-       nForward := 0
-       for c := n.FirstChild; c != nil; c = c.NextSibling {
-               nForward++
-               if nForward == 1e6 {
-                       return fmt.Errorf("html: forward list of children looks like an infinite loop")
-               }
-               if c.Parent != n {
-                       return fmt.Errorf("html: inconsistent child/parent relationship")
-               }
-       }
-
-       nBackward := 0
-       for c := n.LastChild; c != nil; c = c.PrevSibling {
-               nBackward++
-               if nBackward == 1e6 {
-                       return fmt.Errorf("html: backward list of children looks like an infinite loop")
-               }
-               if c.Parent != n {
-                       return fmt.Errorf("html: inconsistent child/parent relationship")
-               }
-       }
-
-       if n.Parent != nil {
-               if n.Parent == n {
-                       return fmt.Errorf("html: inconsistent parent relationship")
-               }
-               if n.Parent == n.FirstChild {
-                       return fmt.Errorf("html: inconsistent parent/first relationship")
-               }
-               if n.Parent == n.LastChild {
-                       return fmt.Errorf("html: inconsistent parent/last relationship")
-               }
-               if n.Parent == n.PrevSibling {
-                       return fmt.Errorf("html: inconsistent parent/prev relationship")
-               }
-               if n.Parent == n.NextSibling {
-                       return fmt.Errorf("html: inconsistent parent/next relationship")
-               }
-
-               parentHasNAsAChild := false
-               for c := n.Parent.FirstChild; c != nil; c = c.NextSibling {
-                       if c == n {
-                               parentHasNAsAChild = true
-                               break
-                       }
-               }
-               if !parentHasNAsAChild {
-                       return fmt.Errorf("html: inconsistent parent/child relationship")
-               }
-       }
-
-       if n.PrevSibling != nil && n.PrevSibling.NextSibling != n {
-               return fmt.Errorf("html: inconsistent prev/next relationship")
-       }
-       if n.NextSibling != nil && n.NextSibling.PrevSibling != n {
-               return fmt.Errorf("html: inconsistent next/prev relationship")
-       }
-
-       if (n.FirstChild == nil) != (n.LastChild == nil) {
-               return fmt.Errorf("html: inconsistent first/last relationship")
-       }
-       if n.FirstChild != nil && n.FirstChild == n.LastChild {
-               // We have a sole child.
-               if n.FirstChild.PrevSibling != nil || n.FirstChild.NextSibling != nil {
-                       return fmt.Errorf("html: inconsistent sole child's sibling relationship")
-               }
-       }
-
-       seen := map[*Node]bool{}
-
-       var last *Node
-       for c := n.FirstChild; c != nil; c = c.NextSibling {
-               if seen[c] {
-                       return fmt.Errorf("html: inconsistent repeated child")
-               }
-               seen[c] = true
-               last = c
-       }
-       if last != n.LastChild {
-               return fmt.Errorf("html: inconsistent last relationship")
-       }
-
-       var first *Node
-       for c := n.LastChild; c != nil; c = c.PrevSibling {
-               if !seen[c] {
-                       return fmt.Errorf("html: inconsistent missing child")
-               }
-               delete(seen, c)
-               first = c
-       }
-       if first != n.FirstChild {
-               return fmt.Errorf("html: inconsistent first relationship")
-       }
-
-       if len(seen) != 0 {
-               return fmt.Errorf("html: inconsistent forwards/backwards child list")
-       }
-
-       return nil
-}
diff --git a/html/parse.go b/html/parse.go
deleted file mode 100644 (file)
index d813c7b..0000000
+++ /dev/null
@@ -1,2454 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "errors"
-       "fmt"
-       "io"
-       "strings"
-
-       a "git.earlybird.gay/today-engine/html/atom"
-)
-
-// A parser implements the HTML5 parsing algorithm:
-// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
-type parser struct {
-       // tokenizer provides the tokens for the parser.
-       tokenizer *Tokenizer
-       // tok is the most recently read token.
-       tok Token
-       // Self-closing tags like <hr/> are treated as start tags, except that
-       // hasSelfClosingToken is set while they are being processed.
-       hasSelfClosingToken bool
-       // doc is the document root element.
-       doc *Node
-       // The stack of open elements (section 12.2.4.2) and active formatting
-       // elements (section 12.2.4.3).
-       oe, afe nodeStack
-       // Element pointers (section 12.2.4.4).
-       head, form *Node
-       // Other parsing state flags (section 12.2.4.5).
-       scripting, framesetOK bool
-       // The stack of template insertion modes
-       templateStack insertionModeStack
-       // im is the current insertion mode.
-       im insertionMode
-       // originalIM is the insertion mode to go back to after completing a text
-       // or inTableText insertion mode.
-       originalIM insertionMode
-       // fosterParenting is whether new elements should be inserted according to
-       // the foster parenting rules (section 12.2.6.1).
-       fosterParenting bool
-       // quirks is whether the parser is operating in "quirks mode."
-       quirks bool
-       // fragment is whether the parser is parsing an HTML fragment.
-       fragment bool
-       // context is the context element when parsing an HTML fragment
-       // (section 12.4).
-       context *Node
-}
-
-func (p *parser) top() *Node {
-       if n := p.oe.top(); n != nil {
-               return n
-       }
-       return p.doc
-}
-
-// Stop tags for use in popUntil. These come from section 12.2.4.2.
-var (
-       defaultScopeStopTags = map[string][]a.Atom{
-               "":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
-               "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
-               "svg":  {a.Desc, a.ForeignObject, a.Title},
-       }
-)
-
-type scope int
-
-const (
-       defaultScope scope = iota
-       listItemScope
-       buttonScope
-       tableScope
-       tableRowScope
-       tableBodyScope
-       selectScope
-)
-
-// popUntil pops the stack of open elements at the highest element whose tag
-// is in matchTags, provided there is no higher element in the scope's stop
-// tags (as defined in section 12.2.4.2). It returns whether or not there was
-// such an element. If there was not, popUntil leaves the stack unchanged.
-//
-// For example, the set of stop tags for table scope is: "html", "table". If
-// the stack was:
-// ["html", "body", "font", "table", "b", "i", "u"]
-// then popUntil(tableScope, "font") would return false, but
-// popUntil(tableScope, "i") would return true and the stack would become:
-// ["html", "body", "font", "table", "b"]
-//
-// If an element's tag is in both the stop tags and matchTags, then the stack
-// will be popped and the function returns true (provided, of course, there was
-// no higher element in the stack that was also in the stop tags). For example,
-// popUntil(tableScope, "table") returns true and leaves:
-// ["html", "body", "font"]
-func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
-       if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
-               p.oe = p.oe[:i]
-               return true
-       }
-       return false
-}
-
-// indexOfElementInScope returns the index in p.oe of the highest element whose
-// tag is in matchTags that is in scope. If no matching element is in scope, it
-// returns -1.
-func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
-       for i := len(p.oe) - 1; i >= 0; i-- {
-               tagAtom := p.oe[i].DataAtom
-               if p.oe[i].Namespace == "" {
-                       for _, t := range matchTags {
-                               if t == tagAtom {
-                                       return i
-                               }
-                       }
-                       switch s {
-                       case defaultScope:
-                               // No-op.
-                       case listItemScope:
-                               if tagAtom == a.Ol || tagAtom == a.Ul {
-                                       return -1
-                               }
-                       case buttonScope:
-                               if tagAtom == a.Button {
-                                       return -1
-                               }
-                       case tableScope:
-                               if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
-                                       return -1
-                               }
-                       case selectScope:
-                               if tagAtom != a.Optgroup && tagAtom != a.Option {
-                                       return -1
-                               }
-                       default:
-                               panic("unreachable")
-                       }
-               }
-               switch s {
-               case defaultScope, listItemScope, buttonScope:
-                       for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
-                               if t == tagAtom {
-                                       return -1
-                               }
-                       }
-               }
-       }
-       return -1
-}
-
-// elementInScope is like popUntil, except that it doesn't modify the stack of
-// open elements.
-func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
-       return p.indexOfElementInScope(s, matchTags...) != -1
-}
-
-// clearStackToContext pops elements off the stack of open elements until a
-// scope-defined element is found.
-func (p *parser) clearStackToContext(s scope) {
-       for i := len(p.oe) - 1; i >= 0; i-- {
-               tagAtom := p.oe[i].DataAtom
-               switch s {
-               case tableScope:
-                       if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
-                               p.oe = p.oe[:i+1]
-                               return
-                       }
-               case tableRowScope:
-                       if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
-                               p.oe = p.oe[:i+1]
-                               return
-                       }
-               case tableBodyScope:
-                       if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
-                               p.oe = p.oe[:i+1]
-                               return
-                       }
-               default:
-                       panic("unreachable")
-               }
-       }
-}
-
-// parseGenericRawTextElement implements the generic raw text element parsing
-// algorithm defined in 12.2.6.2.
-// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
-// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
-// officially, need to make tokenizer consider both states.
-func (p *parser) parseGenericRawTextElement() {
-       p.addElement()
-       p.originalIM = p.im
-       p.im = textIM
-}
-
-// generateImpliedEndTags pops nodes off the stack of open elements as long as
-// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
-// If exceptions are specified, nodes with that name will not be popped off.
-func (p *parser) generateImpliedEndTags(exceptions ...string) {
-       var i int
-loop:
-       for i = len(p.oe) - 1; i >= 0; i-- {
-               n := p.oe[i]
-               if n.Type != ElementNode {
-                       break
-               }
-               switch n.DataAtom {
-               case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
-                       for _, except := range exceptions {
-                               if n.Data == except {
-                                       break loop
-                               }
-                       }
-                       continue
-               }
-               break
-       }
-
-       p.oe = p.oe[:i+1]
-}
-
-// addChild adds a child node n to the top element, and pushes n onto the stack
-// of open elements if it is an element node.
-func (p *parser) addChild(n *Node) {
-       if p.shouldFosterParent() {
-               p.fosterParent(n)
-       } else {
-               p.top().AppendChild(n)
-       }
-
-       if n.Type == ElementNode {
-               p.oe = append(p.oe, n)
-       }
-}
-
-// shouldFosterParent returns whether the next node to be added should be
-// foster parented.
-func (p *parser) shouldFosterParent() bool {
-       return false
-}
-
-// fosterParent adds a child node according to the foster parenting rules.
-// Section 12.2.6.1, "foster parenting".
-func (p *parser) fosterParent(n *Node) {
-       var table, parent, prev, template *Node
-       var i int
-       for i = len(p.oe) - 1; i >= 0; i-- {
-               if p.oe[i].DataAtom == a.Table {
-                       table = p.oe[i]
-                       break
-               }
-       }
-
-       var j int
-       for j = len(p.oe) - 1; j >= 0; j-- {
-               if p.oe[j].DataAtom == a.Template {
-                       template = p.oe[j]
-                       break
-               }
-       }
-
-       if template != nil && (table == nil || j > i) {
-               template.AppendChild(n)
-               return
-       }
-
-       if table == nil {
-               // The foster parent is the html element.
-               parent = p.oe[0]
-       } else {
-               parent = table.Parent
-       }
-       if parent == nil {
-               parent = p.oe[i-1]
-       }
-
-       if table != nil {
-               prev = table.PrevSibling
-       } else {
-               prev = parent.LastChild
-       }
-       if prev != nil && prev.Type == TextNode && n.Type == TextNode {
-               prev.Data += n.Data
-               return
-       }
-
-       parent.InsertBefore(n, table)
-}
-
-// addText adds text to the preceding node if it is a text node, or else it
-// calls addChild with a new text node.
-func (p *parser) addText(text string) {
-       if text == "" {
-               return
-       }
-
-       if p.shouldFosterParent() {
-               p.fosterParent(&Node{
-                       Type: TextNode,
-                       Data: text,
-               })
-               return
-       }
-
-       t := p.top()
-       if n := t.LastChild; n != nil && n.Type == TextNode {
-               n.Data += text
-               return
-       }
-       p.addChild(&Node{
-               Type: TextNode,
-               Data: text,
-       })
-}
-
-// addElement adds a child element based on the current token.
-func (p *parser) addElement() {
-       p.addChild(&Node{
-               Type:     ElementNode,
-               DataAtom: p.tok.DataAtom,
-               Data:     p.tok.Data,
-               Attr:     p.tok.Attr,
-       })
-}
-
-// Section 12.2.4.3.
-func (p *parser) addFormattingElement() {
-       tagAtom, attr := p.tok.DataAtom, p.tok.Attr
-       p.addElement()
-
-       // Implement the Noah's Ark clause, but with three per family instead of two.
-       identicalElements := 0
-findIdenticalElements:
-       for i := len(p.afe) - 1; i >= 0; i-- {
-               n := p.afe[i]
-               if n.Type == scopeMarkerNode {
-                       break
-               }
-               if n.Type != ElementNode {
-                       continue
-               }
-               if n.Namespace != "" {
-                       continue
-               }
-               if n.DataAtom != tagAtom {
-                       continue
-               }
-               if len(n.Attr) != len(attr) {
-                       continue
-               }
-       compareAttributes:
-               for _, t0 := range n.Attr {
-                       for _, t1 := range attr {
-                               if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
-                                       // Found a match for this attribute, continue with the next attribute.
-                                       continue compareAttributes
-                               }
-                       }
-                       // If we get here, there is no attribute that matches a.
-                       // Therefore the element is not identical to the new one.
-                       continue findIdenticalElements
-               }
-
-               identicalElements++
-               if identicalElements >= 3 {
-                       p.afe.remove(n)
-               }
-       }
-
-       p.afe = append(p.afe, p.top())
-}
-
-// Section 12.2.4.3.
-func (p *parser) clearActiveFormattingElements() {
-       for {
-               if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
-                       return
-               }
-       }
-}
-
-// Section 12.2.4.3.
-func (p *parser) reconstructActiveFormattingElements() {
-       n := p.afe.top()
-       if n == nil {
-               return
-       }
-       if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
-               return
-       }
-       i := len(p.afe) - 1
-       for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
-               if i == 0 {
-                       i = -1
-                       break
-               }
-               i--
-               n = p.afe[i]
-       }
-       for {
-               i++
-               clone := p.afe[i].clone()
-               p.addChild(clone)
-               p.afe[i] = clone
-               if i == len(p.afe)-1 {
-                       break
-               }
-       }
-}
-
-// Section 12.2.5.
-func (p *parser) acknowledgeSelfClosingTag() {
-       p.hasSelfClosingToken = false
-}
-
-// An insertion mode (section 12.2.4.1) is the state transition function from
-// a particular state in the HTML5 parser's state machine. It updates the
-// parser's fields depending on parser.tok (where ErrorToken means EOF).
-// It returns whether the token was consumed.
-type insertionMode func(*parser) bool
-
-// setOriginalIM sets the insertion mode to return to after completing a text or
-// inTableText insertion mode.
-// Section 12.2.4.1, "using the rules for".
-func (p *parser) setOriginalIM() {
-       if p.originalIM != nil {
-               panic("html: bad parser state: originalIM was set twice")
-       }
-       p.originalIM = p.im
-}
-
-// Section 12.2.4.1, "reset the insertion mode".
-func (p *parser) resetInsertionMode() {
-       for i := len(p.oe) - 1; i >= 0; i-- {
-               n := p.oe[i]
-               last := i == 0
-               if last && p.context != nil {
-                       n = p.context
-               }
-
-               switch n.DataAtom {
-               case a.Select:
-                       if !last {
-                               for ancestor, first := n, p.oe[0]; ancestor != first; {
-                                       ancestor = p.oe[p.oe.index(ancestor)-1]
-                                       switch ancestor.DataAtom {
-                                       case a.Template:
-                                               p.im = inSelectIM
-                                               return
-                                       case a.Table:
-                                               p.im = inSelectInTableIM
-                                               return
-                                       }
-                               }
-                       }
-                       p.im = inSelectIM
-               case a.Td, a.Th:
-                       // TODO: remove this divergence from the HTML5 spec.
-                       //
-                       // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
-                       p.im = inCellIM
-               case a.Tr:
-                       p.im = inRowIM
-               case a.Tbody, a.Thead, a.Tfoot:
-                       p.im = inTableBodyIM
-               case a.Caption:
-                       p.im = inCaptionIM
-               case a.Colgroup:
-                       p.im = inColumnGroupIM
-               case a.Table:
-                       p.im = inTableIM
-               case a.Template:
-                       // TODO: remove this divergence from the HTML5 spec.
-                       if n.Namespace != "" {
-                               continue
-                       }
-                       p.im = p.templateStack.top()
-               case a.Head:
-                       // TODO: remove this divergence from the HTML5 spec.
-                       //
-                       // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
-                       p.im = inHeadIM
-               case a.Body:
-                       p.im = inBodyIM
-               case a.Frameset:
-                       p.im = inFramesetIM
-               case a.Html:
-                       if p.head == nil {
-                               p.im = beforeHeadIM
-                       } else {
-                               p.im = afterHeadIM
-                       }
-               default:
-                       if last {
-                               p.im = inBodyIM
-                               return
-                       }
-                       continue
-               }
-               return
-       }
-}
-
-const whitespace = " \t\r\n\f"
-
-// Section 12.2.6.4.1.
-func initialIM(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken:
-               p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
-               if len(p.tok.Data) == 0 {
-                       // It was all whitespace, so ignore it.
-                       return true
-               }
-       case CommentToken:
-               p.doc.AppendChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       case DoctypeToken:
-               n, quirks := parseDoctype(p.tok.Data)
-               p.doc.AppendChild(n)
-               p.quirks = quirks
-               p.im = beforeHTMLIM
-               return true
-       }
-       p.quirks = true
-       p.im = beforeHTMLIM
-       return false
-}
-
-// Section 12.2.6.4.2.
-func beforeHTMLIM(p *parser) bool {
-       switch p.tok.Type {
-       case DoctypeToken:
-               // Ignore the token.
-               return true
-       case TextToken:
-               p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
-               if len(p.tok.Data) == 0 {
-                       // It was all whitespace, so ignore it.
-                       return true
-               }
-       case StartTagToken:
-               if p.tok.DataAtom == a.Html {
-                       p.addElement()
-                       p.im = beforeHeadIM
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Head, a.Body, a.Html, a.Br:
-                       p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
-                       return false
-               default:
-                       // Ignore the token.
-                       return true
-               }
-       case CommentToken:
-               p.doc.AppendChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       }
-       p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
-       return false
-}
-
-// Section 12.2.6.4.3.
-func beforeHeadIM(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken:
-               p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
-               if len(p.tok.Data) == 0 {
-                       // It was all whitespace, so ignore it.
-                       return true
-               }
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Head:
-                       p.addElement()
-                       p.head = p.top()
-                       p.im = inHeadIM
-                       return true
-               case a.Html:
-                       return inBodyIM(p)
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Head, a.Body, a.Html, a.Br:
-                       p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
-                       return false
-               default:
-                       // Ignore the token.
-                       return true
-               }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       case DoctypeToken:
-               // Ignore the token.
-               return true
-       }
-
-       p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
-       return false
-}
-
-// Section 12.2.6.4.4.
-func inHeadIM(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken:
-               s := strings.TrimLeft(p.tok.Data, whitespace)
-               if len(s) < len(p.tok.Data) {
-                       // Add the initial whitespace to the current node.
-                       p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
-                       if s == "" {
-                               return true
-                       }
-                       p.tok.Data = s
-               }
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       return inBodyIM(p)
-               case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
-                       p.addElement()
-                       p.oe.pop()
-                       p.acknowledgeSelfClosingTag()
-                       return true
-               case a.Noscript:
-                       if p.scripting {
-                               p.parseGenericRawTextElement()
-                               return true
-                       }
-                       p.addElement()
-                       p.im = inHeadNoscriptIM
-                       // Don't let the tokenizer go into raw text mode when scripting is disabled.
-                       p.tokenizer.NextIsNotRawText()
-                       return true
-               case a.Script, a.Title:
-                       p.addElement()
-                       p.setOriginalIM()
-                       p.im = textIM
-                       return true
-               case a.Noframes, a.Style:
-                       p.parseGenericRawTextElement()
-                       return true
-               case a.Head:
-                       // Ignore the token.
-                       return true
-               case a.Template:
-                       // TODO: remove this divergence from the HTML5 spec.
-                       //
-                       // We don't handle all of the corner cases when mixing foreign
-                       // content (i.e. <math> or <svg>) with <template>. Without this
-                       // early return, we can get into an infinite loop, possibly because
-                       // of the "TODO... further divergence" a little below.
-                       //
-                       // As a workaround, if we are mixing foreign content and templates,
-                       // just ignore the rest of the HTML. Foreign content is rare and a
-                       // relatively old HTML feature. Templates are also rare and a
-                       // relatively new HTML feature. Their combination is very rare.
-                       for _, e := range p.oe {
-                               if e.Namespace != "" {
-                                       p.im = ignoreTheRemainingTokens
-                                       return true
-                               }
-                       }
-
-                       p.addElement()
-                       p.afe = append(p.afe, &scopeMarker)
-                       p.framesetOK = false
-                       p.im = inTemplateIM
-                       p.templateStack = append(p.templateStack, inTemplateIM)
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Head:
-                       p.oe.pop()
-                       p.im = afterHeadIM
-                       return true
-               case a.Body, a.Html, a.Br:
-                       p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
-                       return false
-               case a.Template:
-                       if !p.oe.contains(a.Template) {
-                               return true
-                       }
-                       // TODO: remove this further divergence from the HTML5 spec.
-                       //
-                       // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
-                       p.generateImpliedEndTags()
-                       for i := len(p.oe) - 1; i >= 0; i-- {
-                               if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
-                                       p.oe = p.oe[:i]
-                                       break
-                               }
-                       }
-                       p.clearActiveFormattingElements()
-                       p.templateStack.pop()
-                       p.resetInsertionMode()
-                       return true
-               default:
-                       // Ignore the token.
-                       return true
-               }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       case DoctypeToken:
-               // Ignore the token.
-               return true
-       }
-
-       p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
-       return false
-}
-
-// Section 12.2.6.4.5.
-func inHeadNoscriptIM(p *parser) bool {
-       switch p.tok.Type {
-       case DoctypeToken:
-               // Ignore the token.
-               return true
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       return inBodyIM(p)
-               case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
-                       return inHeadIM(p)
-               case a.Head:
-                       // Ignore the token.
-                       return true
-               case a.Noscript:
-                       // Don't let the tokenizer go into raw text mode even when a <noscript>
-                       // tag is in "in head noscript" insertion mode.
-                       p.tokenizer.NextIsNotRawText()
-                       // Ignore the token.
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Noscript, a.Br:
-               default:
-                       // Ignore the token.
-                       return true
-               }
-       case TextToken:
-               s := strings.TrimLeft(p.tok.Data, whitespace)
-               if len(s) == 0 {
-                       // It was all whitespace.
-                       return inHeadIM(p)
-               }
-       case CommentToken:
-               return inHeadIM(p)
-       }
-       p.oe.pop()
-       if p.top().DataAtom != a.Head {
-               panic("html: the new current node will be a head element.")
-       }
-       p.im = inHeadIM
-       if p.tok.DataAtom == a.Noscript {
-               return true
-       }
-       return false
-}
-
-// Section 12.2.6.4.6.
-func afterHeadIM(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken:
-               s := strings.TrimLeft(p.tok.Data, whitespace)
-               if len(s) < len(p.tok.Data) {
-                       // Add the initial whitespace to the current node.
-                       p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
-                       if s == "" {
-                               return true
-                       }
-                       p.tok.Data = s
-               }
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       return inBodyIM(p)
-               case a.Body:
-                       p.addElement()
-                       p.framesetOK = false
-                       p.im = inBodyIM
-                       return true
-               case a.Frameset:
-                       p.addElement()
-                       p.im = inFramesetIM
-                       return true
-               case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
-                       p.oe = append(p.oe, p.head)
-                       defer p.oe.remove(p.head)
-                       return inHeadIM(p)
-               case a.Head:
-                       // Ignore the token.
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Body, a.Html, a.Br:
-                       // Drop down to creating an implied <body> tag.
-               case a.Template:
-                       return inHeadIM(p)
-               default:
-                       // Ignore the token.
-                       return true
-               }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       case DoctypeToken:
-               // Ignore the token.
-               return true
-       }
-
-       p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
-       p.framesetOK = true
-       return false
-}
-
-// copyAttributes copies attributes of src not found on dst to dst.
-func copyAttributes(dst *Node, src Token) {
-       if len(src.Attr) == 0 {
-               return
-       }
-       attr := map[string]string{}
-       for _, t := range dst.Attr {
-               attr[t.Key] = t.Val
-       }
-       for _, t := range src.Attr {
-               if _, ok := attr[t.Key]; !ok {
-                       dst.Attr = append(dst.Attr, t)
-                       attr[t.Key] = t.Val
-               }
-       }
-}
-
-// Section 12.2.6.4.7.
-func inBodyIM(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken:
-               d := p.tok.Data
-               switch n := p.oe.top(); n.DataAtom {
-               case a.Pre, a.Listing:
-                       if n.FirstChild == nil {
-                               // Ignore a newline at the start of a <pre> block.
-                               if d != "" && d[0] == '\r' {
-                                       d = d[1:]
-                               }
-                               if d != "" && d[0] == '\n' {
-                                       d = d[1:]
-                               }
-                       }
-               }
-               d = strings.Replace(d, "\x00", "", -1)
-               if d == "" {
-                       return true
-               }
-               p.reconstructActiveFormattingElements()
-               p.addText(d)
-               if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
-                       // There were non-whitespace characters inserted.
-                       p.framesetOK = false
-               }
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       if p.oe.contains(a.Template) {
-                               return true
-                       }
-                       copyAttributes(p.oe[0], p.tok)
-               case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
-                       return inHeadIM(p)
-               case a.Body:
-                       if p.oe.contains(a.Template) {
-                               return true
-                       }
-                       if len(p.oe) >= 2 {
-                               body := p.oe[1]
-                               if body.Type == ElementNode && body.DataAtom == a.Body {
-                                       p.framesetOK = false
-                                       copyAttributes(body, p.tok)
-                               }
-                       }
-               case a.Frameset:
-                       if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
-                               // Ignore the token.
-                               return true
-                       }
-                       body := p.oe[1]
-                       if body.Parent != nil {
-                               body.Parent.RemoveChild(body)
-                       }
-                       p.oe = p.oe[:1]
-                       p.addElement()
-                       p.im = inFramesetIM
-                       return true
-               case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
-                       p.popUntil(buttonScope, a.P)
-                       p.addElement()
-               case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
-                       p.popUntil(buttonScope, a.P)
-                       switch n := p.top(); n.DataAtom {
-                       case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
-                               p.oe.pop()
-                       }
-                       p.addElement()
-               case a.Pre, a.Listing:
-                       p.popUntil(buttonScope, a.P)
-                       p.addElement()
-                       // The newline, if any, will be dealt with by the TextToken case.
-                       p.framesetOK = false
-               case a.Form:
-                       if p.form != nil && !p.oe.contains(a.Template) {
-                               // Ignore the token
-                               return true
-                       }
-                       p.popUntil(buttonScope, a.P)
-                       p.addElement()
-                       if !p.oe.contains(a.Template) {
-                               p.form = p.top()
-                       }
-               case a.Li:
-                       p.framesetOK = false
-                       for i := len(p.oe) - 1; i >= 0; i-- {
-                               node := p.oe[i]
-                               switch node.DataAtom {
-                               case a.Li:
-                                       p.oe = p.oe[:i]
-                               case a.Address, a.Div, a.P:
-                                       continue
-                               default:
-                                       if !isSpecialElement(node) {
-                                               continue
-                                       }
-                               }
-                               break
-                       }
-                       p.popUntil(buttonScope, a.P)
-                       p.addElement()
-               case a.Dd, a.Dt:
-                       p.framesetOK = false
-                       for i := len(p.oe) - 1; i >= 0; i-- {
-                               node := p.oe[i]
-                               switch node.DataAtom {
-                               case a.Dd, a.Dt:
-                                       p.oe = p.oe[:i]
-                               case a.Address, a.Div, a.P:
-                                       continue
-                               default:
-                                       if !isSpecialElement(node) {
-                                               continue
-                                       }
-                               }
-                               break
-                       }
-                       p.popUntil(buttonScope, a.P)
-                       p.addElement()
-               case a.Plaintext:
-                       p.popUntil(buttonScope, a.P)
-                       p.addElement()
-               case a.Button:
-                       p.popUntil(defaultScope, a.Button)
-                       p.reconstructActiveFormattingElements()
-                       p.addElement()
-                       p.framesetOK = false
-               case a.A:
-                       for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
-                               if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
-                                       p.inBodyEndTagFormatting(a.A, "a")
-                                       p.oe.remove(n)
-                                       p.afe.remove(n)
-                                       break
-                               }
-                       }
-                       p.reconstructActiveFormattingElements()
-                       p.addFormattingElement()
-               case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
-                       p.reconstructActiveFormattingElements()
-                       p.addFormattingElement()
-               case a.Nobr:
-                       p.reconstructActiveFormattingElements()
-                       if p.elementInScope(defaultScope, a.Nobr) {
-                               p.inBodyEndTagFormatting(a.Nobr, "nobr")
-                               p.reconstructActiveFormattingElements()
-                       }
-                       p.addFormattingElement()
-               case a.Applet, a.Marquee, a.Object:
-                       p.reconstructActiveFormattingElements()
-                       p.addElement()
-                       p.afe = append(p.afe, &scopeMarker)
-                       p.framesetOK = false
-               case a.Table:
-                       if !p.quirks {
-                               p.popUntil(buttonScope, a.P)
-                       }
-                       p.addElement()
-                       p.framesetOK = false
-                       p.im = inTableIM
-                       return true
-               case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
-                       p.reconstructActiveFormattingElements()
-                       p.addElement()
-                       p.oe.pop()
-                       p.acknowledgeSelfClosingTag()
-                       if p.tok.DataAtom == a.Input {
-                               for _, t := range p.tok.Attr {
-                                       if t.Key == "type" {
-                                               if strings.ToLower(t.Val) == "hidden" {
-                                                       // Skip setting framesetOK = false
-                                                       return true
-                                               }
-                                       }
-                               }
-                       }
-                       p.framesetOK = false
-               case a.Param, a.Source, a.Track:
-                       p.addElement()
-                       p.oe.pop()
-                       p.acknowledgeSelfClosingTag()
-               case a.Hr:
-                       p.popUntil(buttonScope, a.P)
-                       p.addElement()
-                       p.oe.pop()
-                       p.acknowledgeSelfClosingTag()
-                       p.framesetOK = false
-               case a.Image:
-                       p.tok.DataAtom = a.Img
-                       p.tok.Data = a.Img.String()
-                       return false
-               case a.Textarea:
-                       p.addElement()
-                       p.setOriginalIM()
-                       p.framesetOK = false
-                       p.im = textIM
-               case a.Xmp:
-                       p.popUntil(buttonScope, a.P)
-                       p.reconstructActiveFormattingElements()
-                       p.framesetOK = false
-                       p.parseGenericRawTextElement()
-               case a.Iframe:
-                       p.framesetOK = false
-                       p.parseGenericRawTextElement()
-               case a.Noembed:
-                       p.parseGenericRawTextElement()
-               case a.Noscript:
-                       if p.scripting {
-                               p.parseGenericRawTextElement()
-                               return true
-                       }
-                       p.reconstructActiveFormattingElements()
-                       p.addElement()
-                       // Don't let the tokenizer go into raw text mode when scripting is disabled.
-                       p.tokenizer.NextIsNotRawText()
-               case a.Select:
-                       p.reconstructActiveFormattingElements()
-                       p.addElement()
-                       p.framesetOK = false
-                       p.im = inSelectIM
-                       return true
-               case a.Optgroup, a.Option:
-                       if p.top().DataAtom == a.Option {
-                               p.oe.pop()
-                       }
-                       p.reconstructActiveFormattingElements()
-                       p.addElement()
-               case a.Rb, a.Rtc:
-                       if p.elementInScope(defaultScope, a.Ruby) {
-                               p.generateImpliedEndTags()
-                       }
-                       p.addElement()
-               case a.Rp, a.Rt:
-                       if p.elementInScope(defaultScope, a.Ruby) {
-                               p.generateImpliedEndTags("rtc")
-                       }
-                       p.addElement()
-               case a.Math, a.Svg:
-                       p.reconstructActiveFormattingElements()
-                       if p.tok.DataAtom == a.Math {
-                               adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
-                       } else {
-                               adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
-                       }
-                       adjustForeignAttributes(p.tok.Attr)
-                       p.addElement()
-                       p.top().Namespace = p.tok.Data
-                       if p.hasSelfClosingToken {
-                               p.oe.pop()
-                               p.acknowledgeSelfClosingTag()
-                       }
-                       return true
-               case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
-                       // Ignore the token.
-               default:
-                       p.reconstructActiveFormattingElements()
-                       p.addElement()
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Body:
-                       if p.elementInScope(defaultScope, a.Body) {
-                               p.im = afterBodyIM
-                       }
-               case a.Html:
-                       if p.elementInScope(defaultScope, a.Body) {
-                               p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
-                               return false
-                       }
-                       return true
-               case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
-                       p.popUntil(defaultScope, p.tok.DataAtom)
-               case a.Form:
-                       if p.oe.contains(a.Template) {
-                               i := p.indexOfElementInScope(defaultScope, a.Form)
-                               if i == -1 {
-                                       // Ignore the token.
-                                       return true
-                               }
-                               p.generateImpliedEndTags()
-                               if p.oe[i].DataAtom != a.Form {
-                                       // Ignore the token.
-                                       return true
-                               }
-                               p.popUntil(defaultScope, a.Form)
-                       } else {
-                               node := p.form
-                               p.form = nil
-                               i := p.indexOfElementInScope(defaultScope, a.Form)
-                               if node == nil || i == -1 || p.oe[i] != node {
-                                       // Ignore the token.
-                                       return true
-                               }
-                               p.generateImpliedEndTags()
-                               p.oe.remove(node)
-                       }
-               case a.P:
-                       if !p.elementInScope(buttonScope, a.P) {
-                               p.parseImpliedToken(StartTagToken, a.P, a.P.String())
-                       }
-                       p.popUntil(buttonScope, a.P)
-               case a.Li:
-                       p.popUntil(listItemScope, a.Li)
-               case a.Dd, a.Dt:
-                       p.popUntil(defaultScope, p.tok.DataAtom)
-               case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
-                       p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
-               case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
-                       p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
-               case a.Applet, a.Marquee, a.Object:
-                       if p.popUntil(defaultScope, p.tok.DataAtom) {
-                               p.clearActiveFormattingElements()
-                       }
-               case a.Br:
-                       p.tok.Type = StartTagToken
-                       return false
-               case a.Template:
-                       return inHeadIM(p)
-               default:
-                       p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
-               }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-       case ErrorToken:
-               // TODO: remove this divergence from the HTML5 spec.
-               if len(p.templateStack) > 0 {
-                       p.im = inTemplateIM
-                       return false
-               }
-               for _, e := range p.oe {
-                       switch e.DataAtom {
-                       case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
-                               a.Thead, a.Tr, a.Body, a.Html:
-                       default:
-                               return true
-                       }
-               }
-       }
-
-       return true
-}
-
-func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
-       // This is the "adoption agency" algorithm, described at
-       // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
-
-       // TODO: this is a fairly literal line-by-line translation of that algorithm.
-       // Once the code successfully parses the comprehensive test suite, we should
-       // refactor this code to be more idiomatic.
-
-       // Steps 1-2
-       if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
-               p.oe.pop()
-               return
-       }
-
-       // Steps 3-5. The outer loop.
-       for i := 0; i < 8; i++ {
-               // Step 6. Find the formatting element.
-               var formattingElement *Node
-               for j := len(p.afe) - 1; j >= 0; j-- {
-                       if p.afe[j].Type == scopeMarkerNode {
-                               break
-                       }
-                       if p.afe[j].DataAtom == tagAtom {
-                               formattingElement = p.afe[j]
-                               break
-                       }
-               }
-               if formattingElement == nil {
-                       p.inBodyEndTagOther(tagAtom, tagName)
-                       return
-               }
-
-               // Step 7. Ignore the tag if formatting element is not in the stack of open elements.
-               feIndex := p.oe.index(formattingElement)
-               if feIndex == -1 {
-                       p.afe.remove(formattingElement)
-                       return
-               }
-               // Step 8. Ignore the tag if formatting element is not in the scope.
-               if !p.elementInScope(defaultScope, tagAtom) {
-                       // Ignore the tag.
-                       return
-               }
-
-               // Step 9. This step is omitted because it's just a parse error but no need to return.
-
-               // Steps 10-11. Find the furthest block.
-               var furthestBlock *Node
-               for _, e := range p.oe[feIndex:] {
-                       if isSpecialElement(e) {
-                               furthestBlock = e
-                               break
-                       }
-               }
-               if furthestBlock == nil {
-                       e := p.oe.pop()
-                       for e != formattingElement {
-                               e = p.oe.pop()
-                       }
-                       p.afe.remove(e)
-                       return
-               }
-
-               // Steps 12-13. Find the common ancestor and bookmark node.
-               commonAncestor := p.oe[feIndex-1]
-               bookmark := p.afe.index(formattingElement)
-
-               // Step 14. The inner loop. Find the lastNode to reparent.
-               lastNode := furthestBlock
-               node := furthestBlock
-               x := p.oe.index(node)
-               // Step 14.1.
-               j := 0
-               for {
-                       // Step 14.2.
-                       j++
-                       // Step. 14.3.
-                       x--
-                       node = p.oe[x]
-                       // Step 14.4. Go to the next step if node is formatting element.
-                       if node == formattingElement {
-                               break
-                       }
-                       // Step 14.5. Remove node from the list of active formatting elements if
-                       // inner loop counter is greater than three and node is in the list of
-                       // active formatting elements.
-                       if ni := p.afe.index(node); j > 3 && ni > -1 {
-                               p.afe.remove(node)
-                               // If any element of the list of active formatting elements is removed,
-                               // we need to take care whether bookmark should be decremented or not.
-                               // This is because the value of bookmark may exceed the size of the
-                               // list by removing elements from the list.
-                               if ni <= bookmark {
-                                       bookmark--
-                               }
-                               continue
-                       }
-                       // Step 14.6. Continue the next inner loop if node is not in the list of
-                       // active formatting elements.
-                       if p.afe.index(node) == -1 {
-                               p.oe.remove(node)
-                               continue
-                       }
-                       // Step 14.7.
-                       clone := node.clone()
-                       p.afe[p.afe.index(node)] = clone
-                       p.oe[p.oe.index(node)] = clone
-                       node = clone
-                       // Step 14.8.
-                       if lastNode == furthestBlock {
-                               bookmark = p.afe.index(node) + 1
-                       }
-                       // Step 14.9.
-                       if lastNode.Parent != nil {
-                               lastNode.Parent.RemoveChild(lastNode)
-                       }
-                       node.AppendChild(lastNode)
-                       // Step 14.10.
-                       lastNode = node
-               }
-
-               // Step 15. Reparent lastNode to the common ancestor,
-               // or for misnested table nodes, to the foster parent.
-               if lastNode.Parent != nil {
-                       lastNode.Parent.RemoveChild(lastNode)
-               }
-               switch commonAncestor.DataAtom {
-               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
-                       p.fosterParent(lastNode)
-               default:
-                       commonAncestor.AppendChild(lastNode)
-               }
-
-               // Steps 16-18. Reparent nodes from the furthest block's children
-               // to a clone of the formatting element.
-               clone := formattingElement.clone()
-               reparentChildren(clone, furthestBlock)
-               furthestBlock.AppendChild(clone)
-
-               // Step 19. Fix up the list of active formatting elements.
-               if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
-                       // Move the bookmark with the rest of the list.
-                       bookmark--
-               }
-               p.afe.remove(formattingElement)
-               p.afe.insert(bookmark, clone)
-
-               // Step 20. Fix up the stack of open elements.
-               p.oe.remove(formattingElement)
-               p.oe.insert(p.oe.index(furthestBlock)+1, clone)
-       }
-}
-
-// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
-// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
-// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
-func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
-       for i := len(p.oe) - 1; i >= 0; i-- {
-               // Two element nodes have the same tag if they have the same Data (a
-               // string-typed field). As an optimization, for common HTML tags, each
-               // Data string is assigned a unique, non-zero DataAtom (a uint32-typed
-               // field), since integer comparison is faster than string comparison.
-               // Uncommon (custom) tags get a zero DataAtom.
-               //
-               // The if condition here is equivalent to (p.oe[i].Data == tagName).
-               if (p.oe[i].DataAtom == tagAtom) &&
-                       ((tagAtom != 0) || (p.oe[i].Data == tagName)) {
-                       p.oe = p.oe[:i]
-                       break
-               }
-               if isSpecialElement(p.oe[i]) {
-                       break
-               }
-       }
-}
-
-// Section 12.2.6.4.8.
-func textIM(p *parser) bool {
-       switch p.tok.Type {
-       case ErrorToken:
-               p.oe.pop()
-       case TextToken:
-               d := p.tok.Data
-               if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
-                       // Ignore a newline at the start of a <textarea> block.
-                       if d != "" && d[0] == '\r' {
-                               d = d[1:]
-                       }
-                       if d != "" && d[0] == '\n' {
-                               d = d[1:]
-                       }
-               }
-               if d == "" {
-                       return true
-               }
-               p.addText(d)
-               return true
-       case EndTagToken:
-               p.oe.pop()
-       }
-       p.im = p.originalIM
-       p.originalIM = nil
-       return p.tok.Type == EndTagToken
-}
-
-// Section 12.2.6.4.9. inTableIM handles the current token in the "in table" insertion mode; it returns true once the token is consumed and false when the token must be reprocessed.
-func inTableIM(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken: // only whitespace-only text directly under a table-structure element is consumed in this case
-               p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1) // strip NUL bytes from the text
-               switch p.oe.top().DataAtom {
-               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
-                       if strings.Trim(p.tok.Data, whitespace) == "" {
-                               p.addText(p.tok.Data)
-                               return true
-                       }
-               } // any other text falls through to the foster-parented inBodyIM call at the bottom
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Caption:
-                       p.clearStackToContext(tableScope)
-                       p.afe = append(p.afe, &scopeMarker) // push scopeMarker onto p.afe before the caption is added
-                       p.addElement()
-                       p.im = inCaptionIM
-                       return true
-               case a.Colgroup:
-                       p.clearStackToContext(tableScope)
-                       p.addElement()
-                       p.im = inColumnGroupIM
-                       return true
-               case a.Col:
-                       p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String()) // behave as if a <colgroup> start tag had been seen
-                       return false // then reprocess the <col> token
-               case a.Tbody, a.Tfoot, a.Thead:
-                       p.clearStackToContext(tableScope)
-                       p.addElement()
-                       p.im = inTableBodyIM
-                       return true
-               case a.Td, a.Th, a.Tr:
-                       p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String()) // behave as if a <tbody> start tag had been seen
-                       return false // then reprocess the original token
-               case a.Table:
-                       if p.popUntil(tableScope, a.Table) { // a <table> start tag inside a table first closes the open table
-                               p.resetInsertionMode()
-                               return false // reprocess the start tag under the new insertion mode
-                       }
-                       // Ignore the token.
-                       return true
-               case a.Style, a.Script, a.Template:
-                       return inHeadIM(p)
-               case a.Input:
-                       for _, t := range p.tok.Attr {
-                               if t.Key == "type" && strings.ToLower(t.Val) == "hidden" { // a hidden input is added and immediately popped, without foster parenting
-                                       p.addElement()
-                                       p.oe.pop()
-                                       return true
-                               }
-                       }
-                       // Otherwise drop down to the default action.
-               case a.Form:
-                       if p.oe.contains(a.Template) || p.form != nil {
-                               // Ignore the token.
-                               return true
-                       }
-                       p.addElement()
-                       p.form = p.oe.pop() // NOTE(review): no return follows, so control reaches the inBodyIM call at the bottom; confirm inBodyIM ignores a <form> start tag once p.form is set
-               case a.Select:
-                       p.reconstructActiveFormattingElements()
-                       switch p.top().DataAtom {
-                       case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
-                               p.fosterParenting = true // enabled only while the <select> element is added
-                       }
-                       p.addElement()
-                       p.fosterParenting = false
-                       p.framesetOK = false
-                       p.im = inSelectInTableIM
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Table:
-                       if p.popUntil(tableScope, a.Table) {
-                               p.resetInsertionMode()
-                               return true
-                       }
-                       // Ignore the token.
-                       return true
-               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
-                       // Ignore the token.
-                       return true
-               case a.Template:
-                       return inHeadIM(p)
-               }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       case DoctypeToken:
-               // Ignore the token.
-               return true
-       case ErrorToken:
-               return inBodyIM(p)
-       }
-
-       p.fosterParenting = true // everything not handled above is processed by inBodyIM with foster parenting switched on
-       defer func() { p.fosterParenting = false }() // switched off again when this function returns
-
-       return inBodyIM(p)
-}
-
-// Section 12.2.6.4.11. inCaptionIM handles the current token in the "in caption" insertion mode; it returns true once the token is consumed and false when the token must be reprocessed.
-func inCaptionIM(p *parser) bool {
-       switch p.tok.Type {
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
-                       if !p.popUntil(tableScope, a.Caption) { // no caption in table scope: nothing to close
-                               // Ignore the token.
-                               return true
-                       }
-                       p.clearActiveFormattingElements()
-                       p.im = inTableIM
-                       return false // reprocess the start tag in inTableIM
-               case a.Select:
-                       p.reconstructActiveFormattingElements()
-                       p.addElement()
-                       p.framesetOK = false
-                       p.im = inSelectInTableIM
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Caption:
-                       if p.popUntil(tableScope, a.Caption) {
-                               p.clearActiveFormattingElements()
-                               p.im = inTableIM
-                       }
-                       return true // consumed whether or not a caption was closed
-               case a.Table:
-                       if !p.popUntil(tableScope, a.Caption) {
-                               // Ignore the token.
-                               return true
-                       }
-                       p.clearActiveFormattingElements()
-                       p.im = inTableIM
-                       return false // reprocess </table> in inTableIM
-               case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
-                       // Ignore the token.
-                       return true
-               }
-       }
-       return inBodyIM(p) // every other token is handled by inBodyIM
-}
-
-// Section 12.2.6.4.12. inColumnGroupIM handles the current token in the "in column group" insertion mode; it returns true once the token is consumed and false when the token must be reprocessed.
-func inColumnGroupIM(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken:
-               s := strings.TrimLeft(p.tok.Data, whitespace) // s is the text after its leading whitespace
-               if len(s) < len(p.tok.Data) {
-                       // Add the initial whitespace to the current node.
-                       p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
-                       if s == "" {
-                               return true
-                       }
-                       p.tok.Data = s // the remaining text is handled by the fall-through code at the bottom
-               }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       case DoctypeToken:
-               // Ignore the token.
-               return true
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       return inBodyIM(p)
-               case a.Col:
-                       p.addElement()
-                       p.oe.pop() // <col> is added and popped straight away, so it never stays open
-                       p.acknowledgeSelfClosingTag()
-                       return true
-               case a.Template:
-                       return inHeadIM(p)
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Colgroup:
-                       if p.oe.top().DataAtom == a.Colgroup {
-                               p.oe.pop()
-                               p.im = inTableIM
-                       }
-                       return true // consumed even when the current node is not a colgroup
-               case a.Col:
-                       // Ignore the token.
-                       return true
-               case a.Template:
-                       return inHeadIM(p)
-               }
-       case ErrorToken:
-               return inBodyIM(p)
-       }
-       if p.oe.top().DataAtom != a.Colgroup { // anything else is ignored unless the current node is a colgroup
-               return true
-       }
-       p.oe.pop() // close the colgroup
-       p.im = inTableIM
-       return false // and reprocess the token in inTableIM
-}
-
-// Section 12.2.6.4.13. inTableBodyIM handles the current token in the "in table body" insertion mode; it returns true once the token is consumed and false when the token must be reprocessed.
-func inTableBodyIM(p *parser) bool {
-       switch p.tok.Type {
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Tr:
-                       p.clearStackToContext(tableBodyScope)
-                       p.addElement()
-                       p.im = inRowIM
-                       return true
-               case a.Td, a.Th:
-                       p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String()) // behave as if a <tr> start tag had been seen
-                       return false // then reprocess the cell start tag
-               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
-                       if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) { // close the open table section, if one is in table scope
-                               p.im = inTableIM
-                               return false
-                       }
-                       // Ignore the token.
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Tbody, a.Tfoot, a.Thead:
-                       if p.elementInScope(tableScope, p.tok.DataAtom) {
-                               p.clearStackToContext(tableBodyScope)
-                               p.oe.pop()
-                               p.im = inTableIM
-                       }
-                       return true // consumed whether or not a section was closed
-               case a.Table:
-                       if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
-                               p.im = inTableIM
-                               return false // reprocess </table> in inTableIM
-                       }
-                       // Ignore the token.
-                       return true
-               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
-                       // Ignore the token.
-                       return true
-               }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       }
-
-       return inTableIM(p) // every other token is handled by inTableIM
-}
-
-// Section 12.2.6.4.14.
-func inRowIM(p *parser) bool {
-       switch p.tok.Type {
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Td, a.Th:
-                       p.clearStackToContext(tableRowScope)
-                       p.addElement()
-                       p.afe = append(p.afe, &scopeMarker)
-                       p.im = inCellIM
-                       return true
-               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
-                       if p.popUntil(tableScope, a.Tr) {
-                               p.im = inTableBodyIM
-                               return false
-                       }
-                       // Ignore the token.
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Tr:
-                       if p.popUntil(tableScope, a.Tr) {
-                               p.im = inTableBodyIM
-                               return true
-                       }
-                       // Ignore the token.
-                       return true
-               case a.Table:
-                       if p.popUntil(tableScope, a.Tr) {
-                               p.im = inTableBodyIM
-                               return false
-                       }
-                       // Ignore the token.
-                       return true
-               case a.Tbody, a.Tfoot, a.Thead:
-                       if p.elementInScope(tableScope, p.tok.DataAtom) {
-                               p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
-                               return false
-                       }
-                       // Ignore the token.
-                       return true
-               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
-                       // Ignore the token.
-                       return true
-               }
-       }
-
-       return inTableIM(p)
-}
-
-// Section 12.2.6.4.15. inCellIM handles the current token in the "in cell" insertion mode; it returns true once the token is consumed and false when the token must be reprocessed.
-func inCellIM(p *parser) bool {
-       switch p.tok.Type {
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
-                       if p.popUntil(tableScope, a.Td, a.Th) {
-                               // Close the cell and reprocess.
-                               p.clearActiveFormattingElements()
-                               p.im = inRowIM
-                               return false
-                       }
-                       // Ignore the token.
-                       return true
-               case a.Select:
-                       p.reconstructActiveFormattingElements()
-                       p.addElement()
-                       p.framesetOK = false
-                       p.im = inSelectInTableIM
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Td, a.Th:
-                       if !p.popUntil(tableScope, p.tok.DataAtom) { // only a cell of the same kind as the end tag is closed
-                               // Ignore the token.
-                               return true
-                       }
-                       p.clearActiveFormattingElements()
-                       p.im = inRowIM
-                       return true
-               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
-                       // Ignore the token.
-                       return true
-               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
-                       if !p.elementInScope(tableScope, p.tok.DataAtom) {
-                               // Ignore the token.
-                               return true
-                       }
-                       // Close the cell and reprocess.
-                       if p.popUntil(tableScope, a.Td, a.Th) {
-                               p.clearActiveFormattingElements()
-                       }
-                       p.im = inRowIM // the mode changes even if no cell was found to close
-                       return false
-               }
-       }
-       return inBodyIM(p) // every other token is handled by inBodyIM
-}
-
-// Section 12.2.6.4.16. inSelectIM handles the current token in the "in select" insertion mode; it returns true once the token is consumed and false when the token must be reprocessed.
-func inSelectIM(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken:
-               p.addText(strings.Replace(p.tok.Data, "\x00", "", -1)) // text is added with NUL bytes removed
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       return inBodyIM(p)
-               case a.Option:
-                       if p.top().DataAtom == a.Option { // a new <option> closes an open one first
-                               p.oe.pop()
-                       }
-                       p.addElement()
-               case a.Optgroup:
-                       if p.top().DataAtom == a.Option { // a new <optgroup> closes an open <option> ...
-                               p.oe.pop()
-                       }
-                       if p.top().DataAtom == a.Optgroup { // ... and then an open <optgroup>
-                               p.oe.pop()
-                       }
-                       p.addElement()
-               case a.Select:
-                       if !p.popUntil(selectScope, a.Select) { // a <select> start tag inside a select closes the open select
-                               // Ignore the token.
-                               return true
-                       }
-                       p.resetInsertionMode()
-               case a.Input, a.Keygen, a.Textarea:
-                       if p.elementInScope(selectScope, a.Select) {
-                               p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) // behave as if </select> had been seen
-                               return false // then reprocess the start tag
-                       }
-                       // In order to properly ignore <textarea>, we need to change the tokenizer mode.
-                       p.tokenizer.NextIsNotRawText()
-                       // Ignore the token.
-                       return true
-               case a.Script, a.Template:
-                       return inHeadIM(p)
-               case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
-                       // Don't let the tokenizer go into raw text mode when there are raw tags
-                       // to be ignored. These tags should be ignored from the tokenizer
-                       // properly.
-                       p.tokenizer.NextIsNotRawText()
-                       // Ignore the token.
-                       return true
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Option:
-                       if p.top().DataAtom == a.Option {
-                               p.oe.pop()
-                       }
-               case a.Optgroup:
-                       i := len(p.oe) - 1 // index of the current node
-                       if p.oe[i].DataAtom == a.Option { // look past an open <option> to the entry beneath it
-                               i--
-                       }
-                       if p.oe[i].DataAtom == a.Optgroup { // truncating at i removes the optgroup and anything above it
-                               p.oe = p.oe[:i]
-                       }
-               case a.Select:
-                       if !p.popUntil(selectScope, a.Select) {
-                               // Ignore the token.
-                               return true
-                       }
-                       p.resetInsertionMode()
-               case a.Template:
-                       return inHeadIM(p)
-               }
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-       case DoctypeToken:
-               // Ignore the token.
-               return true
-       case ErrorToken:
-               return inBodyIM(p)
-       }
-
-       return true // every path that reaches this point has consumed or ignored the token
-}
-
-// Section 12.2.6.4.17.
-func inSelectInTableIM(p *parser) bool {
-       switch p.tok.Type {
-       case StartTagToken, EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
-                       if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
-                               // Ignore the token.
-                               return true
-                       }
-                       // This is like p.popUntil(selectScope, a.Select), but it also
-                       // matches <math select>, not just <select>. Matching the MathML
-                       // tag is arguably incorrect (conceptually), but it mimics what
-                       // Chromium does.
-                       for i := len(p.oe) - 1; i >= 0; i-- {
-                               if n := p.oe[i]; n.DataAtom == a.Select {
-                                       p.oe = p.oe[:i]
-                                       break
-                               }
-                       }
-                       p.resetInsertionMode()
-                       return false
-               }
-       }
-       return inSelectIM(p)
-}
-
-// Section 12.2.6.4.18.
-func inTemplateIM(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken, CommentToken, DoctypeToken:
-               return inBodyIM(p)
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
-                       return inHeadIM(p)
-               case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
-                       p.templateStack.pop()
-                       p.templateStack = append(p.templateStack, inTableIM)
-                       p.im = inTableIM
-                       return false
-               case a.Col:
-                       p.templateStack.pop()
-                       p.templateStack = append(p.templateStack, inColumnGroupIM)
-                       p.im = inColumnGroupIM
-                       return false
-               case a.Tr:
-                       p.templateStack.pop()
-                       p.templateStack = append(p.templateStack, inTableBodyIM)
-                       p.im = inTableBodyIM
-                       return false
-               case a.Td, a.Th:
-                       p.templateStack.pop()
-                       p.templateStack = append(p.templateStack, inRowIM)
-                       p.im = inRowIM
-                       return false
-               default:
-                       p.templateStack.pop()
-                       p.templateStack = append(p.templateStack, inBodyIM)
-                       p.im = inBodyIM
-                       return false
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Template:
-                       return inHeadIM(p)
-               default:
-                       // Ignore the token.
-                       return true
-               }
-       case ErrorToken:
-               if !p.oe.contains(a.Template) {
-                       // Ignore the token.
-                       return true
-               }
-               // TODO: remove this divergence from the HTML5 spec.
-               //
-               // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
-               p.generateImpliedEndTags()
-               for i := len(p.oe) - 1; i >= 0; i-- {
-                       if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
-                               p.oe = p.oe[:i]
-                               break
-                       }
-               }
-               p.clearActiveFormattingElements()
-               p.templateStack.pop()
-               p.resetInsertionMode()
-               return false
-       }
-       return false
-}
-
-// Section 12.2.6.4.19.
-func afterBodyIM(p *parser) bool {
-       switch p.tok.Type {
-       case ErrorToken:
-               // Stop parsing.
-               return true
-       case TextToken:
-               s := strings.TrimLeft(p.tok.Data, whitespace)
-               if len(s) == 0 {
-                       // It was all whitespace.
-                       return inBodyIM(p)
-               }
-       case StartTagToken:
-               if p.tok.DataAtom == a.Html {
-                       return inBodyIM(p)
-               }
-       case EndTagToken:
-               if p.tok.DataAtom == a.Html {
-                       if !p.fragment {
-                               p.im = afterAfterBodyIM
-                       }
-                       return true
-               }
-       case CommentToken:
-               // The comment is attached to the <html> element.
-               if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
-                       panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
-               }
-               p.oe[0].AppendChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       }
-       p.im = inBodyIM
-       return false
-}
-
-// Section 12.2.6.4.20.
-func inFramesetIM(p *parser) bool {
-       switch p.tok.Type {
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-       case TextToken:
-               // Ignore all text but whitespace.
-               s := strings.Map(func(c rune) rune {
-                       switch c {
-                       case ' ', '\t', '\n', '\f', '\r':
-                               return c
-                       }
-                       return -1
-               }, p.tok.Data)
-               if s != "" {
-                       p.addText(s)
-               }
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       return inBodyIM(p)
-               case a.Frameset:
-                       p.addElement()
-               case a.Frame:
-                       p.addElement()
-                       p.oe.pop()
-                       p.acknowledgeSelfClosingTag()
-               case a.Noframes:
-                       return inHeadIM(p)
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Frameset:
-                       if p.oe.top().DataAtom != a.Html {
-                               p.oe.pop()
-                               if p.oe.top().DataAtom != a.Frameset {
-                                       p.im = afterFramesetIM
-                                       return true
-                               }
-                       }
-               }
-       default:
-               // Ignore the token.
-       }
-       return true
-}
-
-// Section 12.2.6.4.21.
-func afterFramesetIM(p *parser) bool {
-       switch p.tok.Type {
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-       case TextToken:
-               // Ignore all text but whitespace.
-               s := strings.Map(func(c rune) rune {
-                       switch c {
-                       case ' ', '\t', '\n', '\f', '\r':
-                               return c
-                       }
-                       return -1
-               }, p.tok.Data)
-               if s != "" {
-                       p.addText(s)
-               }
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       return inBodyIM(p)
-               case a.Noframes:
-                       return inHeadIM(p)
-               }
-       case EndTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       p.im = afterAfterFramesetIM
-                       return true
-               }
-       default:
-               // Ignore the token.
-       }
-       return true
-}
-
-// Section 12.2.6.4.22.
-func afterAfterBodyIM(p *parser) bool {
-       switch p.tok.Type {
-       case ErrorToken:
-               // Stop parsing.
-               return true
-       case TextToken:
-               s := strings.TrimLeft(p.tok.Data, whitespace)
-               if len(s) == 0 {
-                       // It was all whitespace.
-                       return inBodyIM(p)
-               }
-       case StartTagToken:
-               if p.tok.DataAtom == a.Html {
-                       return inBodyIM(p)
-               }
-       case CommentToken:
-               p.doc.AppendChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-               return true
-       case DoctypeToken:
-               return inBodyIM(p)
-       }
-       p.im = inBodyIM
-       return false
-}
-
-// Section 12.2.6.4.23.
-func afterAfterFramesetIM(p *parser) bool {
-       switch p.tok.Type {
-       case CommentToken:
-               p.doc.AppendChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-       case TextToken:
-               // Ignore all text but whitespace.
-               s := strings.Map(func(c rune) rune {
-                       switch c {
-                       case ' ', '\t', '\n', '\f', '\r':
-                               return c
-                       }
-                       return -1
-               }, p.tok.Data)
-               if s != "" {
-                       p.tok.Data = s
-                       return inBodyIM(p)
-               }
-       case StartTagToken:
-               switch p.tok.DataAtom {
-               case a.Html:
-                       return inBodyIM(p)
-               case a.Noframes:
-                       return inHeadIM(p)
-               }
-       case DoctypeToken:
-               return inBodyIM(p)
-       default:
-               // Ignore the token.
-       }
-       return true
-}
-
-func ignoreTheRemainingTokens(p *parser) bool { // has the same signature as the insertion-mode functions and reports every token as consumed without inspecting it
-       return true
-}
-
-const whitespaceOrNUL = whitespace + "\x00" // the whitespace cutset plus NUL; used as the strings.TrimLeft cutset in parseForeignContent
-
-// Section 12.2.6.5. parseForeignContent handles the current token while the parser is in foreign (non-HTML namespace) content; it returns true once the token is consumed and false when the token must be reprocessed.
-func parseForeignContent(p *parser) bool {
-       switch p.tok.Type {
-       case TextToken:
-               if p.framesetOK {
-                       p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == "" // stays true only if the text is nothing but whitespace and NUL
-               }
-               p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1) // NUL bytes become U+FFFD
-               p.addText(p.tok.Data)
-       case CommentToken:
-               p.addChild(&Node{
-                       Type: CommentNode,
-                       Data: p.tok.Data,
-               })
-       case StartTagToken:
-               if !p.fragment { // the breakout check below is skipped when parsing a fragment
-                       b := breakout[p.tok.Data] // b reports whether this start tag breaks out of foreign content
-                       if p.tok.DataAtom == a.Font {
-                       loop:
-                               for _, attr := range p.tok.Attr {
-                                       switch attr.Key {
-                                       case "color", "face", "size": // <font> breaks out only when it carries one of these attributes
-                                               b = true
-                                               break loop
-                                       }
-                               }
-                       }
-                       if b {
-                               for i := len(p.oe) - 1; i >= 0; i-- { // cut the stack back to the nearest HTML-namespace element or integration point
-                                       n := p.oe[i]
-                                       if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
-                                               p.oe = p.oe[:i+1]
-                                               break
-                                       }
-                               }
-                               return false // reprocess the start tag
-                       }
-               }
-               current := p.adjustedCurrentNode()
-               switch current.Namespace {
-               case "math":
-                       adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
-               case "svg":
-                       // Adjust SVG tag names. The tokenizer lower-cases tag names, but
-                       // SVG wants e.g. "foreignObject" with a capital second "O".
-                       if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
-                               p.tok.DataAtom = a.Lookup([]byte(x))
-                               p.tok.Data = x
-                       }
-                       adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
-               default:
-                       panic("html: bad parser state: unexpected namespace")
-               }
-               adjustForeignAttributes(p.tok.Attr)
-               namespace := current.Namespace
-               p.addElement()
-               p.top().Namespace = namespace // the new element takes the namespace of the adjusted current node
-               if namespace != "" { // always true here: the switch above panics unless the namespace is "math" or "svg"
-                       // Don't let the tokenizer go into raw text mode in foreign content
-                       // (e.g. in an SVG <title> tag).
-                       p.tokenizer.NextIsNotRawText()
-               }
-               if p.hasSelfClosingToken {
-                       p.oe.pop()
-                       p.acknowledgeSelfClosingTag()
-               }
-       case EndTagToken:
-               for i := len(p.oe) - 1; i >= 0; i-- { // walk the stack of open elements from the current node downwards
-                       if p.oe[i].Namespace == "" {
-                               return p.im(p) // reached an HTML-namespace element: let the current insertion mode handle the end tag
-                       }
-                       if strings.EqualFold(p.oe[i].Data, p.tok.Data) { // case-insensitive tag-name match
-                               p.oe = p.oe[:i] // remove the matched element and everything above it
-                               break
-                       }
-               }
-               return true
-       default:
-               // Ignore the token.
-       }
-       return true
-}
-
-// Section 12.2.4.2.
-func (p *parser) adjustedCurrentNode() *Node {
-       if len(p.oe) == 1 && p.fragment && p.context != nil {
-               return p.context
-       }
-       return p.oe.top()
-}
-
-// Section 12.2.6.
-func (p *parser) inForeignContent() bool {
-       if len(p.oe) == 0 {
-               return false
-       }
-       n := p.adjustedCurrentNode()
-       if n.Namespace == "" {
-               return false
-       }
-       if mathMLTextIntegrationPoint(n) {
-               if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
-                       return false
-               }
-               if p.tok.Type == TextToken {
-                       return false
-               }
-       }
-       if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
-               return false
-       }
-       if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
-               return false
-       }
-       if p.tok.Type == ErrorToken {
-               return false
-       }
-       return true
-}
-
-// parseImpliedToken parses a token as though it had appeared in the parser's
-// input.
-func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
-       realToken, selfClosing := p.tok, p.hasSelfClosingToken
-       p.tok = Token{
-               Type:     t,
-               DataAtom: dataAtom,
-               Data:     data,
-       }
-       p.hasSelfClosingToken = false
-       p.parseCurrentToken()
-       p.tok, p.hasSelfClosingToken = realToken, selfClosing
-}
-
-// parseCurrentToken feeds p.tok to the parsing routines, repeating until one
-// of them reports that the token has been consumed. A self-closing tag token
-// is first turned into a start tag token with p.hasSelfClosingToken set.
-func (p *parser) parseCurrentToken() {
-       if p.tok.Type == SelfClosingTagToken {
-               p.tok.Type = StartTagToken
-               p.hasSelfClosingToken = true
-       }
-
-       for {
-               var done bool
-               if p.inForeignContent() {
-                       done = parseForeignContent(p)
-               } else {
-                       done = p.im(p)
-               }
-               if done {
-                       break
-               }
-       }
-
-       // A self-closing flag that is still set at this point is a parse
-       // error, but it is ignored: the flag is simply cleared.
-       p.hasSelfClosingToken = false
-}
-
-func (p *parser) parse() error {
-       // Iterate until EOF. Any other error will cause an early return.
-       var err error
-       for err != io.EOF {
-               // CDATA sections are allowed only in foreign content.
-               n := p.oe.top()
-               p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
-               // Read and parse the next token.
-               p.tokenizer.Next()
-               p.tok = p.tokenizer.Token()
-               if p.tok.Type == ErrorToken {
-                       err = p.tokenizer.Err()
-                       if err != nil && err != io.EOF {
-                               return err
-                       }
-               }
-               p.parseCurrentToken()
-       }
-       return nil
-}
-
-// Parse returns the parse tree for the HTML from the given Reader.
-//
-// It implements the HTML5 parsing algorithm
-// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
-// which is very complicated. The resultant tree can contain implicitly created
-// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
-// differ from the nesting implied by a naive processing of start and end
-// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
-// with no corresponding node in the resulting tree.
-//
-// The input is assumed to be UTF-8 encoded.
-func Parse(r io.Reader) (*Node, error) {
-       return ParseWithOptions(r)
-}
-
-// ParseFragment parses a fragment of HTML and returns the nodes that were
-// found. If the fragment is the InnerHTML for an existing element, pass that
-// element in context.
-//
-// It has the same intricacies as Parse.
-func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
-       return ParseFragmentWithOptions(r, context)
-}
-
-// ParseOption configures a parser. ParseWithOptions and
-// ParseFragmentWithOptions call every option they are given on the newly
-// created parser before parsing starts.
-type ParseOption func(p *parser)
-
-// ParseOptionEnableScripting returns an option that sets the parser's
-// scripting flag to enable.
-// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
-//
-// By default, scripting is enabled.
-func ParseOptionEnableScripting(enable bool) ParseOption {
-       var opt ParseOption = func(target *parser) {
-               target.scripting = enable
-       }
-       return opt
-}
-
-// ParseWithOptions is like Parse, with options.
-func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
-       p := &parser{
-               tokenizer: NewTokenizer(r),
-               doc: &Node{
-                       Type: DocumentNode,
-               },
-               scripting:  true,
-               framesetOK: true,
-               im:         initialIM,
-       }
-
-       for _, f := range opts {
-               f(p)
-       }
-
-       if err := p.parse(); err != nil {
-               return nil, err
-       }
-       return p.doc, nil
-}
-
-// ParseFragmentWithOptions is like ParseFragment, with options.
-//
-// context may be nil. A non-nil context must be an ElementNode whose DataAtom
-// agrees with its Data; otherwise an error is returned before anything is
-// read from r. The returned nodes are detached from the temporary tree the
-// parser built, so they have no parent.
-func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
-       contextTag := ""
-       if context != nil {
-               if context.Type != ElementNode {
-                       return nil, errors.New("html: ParseFragment of non-element Node")
-               }
-               // The next check isn't just context.DataAtom.String() == context.Data because
-               // it is valid to pass an element whose tag isn't a known atom. For example,
-               // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
-               if context.DataAtom != a.Lookup([]byte(context.Data)) {
-                       return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
-               }
-               contextTag = context.DataAtom.String()
-       }
-       p := &parser{
-               doc: &Node{
-                       Type: DocumentNode,
-               },
-               scripting: true,
-               fragment:  true,
-               context:   context,
-       }
-       // A context element in a non-empty namespace gets a plain tokenizer;
-       // otherwise the tokenizer is created for the context tag name.
-       if context != nil && context.Namespace != "" {
-               p.tokenizer = NewTokenizer(r)
-       } else {
-               p.tokenizer = NewTokenizerFragment(r, contextTag)
-       }
-
-       for _, f := range opts {
-               f(p)
-       }
-
-       // Parsing starts with a synthetic <html> root as the only open element.
-       root := &Node{
-               Type:     ElementNode,
-               DataAtom: a.Html,
-               Data:     a.Html.String(),
-       }
-       p.doc.AppendChild(root)
-       p.oe = nodeStack{root}
-       if context != nil && context.DataAtom == a.Template {
-               p.templateStack = append(p.templateStack, inTemplateIM)
-       }
-       p.resetInsertionMode()
-
-       // The form element pointer is the nearest <form>, starting at context
-       // itself and walking up through its ancestors.
-       for n := context; n != nil; n = n.Parent {
-               if n.Type == ElementNode && n.DataAtom == a.Form {
-                       p.form = n
-                       break
-               }
-       }
-
-       if err := p.parse(); err != nil {
-               return nil, err
-       }
-
-       // With a context the result is the children of the synthetic root;
-       // without one it is the children of the document node.
-       parent := p.doc
-       if context != nil {
-               parent = root
-       }
-
-       // Detach every child from parent and collect it. The next sibling is
-       // read before RemoveChild is called on the current child.
-       var result []*Node
-       for c := parent.FirstChild; c != nil; {
-               next := c.NextSibling
-               parent.RemoveChild(c)
-               result = append(result, c)
-               c = next
-       }
-       return result, nil
-}
diff --git a/html/parse_test.go b/html/parse_test.go
deleted file mode 100644 (file)
index 0eb2be1..0000000
+++ /dev/null
@@ -1,490 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "bufio"
-       "bytes"
-       "errors"
-       "fmt"
-       "io"
-       "os"
-       "path/filepath"
-       "runtime"
-       "sort"
-       "strings"
-       "testing"
-
-       "git.earlybird.gay/today-engine/html/atom"
-)
-
-// testAttrs is one test case as read by readParseTest: text is the HTML from
-// the #data section, want is the tree dump from the #document section,
-// context is the line following #document-fragment (empty when that section
-// is absent), and scripting is the scripting flag, which is true unless a
-// "#script-...-off" line was seen.
-type testAttrs struct {
-       text, want, context string
-       scripting           bool
-}
-
-// readParseTest reads a single test case from r.
-//
-// The sections are expected in this order: "#data", "#errors", an optional
-// "#new-errors", an optional "#script-...-on" or "#script-...-off", an
-// optional "#document-fragment", and "#document". The contents of the error
-// sections are skipped. Any read error, including io.EOF before the
-// "#document" section is reached, is returned as is.
-func readParseTest(r *bufio.Reader) (*testAttrs, error) {
-       ta := &testAttrs{scripting: true}
-       line, err := r.ReadSlice('\n')
-       if err != nil {
-               return nil, err
-       }
-       var b []byte
-
-       // Read the HTML.
-       if string(line) != "#data\n" {
-               return nil, fmt.Errorf(`got %q want "#data\n"`, line)
-       }
-       for {
-               line, err = r.ReadSlice('\n')
-               if err != nil {
-                       return nil, err
-               }
-               if line[0] == '#' {
-                       break
-               }
-               b = append(b, line...)
-       }
-       // Only the final newline is dropped; earlier ones are part of the input.
-       ta.text = strings.TrimSuffix(string(b), "\n")
-       b = b[:0]
-
-       // Skip the error list.
-       if string(line) != "#errors\n" {
-               return nil, fmt.Errorf(`got %q want "#errors\n"`, line)
-       }
-       for {
-               line, err = r.ReadSlice('\n')
-               if err != nil {
-                       return nil, err
-               }
-               if line[0] == '#' {
-                       break
-               }
-       }
-
-       // Skip the new-errors list.
-       if string(line) == "#new-errors\n" {
-               for {
-                       line, err = r.ReadSlice('\n')
-                       if err != nil {
-                               return nil, err
-                       }
-                       if line[0] == '#' {
-                               break
-                       }
-               }
-       }
-
-       // Read the optional scripting directive, then skip to the next section.
-       if ls := string(line); strings.HasPrefix(ls, "#script-") {
-               switch {
-               case strings.HasSuffix(ls, "-on\n"):
-                       ta.scripting = true
-               case strings.HasSuffix(ls, "-off\n"):
-                       ta.scripting = false
-               default:
-                       return nil, fmt.Errorf(`got %q, want "#script-on" or "#script-off"`, line)
-               }
-               for {
-                       line, err = r.ReadSlice('\n')
-                       if err != nil {
-                               return nil, err
-                       }
-                       if line[0] == '#' {
-                               break
-                       }
-               }
-       }
-
-       // Read the optional fragment context: one line, followed by the next
-       // section header.
-       if string(line) == "#document-fragment\n" {
-               line, err = r.ReadSlice('\n')
-               if err != nil {
-                       return nil, err
-               }
-               ta.context = strings.TrimSpace(string(line))
-               line, err = r.ReadSlice('\n')
-               if err != nil {
-                       return nil, err
-               }
-       }
-
-       // Read the dump of what the parse tree should be.
-       if string(line) != "#document\n" {
-               return nil, fmt.Errorf(`got %q want "#document\n"`, line)
-       }
-       // inQuote tracks whether the current line is inside a quoted text node
-       // that spans several lines; a blank line there does not end the dump.
-       inQuote := false
-       for {
-               line, err = r.ReadSlice('\n')
-               // io.EOF is tolerated here: the dump may be the last thing in r.
-               if err != nil && err != io.EOF {
-                       return nil, err
-               }
-               trimmed := bytes.Trim(line, "| \n")
-               if len(trimmed) > 0 {
-                       if line[0] == '|' && trimmed[0] == '"' {
-                               inQuote = true
-                       }
-                       if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) {
-                               inQuote = false
-                       }
-               }
-               // Stop at end of input, or at a blank line outside a quoted text node.
-               if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote {
-                       break
-               }
-               b = append(b, line...)
-       }
-       ta.want = string(b)
-       return ta, nil
-}
-
-func dumpIndent(w io.Writer, level int) {
-       io.WriteString(w, "| ")
-       for i := 0; i < level; i++ {
-               io.WriteString(w, "  ")
-       }
-}
-
-// sortedAttributes implements sort.Interface for a slice of attributes,
-// ordering them by namespace and then by key.
-type sortedAttributes []Attribute
-
-// Len returns the number of attributes.
-func (a sortedAttributes) Len() int { return len(a) }
-
-// Less reports whether attribute i sorts before attribute j: the namespaces
-// are compared first, and the keys only when the namespaces are equal.
-func (a sortedAttributes) Less(i, j int) bool {
-       x, y := a[i], a[j]
-       if x.Namespace == y.Namespace {
-               return x.Key < y.Key
-       }
-       return x.Namespace < y.Namespace
-}
-
-// Swap exchanges attributes i and j.
-func (a sortedAttributes) Swap(i, j int) {
-       tmp := a[i]
-       a[i] = a[j]
-       a[j] = tmp
-}
-
-// dumpLevel writes n and, recursively, its children to w in the tree-dump
-// format that the test files' #document sections are compared against: one
-// node per line, each attribute on its own line one level deeper than its
-// element. level is the indentation depth of n.
-//
-// It returns an error for node types that must not appear below the document
-// root (ErrorNode, DocumentNode, scopeMarkerNode) and for unknown types.
-// Errors from writing to w are ignored.
-func dumpLevel(w io.Writer, n *Node, level int) error {
-       dumpIndent(w, level)
-       level++
-       switch n.Type {
-       case ErrorNode:
-               return errors.New("unexpected ErrorNode")
-       case DocumentNode:
-               return errors.New("unexpected DocumentNode")
-       case ElementNode:
-               if n.Namespace != "" {
-                       fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
-               } else {
-                       fmt.Fprintf(w, "<%s>", n.Data)
-               }
-               // The conversion shares n.Attr's backing array, so this sort
-               // reorders the node's own attributes in place.
-               attr := sortedAttributes(n.Attr)
-               sort.Sort(attr)
-               for _, a := range attr {
-                       io.WriteString(w, "\n")
-                       dumpIndent(w, level)
-                       if a.Namespace != "" {
-                               fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
-                       } else {
-                               fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
-                       }
-               }
-               // An HTML <template> gets an extra "content" line, and its
-               // children are dumped one level below that line.
-               if n.Namespace == "" && n.DataAtom == atom.Template {
-                       io.WriteString(w, "\n")
-                       dumpIndent(w, level)
-                       level++
-                       io.WriteString(w, "content")
-               }
-       case TextNode:
-               fmt.Fprintf(w, `"%s"`, n.Data)
-       case CommentNode:
-               fmt.Fprintf(w, "<!-- %s -->", n.Data)
-       case DoctypeNode:
-               fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
-               if n.Attr != nil {
-                       var p, s string
-                       for _, a := range n.Attr {
-                               switch a.Key {
-                               case "public":
-                                       p = a.Val
-                               case "system":
-                                       s = a.Val
-                               }
-                       }
-                       // Both identifiers are printed as soon as either is
-                       // non-empty.
-                       if p != "" || s != "" {
-                               fmt.Fprintf(w, ` "%s"`, p)
-                               fmt.Fprintf(w, ` "%s"`, s)
-                       }
-               }
-               io.WriteString(w, ">")
-       case scopeMarkerNode:
-               return errors.New("unexpected scopeMarkerNode")
-       default:
-               return errors.New("unknown node type")
-       }
-       io.WriteString(w, "\n")
-       for c := n.FirstChild; c != nil; c = c.NextSibling {
-               if err := dumpLevel(w, c, level); err != nil {
-                       return err
-               }
-       }
-       return nil
-}
-
-// dump returns the tree dump of n's children, each dumped by dumpLevel at
-// level 0; n itself is not printed. The result is empty when n is nil or has
-// no children. On a dumpLevel error it returns an empty string and the error.
-func dump(n *Node) (string, error) {
-       var out bytes.Buffer
-       if n != nil {
-               for child := n.FirstChild; child != nil; child = child.NextSibling {
-                       if err := dumpLevel(&out, child, 0); err != nil {
-                               return "", err
-                       }
-               }
-       }
-       return out.String(), nil
-}
-
-// testDataDirs lists the directories in which TestParser looks for *.dat
-// test files. Each entry ends in a slash because the glob pattern is formed
-// by appending "*.dat" directly.
-var testDataDirs = []string{"testdata/webkit/", "testdata/go/"}
-
-func TestParser(t *testing.T) {
-       for _, testDataDir := range testDataDirs {
-               testFiles, err := filepath.Glob(testDataDir + "*.dat")
-               if err != nil {
-                       t.Fatal(err)
-               }
-               for _, tf := range testFiles {
-                       f, err := os.Open(tf)
-                       if err != nil {
-                               t.Fatal(err)
-                       }
-                       defer f.Close()
-                       r := bufio.NewReader(f)
-
-                       for i := 0; ; i++ {
-                               ta, err := readParseTest(r)
-                               if err == io.EOF {
-                                       break
-                               }
-                               if err != nil {
-                                       t.Fatal(err)
-                               }
-                               if parseTestBlacklist[ta.text] {
-                                       continue
-                               }
-
-                               err = testParseCase(ta.text, ta.want, ta.context, ParseOptionEnableScripting(ta.scripting))
-
-                               if err != nil {
-                                       t.Errorf("%s test #%d %q, %s", tf, i, ta.text, err)
-                               }
-                       }
-               }
-       }
-}
-
-// TestParserWithoutScripting parses a document containing <noscript> with
-// scripting disabled and checks the resulting tree dump.
-//
-// Issue 16318
-func TestParserWithoutScripting(t *testing.T) {
-       const text = `<noscript><img src='https://golang.org/doc/gopher/frontpage.png' /></noscript><p><img src='https://golang.org/doc/gopher/doc.png' /></p>`
-       const want = `| <html>
-|   <head>
-|     <noscript>
-|   <body>
-|     <img>
-|       src="https://golang.org/doc/gopher/frontpage.png"
-|     <p>
-|       <img>
-|         src="https://golang.org/doc/gopher/doc.png"
-`
-
-       err := testParseCase(text, want, "", ParseOptionEnableScripting(false))
-       if err != nil {
-               t.Errorf("test with scripting is disabled, %q, %s", text, err)
-       }
-}
-
-// testParseCase tests one test case from the test files. If the test does not
-// pass, it returns an error that explains the failure.
-// text is the HTML to be parsed, want is a dump of the correct parse tree,
-// and context is the name of the context node, if any.
-//
-// A context of the form "namespace name" is split at the first space into
-// the context node's Namespace and Data. A panic raised while the case runs
-// is recovered and returned as the error.
-func testParseCase(text, want, context string, opts ...ParseOption) (err error) {
-       // Turn a panic into the named result err, so that one bad case is
-       // reported as a failure instead of crashing the test binary.
-       defer func() {
-               if x := recover(); x != nil {
-                       switch e := x.(type) {
-                       case error:
-                               err = e
-                       default:
-                               err = fmt.Errorf("%v", e)
-                       }
-               }
-       }()
-
-       var doc *Node
-       if context == "" {
-               doc, err = ParseWithOptions(strings.NewReader(text), opts...)
-               if err != nil {
-                       return err
-               }
-       } else {
-               namespace := ""
-               if i := strings.IndexByte(context, ' '); i >= 0 {
-                       namespace, context = context[:i], context[i+1:]
-               }
-               contextNode := &Node{
-                       Data:      context,
-                       DataAtom:  atom.Lookup([]byte(context)),
-                       Namespace: namespace,
-                       Type:      ElementNode,
-               }
-               // This err shadows the named result; it is returned explicitly.
-               nodes, err := ParseFragmentWithOptions(strings.NewReader(text), contextNode, opts...)
-               if err != nil {
-                       return err
-               }
-               // Hang the fragment's nodes under a fresh document node so
-               // that the checks below treat both cases alike.
-               doc = &Node{
-                       Type: DocumentNode,
-               }
-               for _, n := range nodes {
-                       doc.AppendChild(n)
-               }
-       }
-
-       if err := checkTreeConsistency(doc); err != nil {
-               return err
-       }
-
-       got, err := dump(doc)
-       if err != nil {
-               return err
-       }
-       // Compare the parsed tree to the #document section.
-       if got != want {
-               return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want)
-       }
-
-       // The render round trip is skipped for blacklisted inputs and for
-       // every fragment case.
-       if renderTestBlacklist[text] || context != "" {
-               return nil
-       }
-
-       // Check that rendering and re-parsing results in an identical tree.
-       // Render writes into the pipe from its own goroutine while the parser
-       // reads from the other end; CloseWithError hands Render's result (nil
-       // on success) to the reading side.
-       pr, pw := io.Pipe()
-       go func() {
-               pw.CloseWithError(Render(pw, doc))
-       }()
-       doc1, err := ParseWithOptions(pr, opts...)
-       if err != nil {
-               return err
-       }
-       got1, err := dump(doc1)
-       if err != nil {
-               return err
-       }
-       if got != got1 {
-               return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1)
-       }
-
-       return nil
-}
-
-// parseTestBlacklist is the set of test inputs, keyed by the exact text of
-// the #data section, that TestParser skips entirely. Some test inputs are
-// simply skipped - we would otherwise fail the test.
-var parseTestBlacklist = map[string]bool{
-       // See the a.Template TODO in inHeadIM.
-       `<math><template><mo><template>`:                                     true,
-       `<template><svg><foo><template><foreignObject><div></template><div>`: true,
-}
-
-// renderTestBlacklist is the set of test inputs, keyed by the exact input
-// text, for which testParseCase skips the render-and-re-parse check.
-//
-// Some test inputs result in parse trees that are not 'well-formed' despite
-// following the HTML5 recovery algorithms. Rendering and re-parsing such a
-// tree will not result in an exact clone of that tree. We blacklist such
-// inputs from the render test.
-var renderTestBlacklist = map[string]bool{
-       // The second <a> will be reparented to the first <table>'s parent. This
-       // results in an <a> whose parent is an <a>, which is not 'well-formed'.
-       `<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
-       // The same thing with a <p>:
-       `<p><table></p>`: true,
-       // More cases of <a> being reparented:
-       `<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
-       `<a><table><a></table><p><a><div><a>`:                                     true,
-       `<a><table><td><a><table></table><a></tr><a></table><a>`:                  true,
-       `<template><a><table><a>`:                                                 true,
-       // A similar reparenting situation involving <nobr>:
-       `<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`: true,
-       // A <plaintext> element is reparented, putting it before a table.
-       // A <plaintext> element can't have anything after it in HTML.
-       `<table><plaintext><td>`:                                   true,
-       `<!doctype html><table><plaintext></plaintext>`:            true,
-       `<!doctype html><table><tbody><plaintext></plaintext>`:     true,
-       `<!doctype html><table><tbody><tr><plaintext></plaintext>`: true,
-       // A form inside a table inside a form doesn't work either.
-       `<!doctype html><form><table></form><form></table></form>`: true,
-       // A script that ends at EOF may escape its own closing tag when rendered.
-       `<!doctype html><script><!--<script `:          true,
-       `<!doctype html><script><!--<script <`:         true,
-       `<!doctype html><script><!--<script <a`:        true,
-       `<!doctype html><script><!--<script </`:        true,
-       `<!doctype html><script><!--<script </s`:       true,
-       `<!doctype html><script><!--<script </script`:  true,
-       `<!doctype html><script><!--<script </scripta`: true,
-       `<!doctype html><script><!--<script -`:         true,
-       `<!doctype html><script><!--<script -a`:        true,
-       `<!doctype html><script><!--<script -<`:        true,
-       `<!doctype html><script><!--<script --`:        true,
-       `<!doctype html><script><!--<script --a`:       true,
-       `<!doctype html><script><!--<script --<`:       true,
-       `<script><!--<script `:                         true,
-       `<script><!--<script <a`:                       true,
-       `<script><!--<script </script`:                 true,
-       `<script><!--<script </scripta`:                true,
-       `<script><!--<script -`:                        true,
-       `<script><!--<script -a`:                       true,
-       `<script><!--<script --`:                       true,
-       `<script><!--<script --a`:                      true,
-       `<script><!--<script <`:                        true,
-       `<script><!--<script </`:                       true,
-       `<script><!--<script </s`:                      true,
-       // Reconstructing the active formatting elements results in a <plaintext>
-       // element that contains an <a> element.
-       `<!doctype html><p><a><plaintext>b`:                       true,
-       `<table><math><select><mi><select></table>`:               true,
-       `<!doctype html><table><colgroup><plaintext></plaintext>`: true,
-       `<!doctype html><svg><plaintext>a</plaintext>b`:           true,
-}
-
-func TestNodeConsistency(t *testing.T) {
-       // inconsistentNode is a Node whose DataAtom and Data do not agree.
-       inconsistentNode := &Node{
-               Type:     ElementNode,
-               DataAtom: atom.Frameset,
-               Data:     "table",
-       }
-       if _, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode); err == nil {
-               t.Errorf("got nil error, want non-nil")
-       }
-}
-
-func TestParseFragmentWithNilContext(t *testing.T) {
-       // This shouldn't panic.
-       ParseFragment(strings.NewReader("<p>hello</p>"), nil)
-}
-
-func TestParseFragmentForeignContentTemplates(t *testing.T) {
-       srcs := []string{
-               "<math><html><template><mn><template></template></template>",
-               "<math><math><head><mi><template>",
-       }
-       for _, src := range srcs {
-               // The next line shouldn't infinite-loop.
-               ParseFragment(strings.NewReader(src), nil)
-       }
-}
-
-func BenchmarkParser(b *testing.B) {
-       buf, err := os.ReadFile("testdata/go1.html")
-       if err != nil {
-               b.Fatalf("could not read testdata/go1.html: %v", err)
-       }
-       b.SetBytes(int64(len(buf)))
-       runtime.GC()
-       b.ReportAllocs()
-       b.ResetTimer()
-       for i := 0; i < b.N; i++ {
-               Parse(bytes.NewBuffer(buf))
-       }
-}
diff --git a/html/render.go b/html/render.go
deleted file mode 100644 (file)
index e8c1233..0000000
+++ /dev/null
@@ -1,293 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "bufio"
-       "errors"
-       "fmt"
-       "io"
-       "strings"
-)
-
-// writer is the set of write methods the rendering code uses. Render uses
-// its io.Writer argument directly when it implements writer, and otherwise
-// wraps it in a bufio.Writer.
-type writer interface {
-       io.Writer
-       io.ByteWriter
-       WriteString(string) (int, error)
-}
-
-// Render renders the parse tree n to the given writer.
-//
-// Rendering is done on a 'best effort' basis: calling Parse on the output of
-// Render will always result in something similar to the original tree, but it
-// is not necessarily an exact clone unless the original tree was 'well-formed'.
-// 'Well-formed' is not easily specified; the HTML5 specification is
-// complicated.
-//
-// Calling Parse on arbitrary input typically results in a 'well-formed' parse
-// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
-// For example, in a 'well-formed' parse tree, no <a> element is a child of
-// another <a> element: parsing "<a><a>" results in two sibling elements.
-// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
-// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
-// children; the <a> is reparented to the <table>'s parent. However, calling
-// Parse on "<a><table><a>" does not return an error, but the result has an <a>
-// element with an <a> child, and is therefore not 'well-formed'.
-//
-// Programmatically constructed trees are typically also 'well-formed', but it
-// is possible to construct a tree that looks innocuous but, when rendered and
-// re-parsed, results in a different tree. A simple example is that a solitary
-// text node would become a tree containing <html>, <head> and <body> elements.
-// Another example is that the programmatic equivalent of "a<head>b</head>c"
-// becomes "<html><head><head/><body>abc</body></html>".
-func Render(w io.Writer, n *Node) error {
-       if x, ok := w.(writer); ok {
-               return render(x, n)
-       }
-       buf := bufio.NewWriter(w)
-       if err := render(buf, n); err != nil {
-               return err
-       }
-       return buf.Flush()
-}
-
-// plaintextAbort is returned from render1 when a <plaintext> element
-// has been rendered. No more end tags should be rendered after that.
-// It is an internal signal rather than a failure: render turns it back
-// into a nil error.
-var plaintextAbort = errors.New("html: internal error (plaintext abort)")
-
-func render(w writer, n *Node) error {
-       err := render1(w, n)
-       if err == plaintextAbort {
-               err = nil
-       }
-       return err
-}
-
-// render1 writes the serialization of n, including all of its descendants,
-// to w.
-//
-// Non-element nodes are written and returned from inside the switch; only
-// element nodes continue to the tag-rendering code after it. render1 returns
-// the first write error, an error for an ErrorNode, for an unknown node type
-// or for a void element that has children, and plaintextAbort once a
-// <plaintext> element's children have been written.
-func render1(w writer, n *Node) error {
-       // Render non-element nodes; these are the easy cases.
-       switch n.Type {
-       case ErrorNode:
-               return errors.New("html: cannot render an ErrorNode node")
-       case TextNode:
-               return escape(w, n.Data)
-       case DocumentNode:
-               for c := n.FirstChild; c != nil; c = c.NextSibling {
-                       if err := render1(w, c); err != nil {
-                               return err
-                       }
-               }
-               return nil
-       case ElementNode:
-               // No-op.
-       case CommentNode:
-               if _, err := w.WriteString("<!--"); err != nil {
-                       return err
-               }
-               if err := escapeComment(w, n.Data); err != nil {
-                       return err
-               }
-               if _, err := w.WriteString("-->"); err != nil {
-                       return err
-               }
-               return nil
-       case DoctypeNode:
-               if _, err := w.WriteString("<!DOCTYPE "); err != nil {
-                       return err
-               }
-               if err := escape(w, n.Data); err != nil {
-                       return err
-               }
-               if n.Attr != nil {
-                       var p, s string
-                       for _, a := range n.Attr {
-                               switch a.Key {
-                               case "public":
-                                       p = a.Val
-                               case "system":
-                                       s = a.Val
-                               }
-                       }
-                       // A public identifier is written as PUBLIC "p",
-                       // followed by the system identifier if there is
-                       // one; a system identifier alone is written as
-                       // SYSTEM "s".
-                       if p != "" {
-                               if _, err := w.WriteString(" PUBLIC "); err != nil {
-                                       return err
-                               }
-                               if err := writeQuoted(w, p); err != nil {
-                                       return err
-                               }
-                               if s != "" {
-                                       if err := w.WriteByte(' '); err != nil {
-                                               return err
-                                       }
-                                       if err := writeQuoted(w, s); err != nil {
-                                               return err
-                                       }
-                               }
-                       } else if s != "" {
-                               if _, err := w.WriteString(" SYSTEM "); err != nil {
-                                       return err
-                               }
-                               if err := writeQuoted(w, s); err != nil {
-                                       return err
-                               }
-                       }
-               }
-               return w.WriteByte('>')
-       case RawNode:
-               // Raw nodes are written verbatim, without escaping.
-               _, err := w.WriteString(n.Data)
-               return err
-       default:
-               return errors.New("html: unknown node type")
-       }
-
-       // Render the <xxx> opening tag.
-       if err := w.WriteByte('<'); err != nil {
-               return err
-       }
-       if _, err := w.WriteString(n.Data); err != nil {
-               return err
-       }
-       for _, a := range n.Attr {
-               if err := w.WriteByte(' '); err != nil {
-                       return err
-               }
-               if a.Namespace != "" {
-                       if _, err := w.WriteString(a.Namespace); err != nil {
-                               return err
-                       }
-                       if err := w.WriteByte(':'); err != nil {
-                               return err
-                       }
-               }
-               if _, err := w.WriteString(a.Key); err != nil {
-                       return err
-               }
-               if _, err := w.WriteString(`="`); err != nil {
-                       return err
-               }
-               if err := escape(w, a.Val); err != nil {
-                       return err
-               }
-               if err := w.WriteByte('"'); err != nil {
-                       return err
-               }
-       }
-       // Void elements are closed with "/>" and get no end tag.
-       if voidElements[n.Data] {
-               if n.FirstChild != nil {
-                       return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
-               }
-               _, err := w.WriteString("/>")
-               return err
-       }
-       if err := w.WriteByte('>'); err != nil {
-               return err
-       }
-
-       // Add initial newline where there is danger of a newline being ignored.
-       if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
-               switch n.Data {
-               case "pre", "listing", "textarea":
-                       if err := w.WriteByte('\n'); err != nil {
-                               return err
-                       }
-               }
-       }
-
-       // Render any child nodes
-       if childTextNodesAreLiteral(n) {
-               // Text children are written unescaped here; other children
-               // are rendered as usual.
-               for c := n.FirstChild; c != nil; c = c.NextSibling {
-                       if c.Type == TextNode {
-                               if _, err := w.WriteString(c.Data); err != nil {
-                                       return err
-                               }
-                       } else {
-                               if err := render1(w, c); err != nil {
-                                       return err
-                               }
-                       }
-               }
-               if n.Data == "plaintext" {
-                       // Don't render anything else. <plaintext> must be the
-                       // last element in the file, with no closing tag.
-                       return plaintextAbort
-               }
-       } else {
-               for c := n.FirstChild; c != nil; c = c.NextSibling {
-                       if err := render1(w, c); err != nil {
-                               return err
-                       }
-               }
-       }
-
-       // Render the </xxx> closing tag.
-       if _, err := w.WriteString("</"); err != nil {
-               return err
-       }
-       if _, err := w.WriteString(n.Data); err != nil {
-               return err
-       }
-       return w.WriteByte('>')
-}
-
-func childTextNodesAreLiteral(n *Node) bool {
-       // Per WHATWG HTML 13.3, if the parent of the current node is a style,
-       // script, xmp, iframe, noembed, noframes, or plaintext element, and the
-       // current node is a text node, append the value of the node's data
-       // literally. The specification is not explicit about it, but we only
-       // enforce this if we are in the HTML namespace (i.e. when the namespace is
-       // "").
-       // NOTE: we also always include noscript elements, although the
-       // specification states that they should only be rendered as such if
-       // scripting is enabled for the node (which is not something we track).
-       if n.Namespace != "" {
-               return false
-       }
-       switch n.Data {
-       case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
-               return true
-       default:
-               return false
-       }
-}
-
-// writeQuoted writes s to w surrounded by quotes. Normally it will use double
-// quotes, but if s contains a double quote, it will use single quotes.
-// It is used for writing the identifiers in a doctype declaration.
-// In valid HTML, they can't contain both types of quotes.
-func writeQuoted(w writer, s string) error {
-       var q byte = '"'
-       if strings.Contains(s, `"`) {
-               q = '\''
-       }
-       if err := w.WriteByte(q); err != nil {
-               return err
-       }
-       if _, err := w.WriteString(s); err != nil {
-               return err
-       }
-       if err := w.WriteByte(q); err != nil {
-               return err
-       }
-       return nil
-}
-
-// Section 12.1.2, "Elements", gives this list of void elements. Void elements
-// are those that can't have any contents. The map is keyed by tag name.
-var voidElements = map[string]bool{
-       "area":   true,
-       "base":   true,
-       "br":     true,
-       "col":    true,
-       "embed":  true,
-       "hr":     true,
-       "img":    true,
-       "input":  true,
-       "keygen": true, // "keygen" has been removed from the spec, but is kept here for backwards compatibility.
-       "link":   true,
-       "meta":   true,
-       "param":  true,
-       "source": true,
-       "track":  true,
-       "wbr":    true,
-}
diff --git a/html/render_test.go b/html/render_test.go
deleted file mode 100644 (file)
index 22d0864..0000000
+++ /dev/null
@@ -1,207 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "bytes"
-       "fmt"
-       "strings"
-       "testing"
-)
-
-func TestRenderer(t *testing.T) {
-       nodes := [...]*Node{
-               0: {
-                       Type: ElementNode,
-                       Data: "html",
-               },
-               1: {
-                       Type: ElementNode,
-                       Data: "head",
-               },
-               2: {
-                       Type: ElementNode,
-                       Data: "body",
-               },
-               3: {
-                       Type: TextNode,
-                       Data: "0<1",
-               },
-               4: {
-                       Type: ElementNode,
-                       Data: "p",
-                       Attr: []Attribute{
-                               {
-                                       Key: "id",
-                                       Val: "A",
-                               },
-                               {
-                                       Key: "foo",
-                                       Val: `abc"def`,
-                               },
-                       },
-               },
-               5: {
-                       Type: TextNode,
-                       Data: "2",
-               },
-               6: {
-                       Type: ElementNode,
-                       Data: "b",
-                       Attr: []Attribute{
-                               {
-                                       Key: "empty",
-                                       Val: "",
-                               },
-                       },
-               },
-               7: {
-                       Type: TextNode,
-                       Data: "3",
-               },
-               8: {
-                       Type: ElementNode,
-                       Data: "i",
-                       Attr: []Attribute{
-                               {
-                                       Key: "backslash",
-                                       Val: `\`,
-                               },
-                       },
-               },
-               9: {
-                       Type: TextNode,
-                       Data: "&4",
-               },
-               10: {
-                       Type: TextNode,
-                       Data: "5",
-               },
-               11: {
-                       Type: ElementNode,
-                       Data: "blockquote",
-               },
-               12: {
-                       Type: ElementNode,
-                       Data: "br",
-               },
-               13: {
-                       Type: TextNode,
-                       Data: "6",
-               },
-               14: {
-                       Type: CommentNode,
-                       Data: "comm",
-               },
-               15: {
-                       Type: CommentNode,
-                       Data: "x-->y", // Needs escaping.
-               },
-               16: {
-                       Type: RawNode,
-                       Data: "7<pre>8</pre>9",
-               },
-       }
-
-       // Build a tree out of those nodes, based on a textual representation.
-       // Only the ".\t"s are significant. The trailing HTML-like text is
-       // just commentary. The "0:" prefixes are for easy cross-reference with
-       // the nodes array.
-       treeAsText := [...]string{
-               0:  `<html>`,
-               1:  `.  <head>`,
-               2:  `.  <body>`,
-               3:  `.  .       "0&lt;1"`,
-               4:  `.  .       <p id="A" foo="abc&#34;def">`,
-               5:  `.  .       .       "2"`,
-               6:  `.  .       .       <b empty="">`,
-               7:  `.  .       .       .       "3"`,
-               8:  `.  .       .       <i backslash="\">`,
-               9:  `.  .       .       .       "&amp;4"`,
-               10: `.  .       "5"`,
-               11: `.  .       <blockquote>`,
-               12: `.  .       <br>`,
-               13: `.  .       "6"`,
-               14: `.  .       "<!--comm-->"`,
-               15: `.  .       "<!--x--&gt;y-->"`,
-               16: `.  .       "7<pre>8</pre>9"`,
-       }
-       if len(nodes) != len(treeAsText) {
-               t.Fatal("len(nodes) != len(treeAsText)")
-       }
-       var stack [8]*Node
-       for i, line := range treeAsText {
-               level := 0
-               for line[0] == '.' {
-                       // Strip a leading ".\t".
-                       line = line[2:]
-                       level++
-               }
-               n := nodes[i]
-               if level == 0 {
-                       if stack[0] != nil {
-                               t.Fatal("multiple root nodes")
-                       }
-                       stack[0] = n
-               } else {
-                       stack[level-1].AppendChild(n)
-                       stack[level] = n
-                       for i := level + 1; i < len(stack); i++ {
-                               stack[i] = nil
-                       }
-               }
-               // At each stage of tree construction, we check all nodes for consistency.
-               for j, m := range nodes {
-                       if err := checkNodeConsistency(m); err != nil {
-                               t.Fatalf("i=%d, j=%d: %v", i, j, err)
-                       }
-               }
-       }
-
-       want := `<html><head></head><body>0&lt;1<p id="A" foo="abc&#34;def">` +
-               `2<b empty="">3</b><i backslash="\">&amp;4</i></p>` +
-               `5<blockquote></blockquote><br/>6<!--comm--><!--x--&gt;y-->7<pre>8</pre>9</body></html>`
-       b := new(bytes.Buffer)
-       if err := Render(b, nodes[0]); err != nil {
-               t.Fatal(err)
-       }
-       if got := b.String(); got != want {
-               t.Errorf("got vs want:\n%s\n%s\n", got, want)
-       }
-}
-
-func TestRenderTextNodes(t *testing.T) {
-       elements := []string{"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"}
-       for _, namespace := range []string{
-               "", // html
-               "svg",
-               "math",
-       } {
-               for _, e := range elements {
-                       var namespaceOpen, namespaceClose string
-                       if namespace != "" {
-                               namespaceOpen, namespaceClose = fmt.Sprintf("<%s>", namespace), fmt.Sprintf("</%s>", namespace)
-                       }
-                       doc := fmt.Sprintf(`<html><head></head><body>%s<%s>&</%s>%s</body></html>`, namespaceOpen, e, e, namespaceClose)
-                       n, err := Parse(strings.NewReader(doc))
-                       if err != nil {
-                               t.Fatal(err)
-                       }
-                       b := bytes.NewBuffer(nil)
-                       if err := Render(b, n); err != nil {
-                               t.Fatal(err)
-                       }
-
-                       expected := doc
-                       if namespace != "" {
-                               expected = strings.Replace(expected, "&", "&amp;", 1)
-                       }
-
-                       if b.String() != expected {
-                               t.Errorf("unexpected output: got %q, want %q", b.String(), expected)
-                       }
-               }
-       }
-}
diff --git a/html/testdata/go/issue_30600_parse_panics_in_cell_mode.dat b/html/testdata/go/issue_30600_parse_panics_in_cell_mode.dat
deleted file mode 100644 (file)
index 741f4b1..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#data
-<table><math><th><mo><select></table>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math th>
-|         <math mo>
-|           <select>
-|     <table>
diff --git a/html/testdata/go/issue_30961_error_nested_unknown_tag_types.dat b/html/testdata/go/issue_30961_error_nested_unknown_tag_types.dat
deleted file mode 100644 (file)
index e314964..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#data
-<html><head></head><body><tag1><tag2 /><p></p></tag1><div></div></body></html>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <tag1>
-|       <tag2>
-|         <p>
-|     <div>
diff --git a/html/testdata/go/raw_tags_to_be_ignored.dat b/html/testdata/go/raw_tags_to_be_ignored.dat
deleted file mode 100644 (file)
index 50bac59..0000000
+++ /dev/null
@@ -1,97 +0,0 @@
-#data
-<!doctype html><table><select><iframe>a<caption>b
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "a"
-|     <table>
-|       <caption>
-|         "b"
-
-#data
-<!doctype html><table><select><noembed>a<caption>b
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "a"
-|     <table>
-|       <caption>
-|         "b"
-
-#data
-<!doctype html><table><select><noframes>a<caption>b
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "a"
-|     <table>
-|       <caption>
-|         "b"
-
-#data
-<!doctype html><table><select><noscript>a<caption>b
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "a"
-|     <table>
-|       <caption>
-|         "b"
-
-#data
-<!doctype html><table><select><style>a<caption>b
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "a"
-|     <table>
-|       <caption>
-|         "b"
-
-#data
-<!doctype html><table><select><title>a<caption>b
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "a"
-|     <table>
-|       <caption>
-|         "b"
-
-#data
-<!doctype html><table><select><xmp>a<caption>b
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "a"
-|     <table>
-|       <caption>
-|         "b"
diff --git a/html/testdata/go/select.dat b/html/testdata/go/select.dat
deleted file mode 100644 (file)
index 684554c..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#data
-<table><math><select><mi><select></table>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math select>
-|         <math mi>
-|           <select>
-|     <table>
diff --git a/html/testdata/go/template.dat b/html/testdata/go/template.dat
deleted file mode 100644 (file)
index b923b0f..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-#data
-<body><template><yt-icon-button></yt-icon-button><form><paper-input></paper-input></form><style></style></template>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <yt-icon-button>
-|         <form>
-|           <paper-input>
-|         <style>
-
-#data
-<template><tBody><isindex/action=0>
-#errors
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <tbody>
-|         <isindex>
-|           action="0"
-|   <body>
-
-#data
-<math><template><mo><template>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math template>
-|         <math mo>
-|           <template>
-|             content
-
-#data
-<svg><template><desc><t><svg></template>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg template>
-|         <svg desc>
-|           <t>
-|             <svg svg>
-
-#data
-<math><template><mn><b></template>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math template>
-|         <math mn>
-|           <b>
diff --git a/html/testdata/go1.html b/html/testdata/go1.html
deleted file mode 100644 (file)
index 086c011..0000000
+++ /dev/null
@@ -1,2237 +0,0 @@
-<!DOCTYPE html>
-<html>
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-
-  <title>Go 1 Release Notes - The Go Programming Language</title>
-
-<link type="text/css" rel="stylesheet" href="/doc/style.css">
-<script type="text/javascript" src="/doc/godocs.js"></script>
-
-<link rel="search" type="application/opensearchdescription+xml" title="godoc" href="/opensearch.xml" />
-
-<script type="text/javascript">
-var _gaq = _gaq || [];
-_gaq.push(["_setAccount", "UA-11222381-2"]);
-_gaq.push(["_trackPageview"]);
-</script>
-</head>
-<body>
-
-<div id="topbar"><div class="container wide">
-
-<form method="GET" action="/search">
-<div id="menu">
-<a href="/doc/">Documents</a>
-<a href="/ref/">References</a>
-<a href="/pkg/">Packages</a>
-<a href="/project/">The Project</a>
-<a href="/help/">Help</a>
-<input type="text" id="search" name="q" class="inactive" value="Search">
-</div>
-<div id="heading"><a href="/">The Go Programming Language</a></div>
-</form>
-
-</div></div>
-
-<div id="page" class="wide">
-
-
-  <div id="minusone"><g:minusone size="small" annotation="none"></g:minusone></div>
-  <h1>Go 1 Release Notes</h1>
-
-
-
-
-<div id="nav"></div>
-
-
-
-
-<h2 id="introduction">Introduction to Go 1</h2>
-
-<p>
-Go version 1, Go 1 for short, defines a language and a set of core libraries
-that provide a stable foundation for creating reliable products, projects, and
-publications.
-</p>
-
-<p>
-The driving motivation for Go 1 is stability for its users. People should be able to
-write Go programs and expect that they will continue to compile and run without
-change, on a time scale of years, including in production environments such as
-Google App Engine. Similarly, people should be able to write books about Go, be
-able to say which version of Go the book is describing, and have that version
-number still be meaningful much later.
-</p>
-
-<p>
-Code that compiles in Go 1 should, with few exceptions, continue to compile and
-run throughout the lifetime of that version, even as we issue updates and bug
-fixes such as Go version 1.1, 1.2, and so on. Other than critical fixes, changes
-made to the language and library for subsequent releases of Go 1 may
-add functionality but will not break existing Go 1 programs.
-<a href="go1compat.html">The Go 1 compatibility document</a>
-explains the compatibility guidelines in more detail.
-</p>
-
-<p>
-Go 1 is a representation of Go as it is used today, not a wholesale rethinking of
-the language. We avoided designing new features and instead focused on cleaning
-up problems and inconsistencies and improving portability. There are a number of
-changes to the Go language and packages that we had considered for some time and
-prototyped but not released primarily because they are significant and
-backwards-incompatible. Go 1 was an opportunity to get them out, which is
-helpful for the long term, but also means that Go 1 introduces incompatibilities
-for old programs. Fortunately, the <code>go</code> <code>fix</code> tool can
-automate much of the work needed to bring programs up to the Go 1 standard.
-</p>
-
-<p>
-This document outlines the major changes in Go 1 that will affect programmers
-updating existing code; its reference point is the prior release, r60 (tagged as
-r60.3). It also explains how to update code from r60 to run under Go 1.
-</p>
-
-<h2 id="language">Changes to the language</h2>
-
-<h3 id="append">Append</h3>
-
-<p>
-The <code>append</code> predeclared variadic function makes it easy to grow a slice
-by adding elements to the end.
-A common use is to add bytes to the end of a byte slice when generating output.
-However, <code>append</code> did not provide a way to append a string to a <code>[]byte</code>,
-which is another common case.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/greeting := ..byte/` `/append.*hello/`}}
--->    greeting := []byte{}
-    greeting = append(greeting, []byte(&#34;hello &#34;)...)</pre>
-
-<p>
-By analogy with the similar property of <code>copy</code>, Go 1
-permits a string to be appended (byte-wise) directly to a byte
-slice, reducing the friction between strings and byte slices.
-The conversion is no longer necessary:
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/append.*world/`}}
--->    greeting = append(greeting, &#34;world&#34;...)</pre>
-
-<p>
-<em>Updating</em>:
-This is a new feature, so existing code needs no changes.
-</p>
-
-<h3 id="close">Close</h3>
-
-<p>
-The <code>close</code> predeclared function provides a mechanism
-for a sender to signal that no more values will be sent.
-It is important to the implementation of <code>for</code> <code>range</code>
-loops over channels and is helpful in other situations.
-Partly by design and partly because of race conditions that can occur otherwise,
-it is intended for use only by the goroutine sending on the channel,
-not by the goroutine receiving data.
-However, before Go 1 there was no compile-time checking that <code>close</code>
-was being used correctly.
-</p>
-
-<p>
-To close this gap, at least in part, Go 1 disallows <code>close</code> on receive-only channels.
-Attempting to close such a channel is a compile-time error.
-</p>
-
-<pre>
-    var c chan int
-    var csend chan&lt;- int = c
-    var crecv &lt;-chan int = c
-    close(c)     // legal
-    close(csend) // legal
-    close(crecv) // illegal
-</pre>
-
-<p>
-<em>Updating</em>:
-Existing code that attempts to close a receive-only channel was
-erroneous even before Go 1 and should be fixed.  The compiler will
-now reject such code.
-</p>
-
-<h3 id="literals">Composite literals</h3>
-
-<p>
-In Go 1, a composite literal of array, slice, or map type can elide the
-type specification for the elements' initializers if they are of pointer type.
-All four of the initializations in this example are legal; the last one was illegal before Go 1.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/type Date struct/` `/STOP/`}}
--->    type Date struct {
-        month string
-        day   int
-    }
-    <span class="comment">// Struct values, fully qualified; always legal.</span>
-    holiday1 := []Date{
-        Date{&#34;Feb&#34;, 14},
-        Date{&#34;Nov&#34;, 11},
-        Date{&#34;Dec&#34;, 25},
-    }
-    <span class="comment">// Struct values, type name elided; always legal.</span>
-    holiday2 := []Date{
-        {&#34;Feb&#34;, 14},
-        {&#34;Nov&#34;, 11},
-        {&#34;Dec&#34;, 25},
-    }
-    <span class="comment">// Pointers, fully qualified, always legal.</span>
-    holiday3 := []*Date{
-        &amp;Date{&#34;Feb&#34;, 14},
-        &amp;Date{&#34;Nov&#34;, 11},
-        &amp;Date{&#34;Dec&#34;, 25},
-    }
-    <span class="comment">// Pointers, type name elided; legal in Go 1.</span>
-    holiday4 := []*Date{
-        {&#34;Feb&#34;, 14},
-        {&#34;Nov&#34;, 11},
-        {&#34;Dec&#34;, 25},
-    }</pre>
-
-<p>
-<em>Updating</em>:
-This change has no effect on existing code, but the command
-<code>gofmt</code> <code>-s</code> applied to existing source
-will, among other things, elide explicit element types wherever permitted.
-</p>
-
-
-<h3 id="init">Goroutines during init</h3>
-
-<p>
-The old language defined that <code>go</code> statements executed during initialization created goroutines but that they did not begin to run until initialization of the entire program was complete.
-This introduced clumsiness in many places and, in effect, limited the utility
-of the <code>init</code> construct:
-if it was possible for another package to use the library during initialization, the library
-was forced to avoid goroutines.
-This design was done for reasons of simplicity and safety but,
-as our confidence in the language grew, it seemed unnecessary.
-Running goroutines during initialization is no more complex or unsafe than running them during normal execution.
-</p>
-
-<p>
-In Go 1, code that uses goroutines can be called from
-<code>init</code> routines and global initialization expressions
-without introducing a deadlock.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/PackageGlobal/` `/^}/`}}
--->var PackageGlobal int
-
-func init() {
-    c := make(chan int)
-    go initializationFunction(c)
-    PackageGlobal = &lt;-c
-}</pre>
-
-<p>
-<em>Updating</em>:
-This is a new feature, so existing code needs no changes,
-although it's possible that code that depends on goroutines not starting before <code>main</code> will break.
-There was no such code in the standard repository.
-</p>
-
-<h3 id="rune">The rune type</h3>
-
-<p>
-The language spec allows the <code>int</code> type to be 32 or 64 bits wide, but current implementations set <code>int</code> to 32 bits even on 64-bit platforms.
-It would be preferable to have <code>int</code> be 64 bits on 64-bit platforms.
-(There are important consequences for indexing large slices.)
-However, this change would waste space when processing Unicode characters with
-the old language because the <code>int</code> type was also used to hold Unicode code points: each code point would waste an extra 32 bits of storage if <code>int</code> grew from 32 bits to 64.
-</p>
-
-<p>
-To make changing to 64-bit <code>int</code> feasible,
-Go 1 introduces a new basic type, <code>rune</code>, to represent
-individual Unicode code points.
-It is an alias for <code>int32</code>, analogous to <code>byte</code>
-as an alias for <code>uint8</code>.
-</p>
-
-<p>
-Character literals such as <code>'a'</code>, <code>'語'</code>, and <code>'\u0345'</code>
-now have default type <code>rune</code>,
-analogous to <code>1.0</code> having default type <code>float64</code>.
-A variable initialized to a character constant will therefore
-have type <code>rune</code> unless otherwise specified.
-</p>
-
-<p>
-Libraries have been updated to use <code>rune</code> rather than <code>int</code>
-when appropriate. For instance, the functions <code>unicode.ToLower</code> and
-relatives now take and return a <code>rune</code>.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/STARTRUNE/` `/ENDRUNE/`}}
--->    delta := &#39;δ&#39; <span class="comment">// delta has type rune.</span>
-    var DELTA rune
-    DELTA = unicode.ToUpper(delta)
-    epsilon := unicode.ToLower(DELTA + 1)
-    if epsilon != &#39;δ&#39;+1 {
-        log.Fatal(&#34;inconsistent casing for Greek&#34;)
-    }</pre>
-
-<p>
-<em>Updating</em>:
-Most source code will be unaffected by this because the type inference from
-<code>:=</code> initializers introduces the new type silently, and it propagates
-from there.
-Some code may get type errors that a trivial conversion will resolve.
-</p>
-
-<h3 id="error">The error type</h3>
-
-<p>
-Go 1 introduces a new built-in type, <code>error</code>, which has the following definition:
-</p>
-
-<pre>
-    type error interface {
-        Error() string
-    }
-</pre>
-
-<p>
-Since the consequences of this type are all in the package library,
-it is discussed <a href="#errors">below</a>.
-</p>
-
-<h3 id="delete">Deleting from maps</h3>
-
-<p>
-In the old language, to delete the entry with key <code>k</code> from map <code>m</code>, one wrote the statement,
-</p>
-
-<pre>
-    m[k] = value, false
-</pre>
-
-<p>
-This syntax was a peculiar special case, the only two-to-one assignment.
-It required passing a value (usually ignored) that is evaluated but discarded,
-plus a boolean that was nearly always the constant <code>false</code>.
-It did the job but was odd and a point of contention.
-</p>
-
-<p>
-In Go 1, that syntax has gone; instead there is a new built-in
-function, <code>delete</code>.  The call
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/delete\(m, k\)/`}}
--->    delete(m, k)</pre>
-
-<p>
-will delete the map entry retrieved by the expression <code>m[k]</code>.
-There is no return value. Deleting a non-existent entry is a no-op.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will convert expressions of the form <code>m[k] = value,
-false</code> into <code>delete(m, k)</code> when it is clear that
-the ignored value can be safely discarded from the program and
-<code>false</code> refers to the predefined boolean constant.
-The fix tool
-will flag other uses of the syntax for inspection by the programmer.
-</p>
-
-<h3 id="iteration">Iterating in maps</h3>
-
-<p>
-The old language specification did not define the order of iteration for maps,
-and in practice it differed across hardware platforms.
-This caused tests that iterated over maps to be fragile and non-portable, with the
-unpleasant property that a test might always pass on one machine but break on another.
-</p>
-
-<p>
-In Go 1, the order in which elements are visited when iterating
-over a map using a <code>for</code> <code>range</code> statement
-is defined to be unpredictable, even if the same loop is run multiple
-times with the same map.
-Code should not assume that the elements are visited in any particular order.
-</p>
-
-<p>
-This change means that code that depends on iteration order is very likely to break early and be fixed long before it becomes a problem.
-Just as important, it allows the map implementation to ensure better map balancing even when programs are using range loops to select an element from a map.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/Sunday/` `/^     }/`}}
--->    m := map[string]int{&#34;Sunday&#34;: 0, &#34;Monday&#34;: 1}
-    for name, value := range m {
-        <span class="comment">// This loop should not assume Sunday will be visited first.</span>
-        f(name, value)
-    }</pre>
-
-<p>
-<em>Updating</em>:
-This is one change where tools cannot help.  Most existing code
-will be unaffected, but some programs may break or misbehave; we
-recommend manual checking of all range statements over maps to
-verify they do not depend on iteration order. There were a few such
-examples in the standard repository; they have been fixed.
-Note that it was already incorrect to depend on the iteration order, which
-was unspecified. This change codifies the unpredictability.
-</p>
-
-<h3 id="multiple_assignment">Multiple assignment</h3>
-
-<p>
-The language specification has long guaranteed that in assignments
-the right-hand-side expressions are all evaluated before any left-hand-side expressions are assigned.
-To guarantee predictable behavior,
-Go 1 refines the specification further.
-</p>
-
-<p>
-If the left-hand side of the assignment
-statement contains expressions that require evaluation, such as
-function calls or array indexing operations, these will all be done
-using the usual left-to-right rule before any variables are assigned
-their value.  Once everything is evaluated, the actual assignments
-proceed in left-to-right order.
-</p>
-
-<p>
-These examples illustrate the behavior.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/sa :=/` `/then sc.0. = 2/`}}
--->    sa := []int{1, 2, 3}
-    i := 0
-    i, sa[i] = 1, 2 <span class="comment">// sets i = 1, sa[0] = 2</span>
-
-    sb := []int{1, 2, 3}
-    j := 0
-    sb[j], j = 2, 1 <span class="comment">// sets sb[0] = 2, j = 1</span>
-
-    sc := []int{1, 2, 3}
-    sc[0], sc[0] = 1, 2 <span class="comment">// sets sc[0] = 1, then sc[0] = 2 (so sc[0] = 2 at end)</span></pre>
-
-<p>
-<em>Updating</em>:
-This is one change where tools cannot help, but breakage is unlikely.
-No code in the standard repository was broken by this change, and code
-that depended on the previous unspecified behavior was already incorrect.
-</p>
-
-<h3 id="shadowing">Returns and shadowed variables</h3>
-
-<p>
-A common mistake is to use <code>return</code> (without arguments) after an assignment to a variable that has the same name as a result variable but is not the same variable.
-This situation is called <em>shadowing</em>: the result variable has been shadowed by another variable with the same name declared in an inner scope.
-</p>
-
-<p>
-In functions with named return values,
-the Go 1 compilers disallow return statements without arguments if any of the named return values is shadowed at the point of the return statement.
-(It isn't part of the specification, because this is one area we are still exploring;
-the situation is analogous to the compilers rejecting functions that do not end with an explicit return statement.)
-</p>
-
-<p>
-This function implicitly returns a shadowed return value and will be rejected by the compiler:
-</p>
-
-<pre>
-    func Bug() (i, j, k int) {
-        for i = 0; i &lt; 5; i++ {
-            for j := 0; j &lt; 5; j++ { // Redeclares j.
-                k += i*j
-                if k > 100 {
-                    return // Rejected: j is shadowed here.
-                }
-            }
-        }
-        return // OK: j is not shadowed here.
-    }
-</pre>
-
-<p>
-<em>Updating</em>:
-Code that shadows return values in this way will be rejected by the compiler and will need to be fixed by hand.
-The few cases that arose in the standard repository were mostly bugs.
-</p>
-
-<h3 id="unexported">Copying structs with unexported fields</h3>
-
-<p>
-The old language did not allow a package to make a copy of a struct value containing unexported fields belonging to a different package.
-There was, however, a required exception for a method receiver;
-also, the implementations of <code>copy</code> and <code>append</code> have never honored the restriction.
-</p>
-
-<p>
-Go 1 will allow packages to copy struct values containing unexported fields from other packages.
-Besides resolving the inconsistency,
-this change admits a new kind of API: a package can return an opaque value without resorting to a pointer or interface.
-The new implementations of <code>time.Time</code> and
-<code>reflect.Value</code> are examples of types taking advantage of this new property.
-</p>
-
-<p>
-As an example, if package <code>p</code> includes the definitions,
-</p>
-
-<pre>
-    type Struct struct {
-        Public int
-        secret int
-    }
-    func NewStruct(a int) Struct {  // Note: not a pointer.
-        return Struct{a, f(a)}
-    }
-    func (s Struct) String() string {
-        return fmt.Sprintf("{%d (secret %d)}", s.Public, s.secret)
-    }
-</pre>
-
-<p>
-a package that imports <code>p</code> can assign and copy values of type
-<code>p.Struct</code> at will.
-Behind the scenes the unexported fields will be assigned and copied just
-as if they were exported,
-but the client code will never be aware of them. The code
-</p>
-
-<pre>
-    import "p"
-
-    myStruct := p.NewStruct(23)
-    copyOfMyStruct := myStruct
-    fmt.Println(myStruct, copyOfMyStruct)
-</pre>
-
-<p>
-will show that the secret field of the struct has been copied to the new value.
-</p>
-
-<p>
-<em>Updating</em>:
-This is a new feature, so existing code needs no changes.
-</p>
-
-<h3 id="equality">Equality</h3>
-
-<p>
-Before Go 1, the language did not define equality on struct and array values.
-This meant,
-among other things, that structs and arrays could not be used as map keys.
-On the other hand, Go did define equality on function and map values.
-Function equality was problematic in the presence of closures
-(when are two closures equal?)
-while map equality compared pointers, not the maps' content, which was usually
-not what the user would want.
-</p>
-
-<p>
-Go 1 addressed these issues.
-First, structs and arrays can be compared for equality and inequality
-(<code>==</code> and <code>!=</code>),
-and therefore be used as map keys,
-provided they are composed from elements for which equality is also defined,
-using element-wise comparison.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/type Day struct/` `/Printf/`}}
--->    type Day struct {
-        long  string
-        short string
-    }
-    Christmas := Day{&#34;Christmas&#34;, &#34;XMas&#34;}
-    Thanksgiving := Day{&#34;Thanksgiving&#34;, &#34;Turkey&#34;}
-    holiday := map[Day]bool{
-        Christmas:    true,
-        Thanksgiving: true,
-    }
-    fmt.Printf(&#34;Christmas is a holiday: %t\n&#34;, holiday[Christmas])</pre>
-
-<p>
-Second, Go 1 removes the definition of equality for function values,
-except for comparison with <code>nil</code>.
-Finally, map equality is gone too, also except for comparison with <code>nil</code>.
-</p>
-
-<p>
-Note that equality is still undefined for slices, for which the
-calculation is in general infeasible.  Also note that the ordered
-comparison operators (<code>&lt;</code> <code>&lt;=</code>
-<code>&gt;</code> <code>&gt;=</code>) are still undefined for
-structs and arrays.
-</p>
-
-<p>
-<em>Updating</em>:
-Struct and array equality is a new feature, so existing code needs no changes.
-Existing code that depends on function or map equality will be
-rejected by the compiler and will need to be fixed by hand.
-Few programs will be affected, but the fix may require some
-redesign.
-</p>
-
-<h2 id="packages">The package hierarchy</h2>
-
-<p>
-Go 1 addresses many deficiencies in the old standard library and
-cleans up a number of packages, making them more internally consistent
-and portable.
-</p>
-
-<p>
-This section describes how the packages have been rearranged in Go 1.
-Some have moved, some have been renamed, some have been deleted.
-New packages are described in later sections.
-</p>
-
-<h3 id="hierarchy">The package hierarchy</h3>
-
-<p>
-Go 1 has a rearranged package hierarchy that groups related items
-into subdirectories. For instance, <code>utf8</code> and
-<code>utf16</code> now occupy subdirectories of <code>unicode</code>.
-Also, <a href="#subrepo">some packages</a> have moved into
-subrepositories of
-<a href="http://code.google.com/p/go"><code>code.google.com/p/go</code></a>
-while <a href="#deleted">others</a> have been deleted outright.
-</p>
-
-<table class="codetable" frame="border" summary="Moved packages">
-<colgroup align="left" width="60%"></colgroup>
-<colgroup align="left" width="40%"></colgroup>
-<tr>
-<th align="left">Old path</th>
-<th align="left">New path</th>
-</tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>asn1</td> <td>encoding/asn1</td></tr>
-<tr><td>csv</td> <td>encoding/csv</td></tr>
-<tr><td>gob</td> <td>encoding/gob</td></tr>
-<tr><td>json</td> <td>encoding/json</td></tr>
-<tr><td>xml</td> <td>encoding/xml</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>exp/template/html</td> <td>html/template</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>big</td> <td>math/big</td></tr>
-<tr><td>cmath</td> <td>math/cmplx</td></tr>
-<tr><td>rand</td> <td>math/rand</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>http</td> <td>net/http</td></tr>
-<tr><td>http/cgi</td> <td>net/http/cgi</td></tr>
-<tr><td>http/fcgi</td> <td>net/http/fcgi</td></tr>
-<tr><td>http/httptest</td> <td>net/http/httptest</td></tr>
-<tr><td>http/pprof</td> <td>net/http/pprof</td></tr>
-<tr><td>mail</td> <td>net/mail</td></tr>
-<tr><td>rpc</td> <td>net/rpc</td></tr>
-<tr><td>rpc/jsonrpc</td> <td>net/rpc/jsonrpc</td></tr>
-<tr><td>smtp</td> <td>net/smtp</td></tr>
-<tr><td>url</td> <td>net/url</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>exec</td> <td>os/exec</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>scanner</td> <td>text/scanner</td></tr>
-<tr><td>tabwriter</td> <td>text/tabwriter</td></tr>
-<tr><td>template</td> <td>text/template</td></tr>
-<tr><td>template/parse</td> <td>text/template/parse</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>utf8</td> <td>unicode/utf8</td></tr>
-<tr><td>utf16</td> <td>unicode/utf16</td></tr>
-</table>
-
-<p>
-Note that the package names for the old <code>cmath</code> and
-<code>exp/template/html</code> packages have changed to <code>cmplx</code>
-and <code>template</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update all imports and package renames for packages that
-remain inside the standard repository.  Programs that import packages
-that are no longer in the standard repository will need to be edited
-by hand.
-</p>
-
-<h3 id="exp">The package tree exp</h3>
-
-<p>
-Because they are not standardized, the packages under the <code>exp</code> directory will not be available in the
-standard Go 1 release distributions, although they will be available in source code form
-in <a href="http://code.google.com/p/go/">the repository</a> for
-developers who wish to use them.
-</p>
-
-<p>
-Several packages have moved under <code>exp</code> at the time of Go 1's release:
-</p>
-
-<ul>
-<li><code>ebnf</code></li>
-<li><code>html</code><sup>&#8224;</sup></li>
-<li><code>go/types</code></li>
-</ul>
-
-<p>
-(<sup>&#8224;</sup>The <code>EscapeString</code> and <code>UnescapeString</code> functions remain
-in package <code>html</code>.)
-</p>
-
-<p>
-All these packages are available under the same names, with the prefix <code>exp/</code>: <code>exp/ebnf</code> etc.
-</p>
-
-<p>
-Also, the <code>utf8.String</code> type has been moved to its own package, <code>exp/utf8string</code>.
-</p>
-
-<p>
-Finally, the <code>gotype</code> command now resides in <code>exp/gotype</code>, while
-<code>ebnflint</code> is now in <code>exp/ebnflint</code>.
-If they are installed, they now reside in <code>$GOROOT/bin/tool</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-Code that uses packages in <code>exp</code> will need to be updated by hand,
-or else compiled from an installation that has <code>exp</code> available.
-The <code>go</code> <code>fix</code> tool or the compiler will complain about such uses.
-</p>
-
-<h3 id="old">The package tree old</h3>
-
-<p>
-Because they are deprecated, the packages under the <code>old</code> directory will not be available in the
-standard Go 1 release distributions, although they will be available in source code form for
-developers who wish to use them.
-</p>
-
-<p>
-The packages in their new locations are:
-</p>
-
-<ul>
-<li><code>old/netchan</code></li>
-<li><code>old/regexp</code></li>
-<li><code>old/template</code></li>
-</ul>
-
-<p>
-<em>Updating</em>:
-Code that uses packages now in <code>old</code> will need to be updated by hand,
-or else compiled from an installation that has <code>old</code> available.
-The <code>go</code> <code>fix</code> tool will warn about such uses.
-</p>
-
-<h3 id="deleted">Deleted packages</h3>
-
-<p>
-Go 1 deletes several packages outright:
-</p>
-
-<ul>
-<li><code>container/vector</code></li>
-<li><code>exp/datafmt</code></li>
-<li><code>go/typechecker</code></li>
-<li><code>try</code></li>
-</ul>
-
-<p>
-and also the command <code>gotry</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-Code that uses <code>container/vector</code> should be updated to use
-slices directly.  See
-<a href="http://code.google.com/p/go-wiki/wiki/SliceTricks">the Go
-Language Community Wiki</a> for some suggestions.
-Code that uses the other packages (there should be almost zero) will need to be rethought.
-</p>
-
-<h3 id="subrepo">Packages moving to subrepositories</h3>
-
-<p>
-Go 1 has moved a number of packages into other repositories, usually sub-repositories of
-<a href="http://code.google.com/p/go/">the main Go repository</a>.
-This table lists the old and new import paths:
-</p>
-
-<table class="codetable" frame="border" summary="Sub-repositories">
-<colgroup align="left" width="40%"></colgroup>
-<colgroup align="left" width="60%"></colgroup>
-<tr>
-<th align="left">Old</th>
-<th align="left">New</th>
-</tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>crypto/bcrypt</td> <td>code.google.com/p/go.crypto/bcrypt</tr>
-<tr><td>crypto/blowfish</td> <td>code.google.com/p/go.crypto/blowfish</tr>
-<tr><td>crypto/cast5</td> <td>code.google.com/p/go.crypto/cast5</tr>
-<tr><td>crypto/md4</td> <td>code.google.com/p/go.crypto/md4</tr>
-<tr><td>crypto/ocsp</td> <td>code.google.com/p/go.crypto/ocsp</tr>
-<tr><td>crypto/openpgp</td> <td>code.google.com/p/go.crypto/openpgp</tr>
-<tr><td>crypto/openpgp/armor</td> <td>code.google.com/p/go.crypto/openpgp/armor</tr>
-<tr><td>crypto/openpgp/elgamal</td> <td>code.google.com/p/go.crypto/openpgp/elgamal</tr>
-<tr><td>crypto/openpgp/errors</td> <td>code.google.com/p/go.crypto/openpgp/errors</tr>
-<tr><td>crypto/openpgp/packet</td> <td>code.google.com/p/go.crypto/openpgp/packet</tr>
-<tr><td>crypto/openpgp/s2k</td> <td>code.google.com/p/go.crypto/openpgp/s2k</tr>
-<tr><td>crypto/ripemd160</td> <td>code.google.com/p/go.crypto/ripemd160</tr>
-<tr><td>crypto/twofish</td> <td>code.google.com/p/go.crypto/twofish</tr>
-<tr><td>crypto/xtea</td> <td>code.google.com/p/go.crypto/xtea</tr>
-<tr><td>exp/ssh</td> <td>code.google.com/p/go.crypto/ssh</tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>image/bmp</td> <td>code.google.com/p/go.image/bmp</tr>
-<tr><td>image/tiff</td> <td>code.google.com/p/go.image/tiff</tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>net/dict</td> <td>code.google.com/p/go.net/dict</tr>
-<tr><td>net/websocket</td> <td>code.google.com/p/go.net/websocket</tr>
-<tr><td>exp/spdy</td> <td>code.google.com/p/go.net/spdy</tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>encoding/git85</td> <td>code.google.com/p/go.codereview/git85</tr>
-<tr><td>patch</td> <td>code.google.com/p/go.codereview/patch</tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>exp/wingui</td> <td>code.google.com/p/gowingui</tr>
-</table>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update imports of these packages to use the new import paths.
-Installations that depend on these packages will need to install them using
-a <code>go get</code> command.
-</p>
-
-<h2 id="major">Major changes to the library</h2>
-
-<p>
-This section describes significant changes to the core libraries, the ones that
-affect the most programs.
-</p>
-
-<h3 id="errors">The error type and errors package</h3>
-
-<p>
-The placement of <code>os.Error</code> in package <code>os</code> is mostly historical: errors first came up when implementing package <code>os</code>, and they seemed system-related at the time.
-Since then it has become clear that errors are more fundamental than the operating system.  For example, it would be nice to use <code>Errors</code> in packages that <code>os</code> depends on, like <code>syscall</code>.
-Also, having <code>Error</code> in <code>os</code> introduces many dependencies on <code>os</code> that would otherwise not exist.
-</p>
-
-<p>
-Go 1 solves these problems by introducing a built-in <code>error</code> interface type and a separate <code>errors</code> package (analogous to <code>bytes</code> and <code>strings</code>) that contains utility functions.
-It replaces <code>os.NewError</code> with
-<a href="/pkg/errors/#New"><code>errors.New</code></a>,
-giving errors a more central place in the environment.
-</p>
-
-<p>
-So that the widely-used <code>String</code> method does not cause accidental satisfaction
-of the <code>error</code> interface, the <code>error</code> interface uses instead
-the name <code>Error</code> for that method:
-</p>
-
-<pre>
-    type error interface {
-        Error() string
-    }
-</pre>
-
-<p>
-The <code>fmt</code> library automatically invokes <code>Error</code>, as it already
-does for <code>String</code>, for easy printing of error values.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/START ERROR EXAMPLE/` `/END ERROR EXAMPLE/`}}
--->type SyntaxError struct {
-    File    string
-    Line    int
-    Message string
-}
-
-func (se *SyntaxError) Error() string {
-    return fmt.Sprintf(&#34;%s:%d: %s&#34;, se.File, se.Line, se.Message)
-}</pre>
-
-<p>
-All standard packages have been updated to use the new interface; the old <code>os.Error</code> is gone.
-</p>
-
-<p>
-A new package, <a href="/pkg/errors/"><code>errors</code></a>, contains the function
-</p>
-
-<pre>
-func New(text string) error
-</pre>
-
-<p>
-to turn a string into an error. It replaces the old <code>os.NewError</code>.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/ErrSyntax/`}}
--->    var ErrSyntax = errors.New(&#34;syntax error&#34;)</pre>
-               
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
-Code that defines error types with a <code>String</code> method will need to be updated
-by hand to rename the methods to <code>Error</code>.
-</p>
-
-<h3 id="errno">System call errors</h3>
-
-<p>
-The old <code>syscall</code> package, which predated <code>os.Error</code>
-(and just about everything else),
-returned errors as <code>int</code> values.
-In turn, the <code>os</code> package forwarded many of these errors, such
-as <code>EINVAL</code>, but using a different set of errors on each platform.
-This behavior was unpleasant and unportable.
-</p>
-
-<p>
-In Go 1, the
-<a href="/pkg/syscall/"><code>syscall</code></a>
-package instead returns an <code>error</code> for system call errors.
-On Unix, the implementation is done by a
-<a href="/pkg/syscall/#Errno"><code>syscall.Errno</code></a> type
-that satisfies <code>error</code> and replaces the old <code>os.Errno</code>.
-</p>
-
-<p>
-The changes affecting <code>os.EINVAL</code> and relatives are
-described <a href="#os">elsewhere</a>.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
-Regardless, most code should use the <code>os</code> package
-rather than <code>syscall</code> and so will be unaffected.
-</p>
-
-<h3 id="time">Time</h3>
-
-<p>
-Time is always a challenge to support well in a programming language.
-The old Go <code>time</code> package had <code>int64</code> units, no
-real type safety,
-and no distinction between absolute times and durations.
-</p>
-
-<p>
-One of the most sweeping changes in the Go 1 library is therefore a
-complete redesign of the
-<a href="/pkg/time/"><code>time</code></a> package.
-Instead of an integer number of nanoseconds as an <code>int64</code>,
-and a separate <code>*time.Time</code> type to deal with human
-units such as hours and years,
-there are now two fundamental types:
-<a href="/pkg/time/#Time"><code>time.Time</code></a>
-(a value, so the <code>*</code> is gone), which represents a moment in time;
-and <a href="/pkg/time/#Duration"><code>time.Duration</code></a>,
-which represents an interval.
-Both have nanosecond resolution.
-A <code>Time</code> can represent any time into the ancient
-past and remote future, while a <code>Duration</code> can
-span plus or minus only about 290 years.
-There are methods on these types, plus a number of helpful
-predefined constant durations such as <code>time.Second</code>.
-</p>
-
-<p>
-Among the new methods are things like
-<a href="/pkg/time/#Time.Add"><code>Time.Add</code></a>,
-which adds a <code>Duration</code> to a <code>Time</code>, and
-<a href="/pkg/time/#Time.Sub"><code>Time.Sub</code></a>,
-which subtracts two <code>Times</code> to yield a <code>Duration</code>.
-</p>
-
-<p>
-The most important semantic change is that the Unix epoch (Jan 1, 1970) is now
-relevant only for those functions and methods that mention Unix:
-<a href="/pkg/time/#Unix"><code>time.Unix</code></a>
-and the <a href="/pkg/time/#Time.Unix"><code>Unix</code></a>
-and <a href="/pkg/time/#Time.UnixNano"><code>UnixNano</code></a> methods
-of the <code>Time</code> type.
-In particular,
-<a href="/pkg/time/#Now"><code>time.Now</code></a>
-returns a <code>time.Time</code> value rather than, in the old
-API, an integer nanosecond count since the Unix epoch.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/sleepUntil/` `/^}/`}}
---><span class="comment">// sleepUntil sleeps until the specified time. It returns immediately if it&#39;s too late.</span>
-func sleepUntil(wakeup time.Time) {
-    now := time.Now() <span class="comment">// A Time.</span>
-    if !wakeup.After(now) {
-        return
-    }
-    delta := wakeup.Sub(now) <span class="comment">// A Duration.</span>
-    fmt.Printf(&#34;Sleeping for %.3fs\n&#34;, delta.Seconds())
-    time.Sleep(delta)
-}</pre>
-
-<p>
-The new types, methods, and constants have been propagated through
-all the standard packages that use time, such as <code>os</code> and
-its representation of file time stamps.
-</p>
-
-<p>
-<em>Updating</em>:
-The <code>go</code> <code>fix</code> tool will update many uses of the old <code>time</code> package to use the new
-types and methods, although it does not replace values such as <code>1e9</code>
-representing nanoseconds per second.
-Also, because of type changes in some of the values that arise,
-some of the expressions rewritten by the fix tool may require
-further hand editing; in such cases the rewrite will include
-the correct function or method for the old functionality, but
-may have the wrong type or require further analysis.
-</p>
-
-<h2 id="minor">Minor changes to the library</h2>
-
-<p>
-This section describes smaller changes, such as those to less commonly
-used packages or that affect
-few programs beyond the need to run <code>go</code> <code>fix</code>.
-This category includes packages that are new in Go 1.
-Collectively they improve portability, regularize behavior, and
-make the interfaces more modern and Go-like.
-</p>
-
-<h3 id="archive_zip">The archive/zip package</h3>
-
-<p>
-In Go 1, <a href="/pkg/archive/zip/#Writer"><code>*zip.Writer</code></a> no
-longer has a <code>Write</code> method. Its presence was a mistake.
-</p>
-
-<p>
-<em>Updating</em>:
-What little code is affected will be caught by the compiler and must be updated by hand.
-</p>
-
-<h3 id="bufio">The bufio package</h3>
-
-<p>
-In Go 1, <a href="/pkg/bufio/#NewReaderSize"><code>bufio.NewReaderSize</code></a>
-and
-<a href="/pkg/bufio/#NewWriterSize"><code>bufio.NewWriterSize</code></a>
-functions no longer return an error for invalid sizes.
-If the argument size is too small or invalid, it is adjusted.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update calls that assign the error to _.
-Calls that aren't fixed will be caught by the compiler and must be updated by hand.
-</p>
-
-<h3 id="compress">The compress/flate, compress/gzip and compress/zlib packages</h3>
-
-<p>
-In Go 1, the <code>NewWriterXxx</code> functions in
-<a href="/pkg/compress/flate"><code>compress/flate</code></a>,
-<a href="/pkg/compress/gzip"><code>compress/gzip</code></a> and
-<a href="/pkg/compress/zlib"><code>compress/zlib</code></a>
-all return <code>(*Writer, error)</code> if they take a compression level,
-and <code>*Writer</code> otherwise. Package <code>gzip</code>'s
-<code>Compressor</code> and <code>Decompressor</code> types have been renamed
-to <code>Writer</code> and <code>Reader</code>. Package <code>flate</code>'s
-<code>WrongValueError</code> type has been removed.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update old names and calls that assign the error to _.
-Calls that aren't fixed will be caught by the compiler and must be updated by hand.
-</p>
-
-<h3 id="crypto_aes_des">The crypto/aes and crypto/des packages</h3>
-
-<p>
-In Go 1, the <code>Reset</code> method has been removed. Go does not guarantee
-that memory is not copied and therefore this method was misleading.
-</p>
-
-<p>
-The cipher-specific types <code>*aes.Cipher</code>, <code>*des.Cipher</code>,
-and <code>*des.TripleDESCipher</code> have been removed in favor of
-<code>cipher.Block</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-Remove the calls to Reset. Replace uses of the specific cipher types with
-cipher.Block.
-</p>
-
-<h3 id="crypto_elliptic">The crypto/elliptic package</h3>
-
-<p>
-In Go 1, <a href="/pkg/crypto/elliptic/#Curve"><code>elliptic.Curve</code></a>
-has been made an interface to permit alternative implementations. The curve
-parameters have been moved to the
-<a href="/pkg/crypto/elliptic/#CurveParams"><code>elliptic.CurveParams</code></a>
-structure.
-</p>
-
-<p>
-<em>Updating</em>:
-Existing users of <code>*elliptic.Curve</code> will need to change to
-simply <code>elliptic.Curve</code>. Calls to <code>Marshal</code>,
-<code>Unmarshal</code> and <code>GenerateKey</code> are now functions
-in <code>crypto/elliptic</code> that take an <code>elliptic.Curve</code>
-as their first argument.
-</p>
-
-<h3 id="crypto_hmac">The crypto/hmac package</h3>
-
-<p>
-In Go 1, the hash-specific functions, such as <code>hmac.NewMD5</code>, have
-been removed from <code>crypto/hmac</code>. Instead, <code>hmac.New</code> takes
-a function that returns a <code>hash.Hash</code>, such as <code>md5.New</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will perform the needed changes.
-</p>
-
-<h3 id="crypto_x509">The crypto/x509 package</h3>
-
-<p>
-In Go 1, the
-<a href="/pkg/crypto/x509/#CreateCertificate"><code>CreateCertificate</code></a>
-and
-<a href="/pkg/crypto/x509/#CreateCRL"><code>CreateCRL</code></a>
-functions in <code>crypto/x509</code> have been altered to take an
-<code>interface{}</code> where they previously took a <code>*rsa.PublicKey</code>
-or <code>*rsa.PrivateKey</code>. This will allow other public key algorithms
-to be implemented in the future.
-</p>
-
-<p>
-<em>Updating</em>:
-No changes will be needed.
-</p>
-
-<h3 id="encoding_binary">The encoding/binary package</h3>
-
-<p>
-In Go 1, the <code>binary.TotalSize</code> function has been replaced by
-<a href="/pkg/encoding/binary/#Size"><code>Size</code></a>,
-which takes an <code>interface{}</code> argument rather than
-a <code>reflect.Value</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-What little code is affected will be caught by the compiler and must be updated by hand.
-</p>
-
-<h3 id="encoding_xml">The encoding/xml package</h3>
-
-<p>
-In Go 1, the <a href="/pkg/encoding/xml/"><code>xml</code></a> package
-has been brought closer in design to the other marshaling packages such
-as <a href="/pkg/encoding/gob/"><code>encoding/gob</code></a>.
-</p>
-
-<p>
-The old <code>Parser</code> type is renamed
-<a href="/pkg/encoding/xml/#Decoder"><code>Decoder</code></a> and has a new
-<a href="/pkg/encoding/xml/#Decoder.Decode"><code>Decode</code></a> method. An
-<a href="/pkg/encoding/xml/#Encoder"><code>Encoder</code></a> type was also introduced.
-</p>
-
-<p>
-The functions <a href="/pkg/encoding/xml/#Marshal"><code>Marshal</code></a>
-and <a href="/pkg/encoding/xml/#Unmarshal"><code>Unmarshal</code></a>
-work with <code>[]byte</code> values now. To work with streams,
-use the new <a href="/pkg/encoding/xml/#Encoder"><code>Encoder</code></a>
-and <a href="/pkg/encoding/xml/#Decoder"><code>Decoder</code></a> types.
-</p>
-
-<p>
-When marshaling or unmarshaling values, the format of supported flags in
-field tags has changed to be closer to the
-<a href="/pkg/encoding/json"><code>json</code></a> package
-(<code>`xml:"name,flag"`</code>). The matching done between field tags, field
-names, and the XML attribute and element names is now case-sensitive.
-The <code>XMLName</code> field tag, if present, must also match the name
-of the XML element being marshaled.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update most uses of the package except for some calls to
-<code>Unmarshal</code>. Special care must be taken with field tags,
-since the fix tool will not update them and if not fixed by hand they will
-misbehave silently in some cases. For example, the old
-<code>"attr"</code> is now written <code>",attr"</code> while plain
-<code>"attr"</code> remains valid but with a different meaning.
-</p>
-
-<h3 id="expvar">The expvar package</h3>
-
-<p>
-In Go 1, the <code>RemoveAll</code> function has been removed.
-The <code>Iter</code> function and <code>Iter</code> method on <code>*Map</code> have
-been replaced by
-<a href="/pkg/expvar/#Do"><code>Do</code></a>
-and
-<a href="/pkg/expvar/#Map.Do"><code>(*Map).Do</code></a>.
-</p>
-
-<p>
-<em>Updating</em>:
-Most code using <code>expvar</code> will not need changing. The rare code that used
-<code>Iter</code> can be updated to pass a closure to <code>Do</code> to achieve the same effect.
-</p>
-
-<h3 id="flag">The flag package</h3>
-
-<p>
-In Go 1, the interface <a href="/pkg/flag/#Value"><code>flag.Value</code></a> has changed slightly.
-The <code>Set</code> method now returns an <code>error</code> instead of
-a <code>bool</code> to indicate success or failure.
-</p>
-
-<p>
-There is also a new kind of flag, <code>Duration</code>, to support argument
-values specifying time intervals.
-Values for such flags must be given units, just as <code>time.Duration</code>
-formats them: <code>10s</code>, <code>1h30m</code>, etc.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/timeout/`}}
--->var timeout = flag.Duration(&#34;timeout&#34;, 30*time.Second, &#34;how long to wait for completion&#34;)</pre>
-
-<p>
-<em>Updating</em>:
-Programs that implement their own flags will need minor manual fixes to update their
-<code>Set</code> methods.
-The <code>Duration</code> flag is new and affects no existing code.
-</p>
-
-
-<h3 id="go">The go/* packages</h3>
-
-<p>
-Several packages under <code>go</code> have slightly revised APIs.
-</p>
-
-<p>
-A concrete <code>Mode</code> type was introduced for configuration mode flags
-in the packages
-<a href="/pkg/go/scanner/"><code>go/scanner</code></a>,
-<a href="/pkg/go/parser/"><code>go/parser</code></a>,
-<a href="/pkg/go/printer/"><code>go/printer</code></a>, and
-<a href="/pkg/go/doc/"><code>go/doc</code></a>.
-</p>
-
-<p>
-The modes <code>AllowIllegalChars</code> and <code>InsertSemis</code> have been removed
-from the <a href="/pkg/go/scanner/"><code>go/scanner</code></a> package. They were mostly
-useful for scanning text other than Go source files. Instead, the
-<a href="/pkg/text/scanner/"><code>text/scanner</code></a> package should be used
-for that purpose.
-</p>
-
-<p>
-The <a href="/pkg/go/scanner/#ErrorHandler"><code>ErrorHandler</code></a> provided
-to the scanner's <a href="/pkg/go/scanner/#Scanner.Init"><code>Init</code></a> method is
-now simply a function rather than an interface. The <code>ErrorVector</code> type has
-been removed in favor of the (existing) <a href="/pkg/go/scanner/#ErrorList"><code>ErrorList</code></a>
-type, and the <code>ErrorVector</code> methods have been migrated. Instead of embedding
-an <code>ErrorVector</code> in a client of the scanner, now a client should maintain
-an <code>ErrorList</code>.
-</p>
-
-<p>
-The set of parse functions provided by the <a href="/pkg/go/parser/"><code>go/parser</code></a>
-package has been reduced to the primary parse function
-<a href="/pkg/go/parser/#ParseFile"><code>ParseFile</code></a>, and a couple of
-convenience functions <a href="/pkg/go/parser/#ParseDir"><code>ParseDir</code></a>
-and <a href="/pkg/go/parser/#ParseExpr"><code>ParseExpr</code></a>.
-</p>
-
-<p>
-The <a href="/pkg/go/printer/"><code>go/printer</code></a> package supports an additional
-configuration mode <a href="/pkg/go/printer/#Mode"><code>SourcePos</code></a>;
-if set, the printer will emit <code>//line</code> comments such that the generated
-output contains the original source code position information. The new type
-<a href="/pkg/go/printer/#CommentedNode"><code>CommentedNode</code></a> can be
-used to provide comments associated with an arbitrary
-<a href="/pkg/go/ast/#Node"><code>ast.Node</code></a> (until now only
-<a href="/pkg/go/ast/#File"><code>ast.File</code></a> carried comment information).
-</p>
-
-<p>
-The type names of the <a href="/pkg/go/doc/"><code>go/doc</code></a> package have been
-streamlined by removing the <code>Doc</code> suffix: <code>PackageDoc</code>
-is now <code>Package</code>, <code>ValueDoc</code> is <code>Value</code>, etc.
-Also, all types now consistently have a <code>Name</code> field (or <code>Names</code>,
-in the case of type <code>Value</code>) and <code>Type.Factories</code> has become
-<code>Type.Funcs</code>.
-Instead of calling <code>doc.NewPackageDoc(pkg, importpath)</code>,
-documentation for a package is created with:
-</p>
-
-<pre>
-    doc.New(pkg, importpath, mode)
-</pre>
-
-<p>
-where the new <code>mode</code> parameter specifies the operation mode:
-if set to <a href="/pkg/go/doc/#AllDecls"><code>AllDecls</code></a>, all declarations
-(not just exported ones) are considered.
-The function <code>NewFileDoc</code> was removed, and the function
-<code>CommentText</code> has become the method
-<a href="/pkg/go/ast/#Text"><code>Text</code></a> of
-<a href="/pkg/go/ast/#CommentGroup"><code>ast.CommentGroup</code></a>.
-</p>
-
-<p>
-In package <a href="/pkg/go/token/"><code>go/token</code></a>, the
-<a href="/pkg/go/token/#FileSet"><code>token.FileSet</code></a> method <code>Files</code>
-(which originally returned a channel of <code>*token.File</code>s) has been replaced
-with the iterator <a href="/pkg/go/token/#FileSet.Iterate"><code>Iterate</code></a> that
-accepts a function argument instead.
-</p>
-
-<p>
-In package <a href="/pkg/go/build/"><code>go/build</code></a>, the API
-has been nearly completely replaced.
-The package still computes Go package information
-but it does not run the build: the <code>Cmd</code> and <code>Script</code>
-types are gone.
-(To build code, use the new
-<a href="/cmd/go/"><code>go</code></a> command instead.)
-The <code>DirInfo</code> type is now named
-<a href="/pkg/go/build/#Package"><code>Package</code></a>.
-<code>FindTree</code> and <code>ScanDir</code> are replaced by
-<a href="/pkg/go/build/#Import"><code>Import</code></a>
-and
-<a href="/pkg/go/build/#ImportDir"><code>ImportDir</code></a>.
-</p>
-
-<p>
-<em>Updating</em>:
-Code that uses packages in <code>go</code> will have to be updated by hand; the
-compiler will reject incorrect uses. Templates used in conjunction with any of the
-<code>go/doc</code> types may need manual fixes; the renamed fields will lead
-to run-time errors.
-</p>
-
-<h3 id="hash">The hash package</h3>
-
-<p>
-In Go 1, the definition of <a href="/pkg/hash/#Hash"><code>hash.Hash</code></a> includes
-a new method, <code>BlockSize</code>.  This new method is used primarily in the
-cryptographic libraries.
-</p>
-
-<p>
-The <code>Sum</code> method of the
-<a href="/pkg/hash/#Hash"><code>hash.Hash</code></a> interface now takes a
-<code>[]byte</code> argument, to which the hash value will be appended.
-The previous behavior can be recreated by adding a <code>nil</code> argument to the call.
-</p>
-
-<p>
-<em>Updating</em>:
-Existing implementations of <code>hash.Hash</code> will need to add a
-<code>BlockSize</code> method.  Hashes that process the input one byte at
-a time can implement <code>BlockSize</code> to return 1.
-Running <code>go</code> <code>fix</code> will update calls to the <code>Sum</code> methods of the various
-implementations of <code>hash.Hash</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-Since the package's functionality is new, no updating is necessary.
-</p>
-
-<h3 id="http">The http package</h3>
-
-<p>
-In Go 1 the <a href="/pkg/net/http/"><code>http</code></a> package is refactored,
-putting some of the utilities into a
-<a href="/pkg/net/http/httputil/"><code>httputil</code></a> subdirectory.
-These pieces are only rarely needed by HTTP clients.
-The affected items are:
-</p>
-
-<ul>
-<li>ClientConn</li>
-<li>DumpRequest</li>
-<li>DumpRequestOut</li>
-<li>DumpResponse</li>
-<li>NewChunkedReader</li>
-<li>NewChunkedWriter</li>
-<li>NewClientConn</li>
-<li>NewProxyClientConn</li>
-<li>NewServerConn</li>
-<li>NewSingleHostReverseProxy</li>
-<li>ReverseProxy</li>
-<li>ServerConn</li>
-</ul>
-
-<p>
-The <code>Request.RawURL</code> field has been removed; it was a
-historical artifact.
-</p>
-
-<p>
-The <code>Handle</code> and <code>HandleFunc</code>
-functions, and the similarly-named methods of <code>ServeMux</code>,
-now panic if an attempt is made to register the same pattern twice.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update the few programs that are affected except for
-uses of <code>RawURL</code>, which must be fixed by hand.
-</p>
-
-<h3 id="image">The image package</h3>
-
-<p>
-The <a href="/pkg/image/"><code>image</code></a> package has had a number of
-minor changes, rearrangements and renamings.
-</p>
-
-<p>
-Most of the color handling code has been moved into its own package,
-<a href="/pkg/image/color/"><code>image/color</code></a>.
-For the elements that moved, a symmetry arises; for instance,
-each pixel of an
-<a href="/pkg/image/#RGBA"><code>image.RGBA</code></a>
-is a
-<a href="/pkg/image/color/#RGBA"><code>color.RGBA</code></a>.
-</p>
-
-<p>
-The old <code>image/ycbcr</code> package has been folded, with some
-renamings, into the
-<a href="/pkg/image/"><code>image</code></a>
-and
-<a href="/pkg/image/color/"><code>image/color</code></a>
-packages.
-</p>
-
-<p>
-The old <code>image.ColorImage</code> type is still in the <code>image</code>
-package but has been renamed
-<a href="/pkg/image/#Uniform"><code>image.Uniform</code></a>,
-while <code>image.Tiled</code> has been removed.
-</p>
-
-<p>
-This table lists the renamings.
-</p>
-
-<table class="codetable" frame="border" summary="image renames">
-<colgroup align="left" width="50%"></colgroup>
-<colgroup align="left" width="50%"></colgroup>
-<tr>
-<th align="left">Old</th>
-<th align="left">New</th>
-</tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>image.Color</td> <td>color.Color</td></tr>
-<tr><td>image.ColorModel</td> <td>color.Model</td></tr>
-<tr><td>image.ColorModelFunc</td> <td>color.ModelFunc</td></tr>
-<tr><td>image.PalettedColorModel</td> <td>color.Palette</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>image.RGBAColor</td> <td>color.RGBA</td></tr>
-<tr><td>image.RGBA64Color</td> <td>color.RGBA64</td></tr>
-<tr><td>image.NRGBAColor</td> <td>color.NRGBA</td></tr>
-<tr><td>image.NRGBA64Color</td> <td>color.NRGBA64</td></tr>
-<tr><td>image.AlphaColor</td> <td>color.Alpha</td></tr>
-<tr><td>image.Alpha16Color</td> <td>color.Alpha16</td></tr>
-<tr><td>image.GrayColor</td> <td>color.Gray</td></tr>
-<tr><td>image.Gray16Color</td> <td>color.Gray16</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>image.RGBAColorModel</td> <td>color.RGBAModel</td></tr>
-<tr><td>image.RGBA64ColorModel</td> <td>color.RGBA64Model</td></tr>
-<tr><td>image.NRGBAColorModel</td> <td>color.NRGBAModel</td></tr>
-<tr><td>image.NRGBA64ColorModel</td> <td>color.NRGBA64Model</td></tr>
-<tr><td>image.AlphaColorModel</td> <td>color.AlphaModel</td></tr>
-<tr><td>image.Alpha16ColorModel</td> <td>color.Alpha16Model</td></tr>
-<tr><td>image.GrayColorModel</td> <td>color.GrayModel</td></tr>
-<tr><td>image.Gray16ColorModel</td> <td>color.Gray16Model</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>ycbcr.RGBToYCbCr</td> <td>color.RGBToYCbCr</td></tr>
-<tr><td>ycbcr.YCbCrToRGB</td> <td>color.YCbCrToRGB</td></tr>
-<tr><td>ycbcr.YCbCrColorModel</td> <td>color.YCbCrModel</td></tr>
-<tr><td>ycbcr.YCbCrColor</td> <td>color.YCbCr</td></tr>
-<tr><td>ycbcr.YCbCr</td> <td>image.YCbCr</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>ycbcr.SubsampleRatio444</td> <td>image.YCbCrSubsampleRatio444</td></tr>
-<tr><td>ycbcr.SubsampleRatio422</td> <td>image.YCbCrSubsampleRatio422</td></tr>
-<tr><td>ycbcr.SubsampleRatio420</td> <td>image.YCbCrSubsampleRatio420</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>image.ColorImage</td> <td>image.Uniform</td></tr>
-</table>
-
-<p>
-The image package's <code>New</code> functions
-(<a href="/pkg/image/#NewRGBA"><code>NewRGBA</code></a>,
-<a href="/pkg/image/#NewRGBA64"><code>NewRGBA64</code></a>, etc.)
-take an <a href="/pkg/image/#Rectangle"><code>image.Rectangle</code></a> as an argument
-instead of four integers.
-</p>
-
-<p>
-Finally, there are new predefined <code>color.Color</code> variables
-<a href="/pkg/image/color/#Black"><code>color.Black</code></a>,
-<a href="/pkg/image/color/#White"><code>color.White</code></a>,
-<a href="/pkg/image/color/#Opaque"><code>color.Opaque</code></a>
-and
-<a href="/pkg/image/color/#Transparent"><code>color.Transparent</code></a>.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
-</p>
-
-<h3 id="log_syslog">The log/syslog package</h3>
-
-<p>
-In Go 1, the <a href="/pkg/log/syslog/#NewLogger"><code>syslog.NewLogger</code></a>
-function returns an error as well as a <code>log.Logger</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-What little code is affected will be caught by the compiler and must be updated by hand.
-</p>
-
-<h3 id="mime">The mime package</h3>
-
-<p>
-In Go 1, the <a href="/pkg/mime/#FormatMediaType"><code>FormatMediaType</code></a> function
-of the <code>mime</code> package has been simplified to make it
-consistent with
-<a href="/pkg/mime/#ParseMediaType"><code>ParseMediaType</code></a>.
-It now takes <code>"text/html"</code> rather than <code>"text"</code> and <code>"html"</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-What little code is affected will be caught by the compiler and must be updated by hand.
-</p>
-
-<h3 id="net">The net package</h3>
-
-<p>
-In Go 1, the various <code>SetTimeout</code>,
-<code>SetReadTimeout</code>, and <code>SetWriteTimeout</code> methods
-have been replaced with
-<a href="/pkg/net/#IPConn.SetDeadline"><code>SetDeadline</code></a>,
-<a href="/pkg/net/#IPConn.SetReadDeadline"><code>SetReadDeadline</code></a>, and
-<a href="/pkg/net/#IPConn.SetWriteDeadline"><code>SetWriteDeadline</code></a>,
-respectively.  Rather than taking a timeout value in nanoseconds that
-applies to any activity on the connection, the new methods set an
-absolute deadline (as a <code>time.Time</code> value) after which
-reads and writes will time out and no longer block.
-</p>
-
-<p>
-There are also new functions
-<a href="/pkg/net/#DialTimeout"><code>net.DialTimeout</code></a>
-to simplify timing out dialing a network address and
-<a href="/pkg/net/#ListenMulticastUDP"><code>net.ListenMulticastUDP</code></a>
-to allow multicast UDP to listen concurrently across multiple listeners.
-The <code>net.ListenMulticastUDP</code> function replaces the old
-<code>JoinGroup</code> and <code>LeaveGroup</code> methods.
-</p>
-
-<p>
-<em>Updating</em>:
-Code that uses the old methods will fail to compile and must be updated by hand.
-The semantic change makes it difficult for the fix tool to update automatically.
-</p>
-
-<h3 id="os">The os package</h3>
-
-<p>
-The <code>Time</code> function has been removed; callers should use
-the <a href="/pkg/time/#Time"><code>Time</code></a> type from the
-<code>time</code> package.
-</p>
-
-<p>
-The <code>Exec</code> function has been removed; callers should use
-<code>Exec</code> from the <code>syscall</code> package, where available.
-</p>
-
-<p>
-The <code>ShellExpand</code> function has been renamed to <a
-href="/pkg/os/#ExpandEnv"><code>ExpandEnv</code></a>.
-</p>
-
-<p>
-The <a href="/pkg/os/#NewFile"><code>NewFile</code></a> function
-now takes a <code>uintptr</code> fd, instead of an <code>int</code>.
-The <a href="/pkg/os/#File.Fd"><code>Fd</code></a> method on files now
-also returns a <code>uintptr</code>.
-</p>
-
-<p>
-There are no longer error constants such as <code>EINVAL</code>
-in the <code>os</code> package, since the set of values varied with
-the underlying operating system. There are new portable functions like
-<a href="/pkg/os/#IsPermission"><code>IsPermission</code></a>
-to test common error properties, plus a few new error values
-with more Go-like names, such as
-<a href="/pkg/os/#ErrPermission"><code>ErrPermission</code></a>
-and
-<a href="/pkg/os/#ErrNoEnv"><code>ErrNoEnv</code></a>.
-</p>
-
-<p>
-The <code>Getenverror</code> function has been removed. To distinguish
-between a non-existent environment variable and an empty string,
-use <a href="/pkg/os/#Environ"><code>os.Environ</code></a> or
-<a href="/pkg/syscall/#Getenv"><code>syscall.Getenv</code></a>.
-</p>
-
-
-<p>
-The <a href="/pkg/os/#Process.Wait"><code>Process.Wait</code></a> method has
-dropped its option argument and the associated constants are gone
-from the package.
-Also, the function <code>Wait</code> is gone; only the method of
-the <code>Process</code> type persists.
-</p>
-
-<p>
-The <code>Waitmsg</code> type returned by
-<a href="/pkg/os/#Process.Wait"><code>Process.Wait</code></a>
-has been replaced with a more portable
-<a href="/pkg/os/#ProcessState"><code>ProcessState</code></a>
-type with accessor methods to recover information about the
-process.
-Because of changes to <code>Wait</code>, the <code>ProcessState</code>
-value always describes an exited process.
-Portability concerns simplified the interface in other ways, but the values returned by the
-<a href="/pkg/os/#ProcessState.Sys"><code>ProcessState.Sys</code></a> and
-<a href="/pkg/os/#ProcessState.SysUsage"><code>ProcessState.SysUsage</code></a>
-methods can be type-asserted to underlying system-specific data structures such as
-<a href="/pkg/syscall/#WaitStatus"><code>syscall.WaitStatus</code></a> and
-<a href="/pkg/syscall/#Rusage"><code>syscall.Rusage</code></a> on Unix.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will drop a zero argument to <code>Process.Wait</code>.
-All other changes will be caught by the compiler and must be updated by hand.
-</p>
-
-<h4 id="os_fileinfo">The os.FileInfo type</h4>
-
-<p>
-Go 1 redefines the <a href="/pkg/os/#FileInfo"><code>os.FileInfo</code></a> type,
-changing it from a struct to an interface:
-</p>
-
-<pre>
-    type FileInfo interface {
-        Name() string       // base name of the file
-        Size() int64        // length in bytes
-        Mode() FileMode     // file mode bits
-        ModTime() time.Time // modification time
-        IsDir() bool        // abbreviation for Mode().IsDir()
-        Sys() interface{}   // underlying data source (can return nil)
-    }
-</pre>
-
-<p>
-The file mode information has been moved into a subtype called
-<a href="/pkg/os/#FileMode"><code>os.FileMode</code></a>,
-a simple integer type with <code>IsDir</code>, <code>Perm</code>, and <code>String</code>
-methods.
-</p>
-
-<p>
-The system-specific details of file modes and properties such as (on Unix)
-i-number have been removed from <code>FileInfo</code> altogether.
-Instead, each operating system's <code>os</code> package provides an
-implementation of the <code>FileInfo</code> interface, which
-has a <code>Sys</code> method that returns the
-system-specific representation of file metadata.
-For instance, to discover the i-number of a file on a Unix system, unpack
-the <code>FileInfo</code> like this:
-</p>
-
-<pre>
-    fi, err := os.Stat("hello.go")
-    if err != nil {
-        log.Fatal(err)
-    }
-    // Check that it's a Unix file.
-    unixStat, ok := fi.Sys().(*syscall.Stat_t)
-    if !ok {
-        log.Fatal("hello.go: not a Unix file")
-    }
-    fmt.Printf("file i-number: %d\n", unixStat.Ino)
-</pre>
-
-<p>
-Assuming (which is unwise) that <code>"hello.go"</code> is a Unix file,
-the i-number expression could be contracted to
-</p>
-
-<pre>
-    fi.Sys().(*syscall.Stat_t).Ino
-</pre>
-
-<p>
-The vast majority of uses of <code>FileInfo</code> need only the methods
-of the standard interface.
-</p>
-
-<p>
-The <code>os</code> package no longer contains wrappers for the POSIX errors
-such as <code>ENOENT</code>.
-For the few programs that need to verify particular error conditions, there are
-now the boolean functions
-<a href="/pkg/os/#IsExist"><code>IsExist</code></a>,
-<a href="/pkg/os/#IsNotExist"><code>IsNotExist</code></a>
-and
-<a href="/pkg/os/#IsPermission"><code>IsPermission</code></a>.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/os\.Open/` `/}/`}}
--->    f, err := os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
-    if os.IsExist(err) {
-        log.Printf(&#34;%s already exists&#34;, name)
-    }</pre>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update code that uses the old equivalent of the current <code>os.FileInfo</code>
-and <code>os.FileMode</code> API.
-Code that needs system-specific file details will need to be updated by hand.
-Code that uses the old POSIX error values from the <code>os</code> package
-will fail to compile and will also need to be updated by hand.
-</p>
-
-<h3 id="os_signal">The os/signal package</h3>
-
-<p>
-The <code>os/signal</code> package in Go 1 replaces the
-<code>Incoming</code> function, which returned a channel
-that received all incoming signals,
-with the selective <code>Notify</code> function, which asks
-for delivery of specific signals on an existing channel.
-</p>
-
-<p>
-<em>Updating</em>:
-Code must be updated by hand.
-A literal translation of
-</p>
-<pre>
-c := signal.Incoming()
-</pre>
-<p>
-is
-</p>
-<pre>
-c := make(chan os.Signal)
-signal.Notify(c) // ask for all signals
-</pre>
-<p>
-but most code should list the specific signals it wants to handle instead:
-</p>
-<pre>
-c := make(chan os.Signal)
-signal.Notify(c, syscall.SIGHUP, syscall.SIGQUIT)
-</pre>
-
-<h3 id="path_filepath">The path/filepath package</h3>
-
-<p>
-In Go 1, the <a href="/pkg/path/filepath/#Walk"><code>Walk</code></a> function of the
-<code>path/filepath</code> package
-has been changed to take a function value of type
-<a href="/pkg/path/filepath/#WalkFunc"><code>WalkFunc</code></a>
-instead of a <code>Visitor</code> interface value.
-<code>WalkFunc</code> unifies the handling of both files and directories.
-</p>
-
-<pre>
-    type WalkFunc func(path string, info os.FileInfo, err error) error
-</pre>
-
-<p>
-The <code>WalkFunc</code> function will be called even for files or directories that could not be opened;
-in such cases the error argument will describe the failure.
-If a directory's contents are to be skipped,
-the function should return the value <a href="/pkg/path/filepath/#variables"><code>filepath.SkipDir</code></a>.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/STARTWALK/` `/ENDWALK/`}}
--->    markFn := func(path string, info os.FileInfo, err error) error {
-        if path == &#34;pictures&#34; { <span class="comment">// Will skip walking of directory pictures and its contents.</span>
-            return filepath.SkipDir
-        }
-        if err != nil {
-            return err
-        }
-        log.Println(path)
-        return nil
-    }
-    err := filepath.Walk(&#34;.&#34;, markFn)
-    if err != nil {
-        log.Fatal(err)
-    }</pre>
-
-<p>
-<em>Updating</em>:
-The change simplifies most code but has subtle consequences, so affected programs
-will need to be updated by hand.
-The compiler will catch code using the old interface.
-</p>
-
-<h3 id="regexp">The regexp package</h3>
-
-<p>
-The <a href="/pkg/regexp/"><code>regexp</code></a> package has been rewritten.
-It has the same interface but the specification of the regular expressions
-it supports has changed from the old "egrep" form to that of
-<a href="http://code.google.com/p/re2/">RE2</a>.
-</p>
-
-<p>
-<em>Updating</em>:
-Code that uses the package should have its regular expressions checked by hand.
-</p>
-
-<h3 id="runtime">The runtime package</h3>
-
-<p>
-In Go 1, much of the API exported by package
-<code>runtime</code> has been removed in favor of
-functionality provided by other packages.
-Code using the <code>runtime.Type</code> interface
-or its specific concrete type implementations should
-now use package <a href="/pkg/reflect/"><code>reflect</code></a>.
-Code using <code>runtime.Semacquire</code> or <code>runtime.Semrelease</code>
-should use channels or the abstractions in package <a href="/pkg/sync/"><code>sync</code></a>.
-The <code>runtime.Alloc</code>, <code>runtime.Free</code>,
-and <code>runtime.Lookup</code> functions, an unsafe API created for
-debugging the memory allocator, have no replacement.
-</p>
-
-<p>
-Before, <code>runtime.MemStats</code> was a global variable holding
-statistics about memory allocation, and calls to <code>runtime.UpdateMemStats</code>
-ensured that it was up to date.
-In Go 1, <code>runtime.MemStats</code> is a struct type, and code should use
-<a href="/pkg/runtime/#ReadMemStats"><code>runtime.ReadMemStats</code></a>
-to obtain the current statistics.
-</p>
-
-<p>
-The package adds a new function,
-<a href="/pkg/runtime/#NumCPU"><code>runtime.NumCPU</code></a>, that returns the number of CPUs available
-for parallel execution, as reported by the operating system kernel.
-Its value can inform the setting of <code>GOMAXPROCS</code>.
-The <code>runtime.Cgocalls</code> and <code>runtime.Goroutines</code> functions
-have been renamed to <code>runtime.NumCgoCall</code> and <code>runtime.NumGoroutine</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update code for the function renamings.
-Other code will need to be updated by hand.
-</p>
-
-<h3 id="strconv">The strconv package</h3>
-
-<p>
-In Go 1, the
-<a href="/pkg/strconv/"><code>strconv</code></a>
-package has been significantly reworked to make it more Go-like and less C-like,
-although <code>Atoi</code> lives on (it's similar to
-<code>int(ParseInt(x, 10, 0))</code>), as does
-<code>Itoa(x)</code> (<code>FormatInt(int64(x), 10)</code>).
-There are also new variants of some of the functions that append to byte slices rather than
-return strings, to allow control over allocation.
-</p>
-
-<p>
-This table summarizes the renamings; see the
-<a href="/pkg/strconv/">package documentation</a>
-for full details.
-</p>
-
-<table class="codetable" frame="border" summary="strconv renames">
-<colgroup align="left" width="50%"></colgroup>
-<colgroup align="left" width="50%"></colgroup>
-<tr>
-<th align="left">Old call</th>
-<th align="left">New call</th>
-</tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Atob(x)</td> <td>ParseBool(x)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Atof32(x)</td> <td>ParseFloat(x, 32)§</td></tr>
-<tr><td>Atof64(x)</td> <td>ParseFloat(x, 64)</td></tr>
-<tr><td>AtofN(x, n)</td> <td>ParseFloat(x, n)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Atoi(x)</td> <td>Atoi(x)</td></tr>
-<tr><td>Atoi(x)</td> <td>ParseInt(x, 10, 0)§</td></tr>
-<tr><td>Atoi64(x)</td> <td>ParseInt(x, 10, 64)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Atoui(x)</td> <td>ParseUint(x, 10, 0)§</td></tr>
-<tr><td>Atoui64(x)</td> <td>ParseUint(x, 10, 64)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Btoi64(x, b)</td> <td>ParseInt(x, b, 64)</td></tr>
-<tr><td>Btoui64(x, b)</td> <td>ParseUint(x, b, 64)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Btoa(x)</td> <td>FormatBool(x)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Ftoa32(x, f, p)</td> <td>FormatFloat(float64(x), f, p, 32)</td></tr>
-<tr><td>Ftoa64(x, f, p)</td> <td>FormatFloat(x, f, p, 64)</td></tr>
-<tr><td>FtoaN(x, f, p, n)</td> <td>FormatFloat(x, f, p, n)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Itoa(x)</td> <td>Itoa(x)</td></tr>
-<tr><td>Itoa(x)</td> <td>FormatInt(int64(x), 10)</td></tr>
-<tr><td>Itoa64(x)</td> <td>FormatInt(x, 10)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Itob(x, b)</td> <td>FormatInt(int64(x), b)</td></tr>
-<tr><td>Itob64(x, b)</td> <td>FormatInt(x, b)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Uitoa(x)</td> <td>FormatUint(uint64(x), 10)</td></tr>
-<tr><td>Uitoa64(x)</td> <td>FormatUint(x, 10)</td></tr>
-<tr>
-<td colspan="2"><hr></td>
-</tr>
-<tr><td>Uitob(x, b)</td> <td>FormatUint(uint64(x), b)</td></tr>
-<tr><td>Uitob64(x, b)</td> <td>FormatUint(x, b)</td></tr>
-</table>
-               
-<p>
-<em>Updating</em>:
-Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
-<br>
-§ <code>Atoi</code> persists but <code>Atoui</code> and <code>Atof32</code> do not, so
-they may require
-a cast that must be added by hand; the <code>go</code> <code>fix</code> tool will warn about it.
-</p>
-
-
-<h3 id="templates">The template packages</h3>
-
-<p>
-The <code>template</code> and <code>exp/template/html</code> packages have moved to 
-<a href="/pkg/text/template/"><code>text/template</code></a> and
-<a href="/pkg/html/template/"><code>html/template</code></a>.
-More significant, the interface to these packages has been simplified.
-The template language is the same, but the concept of "template set" is gone
-and the functions and methods of the packages have changed accordingly,
-often by elimination.
-</p>
-
-<p>
-Instead of sets, a <code>Template</code> object
-may contain multiple named template definitions,
-in effect constructing
-name spaces for template invocation.
-A template can invoke any other template associated with it, but only those
-templates associated with it.
-The simplest way to associate templates is to parse them together, something
-made easier with the new structure of the packages.
-</p>
-
-<p>
-<em>Updating</em>:
-The imports will be updated by the fix tool.
-Single-template uses will otherwise be largely unaffected.
-Code that uses multiple templates in concert will need to be updated by hand.
-The <a href="/pkg/text/template/#examples">examples</a> in
-the documentation for <code>text/template</code> can provide guidance.
-</p>
-
-<h3 id="testing">The testing package</h3>
-
-<p>
-The testing package has a type, <code>B</code>, passed as an argument to benchmark functions.
-In Go 1, <code>B</code> has new methods, analogous to those of <code>T</code>, enabling
-logging and failure reporting.
-</p>
-
-<pre><!--{{code "/doc/progs/go1.go" `/func.*Benchmark/` `/^}/`}}
--->func BenchmarkSprintf(b *testing.B) {
-    <span class="comment">// Verify correctness before running benchmark.</span>
-    b.StopTimer()
-    got := fmt.Sprintf(&#34;%x&#34;, 23)
-    const expect = &#34;17&#34;
-    if expect != got {
-        b.Fatalf(&#34;expected %q; got %q&#34;, expect, got)
-    }
-    b.StartTimer()
-    for i := 0; i &lt; b.N; i++ {
-        fmt.Sprintf(&#34;%x&#34;, 23)
-    }
-}</pre>
-
-<p>
-<em>Updating</em>:
-Existing code is unaffected, although benchmarks that use <code>println</code>
-or <code>panic</code> should be updated to use the new methods.
-</p>
-
-<h3 id="testing_script">The testing/script package</h3>
-
-<p>
-The testing/script package has been deleted. It was a dreg.
-</p>
-
-<p>
-<em>Updating</em>:
-No code is likely to be affected.
-</p>
-
-<h3 id="unsafe">The unsafe package</h3>
-
-<p>
-In Go 1, the functions
-<code>unsafe.Typeof</code>, <code>unsafe.Reflect</code>,
-<code>unsafe.Unreflect</code>, <code>unsafe.New</code>, and
-<code>unsafe.NewArray</code> have been removed;
-they duplicated safer functionality provided by
-package <a href="/pkg/reflect/"><code>reflect</code></a>.
-</p>
-
-<p>
-<em>Updating</em>:
-Code using these functions must be rewritten to use
-package <a href="/pkg/reflect/"><code>reflect</code></a>.
-The changes to <a href="http://code.google.com/p/go/source/detail?r=2646dc956207">encoding/gob</a> and the <a href="http://code.google.com/p/goprotobuf/source/detail?r=5340ad310031">protocol buffer library</a>
-may be helpful as examples.
-</p>
-
-<h3 id="url">The url package</h3>
-
-<p>
-In Go 1 several fields from the <a href="/pkg/net/url/#URL"><code>url.URL</code></a> type
-were removed or replaced.
-</p>
-
-<p>
-The <a href="/pkg/net/url/#URL.String"><code>String</code></a> method now
-predictably rebuilds an encoded URL string using all of <code>URL</code>'s
-fields as necessary. The resulting string will also no longer have
-passwords escaped.
-</p>
-
-<p>
-The <code>Raw</code> field has been removed. In most cases the <code>String</code>
-method may be used in its place.
-</p>
-
-<p>
-The old <code>RawUserinfo</code> field is replaced by the <code>User</code>
-field, of type <a href="/pkg/net/url/#Userinfo"><code>*url.Userinfo</code></a>.
-Values of this type may be created using the new <a href="/pkg/net/url/#User"><code>url.User</code></a>
-and <a href="/pkg/net/url/#UserPassword"><code>url.UserPassword</code></a>
-functions. The <code>EscapeUserinfo</code> and <code>UnescapeUserinfo</code>
-functions are also gone.
-</p>
-
-<p>
-The <code>RawAuthority</code> field has been removed. The same information is
-available in the <code>Host</code> and <code>User</code> fields.
-</p>
-
-<p>
-The <code>RawPath</code> field and the <code>EncodedPath</code> method have
-been removed. The path information in rooted URLs (with a slash following the
-scheme) is now available only in decoded form in the <code>Path</code> field.
-Occasionally, the encoded data may be required to obtain information that
-was lost in the decoding process. These cases must be handled by accessing
-the data the URL was built from.
-</p>
-
-<p>
-URLs with non-rooted paths, such as <code>"mailto:dev@golang.org?subject=Hi"</code>,
-are also handled differently. The <code>OpaquePath</code> boolean field has been
-removed and a new <code>Opaque</code> string field introduced to hold the encoded
-path for such URLs. In Go 1, the cited URL parses as:
-</p>
-
-<pre>
-    URL{
-        Scheme: "mailto",
-        Opaque: "dev@golang.org",
-        RawQuery: "subject=Hi",
-    }
-</pre>
-
-<p>
-A new <a href="/pkg/net/url/#URL.RequestURI"><code>RequestURI</code></a> method was
-added to <code>URL</code>.
-</p>
-
-<p>
-The <code>ParseWithReference</code> function has been renamed to <code>ParseWithFragment</code>.
-</p>
-
-<p>
-<em>Updating</em>:
-Code that uses the old fields will fail to compile and must be updated by hand.
-The semantic changes make it difficult for the fix tool to update automatically.
-</p>
-
-<h2 id="cmd_go">The go command</h2>
-
-<p>
-Go 1 introduces the <a href="/cmd/go/">go command</a>, a tool for fetching,
-building, and installing Go packages and commands. The <code>go</code> command
-does away with makefiles, instead using Go source code to find dependencies and
-determine build conditions. Most existing Go programs will no longer require
-makefiles to be built.
-</p>
-
-<p>
-See <a href="/doc/code.html">How to Write Go Code</a> for a primer on the
-<code>go</code> command and the <a href="/cmd/go/">go command documentation</a>
-for the full details.
-</p>
-
-<p>
-<em>Updating</em>:
-Projects that depend on the Go project's old makefile-based build
-infrastructure (<code>Make.pkg</code>, <code>Make.cmd</code>, and so on) should
-switch to using the <code>go</code> command for building Go code and, if
-necessary, rewrite their makefiles to perform any auxiliary build tasks.
-</p>
-
-<h2 id="cmd_cgo">The cgo command</h2>
-
-<p>
-In Go 1, the <a href="/cmd/cgo">cgo command</a>
-uses a different <code>_cgo_export.h</code>
-file, which is generated for packages containing <code>//export</code> lines.
-The <code>_cgo_export.h</code> file now begins with the C preamble comment,
-so that exported function definitions can use types defined there.
-This has the effect of compiling the preamble multiple times, so a
-package using <code>//export</code> must not put function definitions
-or variable initializations in the C preamble.
-</p>
-
-<h2 id="releases">Packaged releases</h2>
-
-<p>
-One of the most significant changes associated with Go 1 is the availability
-of prepackaged, downloadable distributions.
-They are available for many combinations of architecture and operating system
-(including Windows) and the list will grow.
-Installation details are described on the
-<a href="/doc/install">Getting Started</a> page, while
-the distributions themselves are listed on the
-<a href="http://code.google.com/p/go/downloads/list">downloads page</a>.
-
-
-</div>
-
-<div id="footer">
-Build version go1.0.1.<br>
-A link <a href="http://code.google.com/policies.html#restrictions">noted</a>,
-and then, coming up on the very next line, we will
-find yet another link, link 3.0 if you will,
-after a few more words <a href="/LINK">link text</a>.<br>
-<a href="/doc/tos.html">Terms of Service</a> | 
-<a href="http://www.google.com/intl/en/privacy/privacy-policy.html">Privacy Policy</a>
-</div>
-
-<script type="text/javascript">
-(function() {
-  var ga = document.createElement("script"); ga.type = "text/javascript"; ga.async = true;
-  ga.src = ("https:" == document.location.protocol ? "https://ssl" : "http://www") + ".google-analytics.com/ga.js";
-  var s = document.getElementsByTagName("script")[0]; s.parentNode.insertBefore(ga, s);
-})();
-</script>
-</body>
-<script type="text/javascript">
-  (function() {
-    var po = document.createElement('script'); po.type = 'text/javascript'; po.async = true;
-    po.src = 'https://apis.google.com/js/minusone.js';
-    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(po, s);
-  })();
-</script>
-</html>
-
diff --git a/html/testdata/webkit/README b/html/testdata/webkit/README
deleted file mode 100644 (file)
index 9b4c2d8..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-The *.dat files in this directory are copied from The WebKit Open Source
-Project, specifically $WEBKITROOT/LayoutTests/html5lib/resources.
-WebKit is licensed under a BSD style license.
-http://webkit.org/coding/bsd-license.html says:
-
-Copyright (C) 2009 Apple Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND ANY
-EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
diff --git a/html/testdata/webkit/adoption01.dat b/html/testdata/webkit/adoption01.dat
deleted file mode 100644 (file)
index 38f98ef..0000000
+++ /dev/null
@@ -1,354 +0,0 @@
-#data
-<a><p></a></p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,10): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|     <p>
-|       <a>
-
-#data
-<a>1<p>2</a>3</p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,12): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <p>
-|       <a>
-|         "2"
-|       "3"
-
-#data
-<a>1<button>2</a>3</button>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,17): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <button>
-|       <a>
-|         "2"
-|       "3"
-
-#data
-<a>1<b>2</a>3</b>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,12): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|       <b>
-|         "2"
-|     <b>
-|       "3"
-
-#data
-<a>1<div>2<div>3</a>4</div>5</div>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,20): adoption-agency-1.3
-(1,20): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <div>
-|       <a>
-|         "2"
-|       <div>
-|         <a>
-|           "3"
-|         "4"
-|       "5"
-
-#data
-<table><a>1<p>2</a>3</p>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,10): unexpected-start-tag-implies-table-voodoo
-(1,11): unexpected-character-implies-table-voodoo
-(1,14): unexpected-start-tag-implies-table-voodoo
-(1,15): unexpected-character-implies-table-voodoo
-(1,19): unexpected-end-tag-implies-table-voodoo
-(1,19): adoption-agency-1.3
-(1,20): unexpected-character-implies-table-voodoo
-(1,24): unexpected-end-tag-implies-table-voodoo
-(1,24): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <p>
-|       <a>
-|         "2"
-|       "3"
-|     <table>
-
-#data
-<b><b><a><p></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,16): adoption-agency-1.3
-(1,16): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <b>
-|         <a>
-|         <p>
-|           <a>
-
-#data
-<b><a><b><p></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,16): adoption-agency-1.3
-(1,16): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <a>
-|         <b>
-|       <b>
-|         <p>
-|           <a>
-
-#data
-<a><b><b><p></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,16): adoption-agency-1.3
-(1,16): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <b>
-|         <b>
-|     <b>
-|       <b>
-|         <p>
-|           <a>
-
-#data
-<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,30): unexpected-end-tag
-(1,35): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "1"
-|       <s>
-|         id="A"
-|         "2"
-|         <b>
-|           id="B"
-|           "3"
-|     <s>
-|       id="A"
-|       <b>
-|         id="B"
-|         "4"
-|     <b>
-|       id="B"
-|       "5"
-
-#data
-<table><a>1<td>2</td>3</table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,10): unexpected-start-tag-implies-table-voodoo
-(1,11): unexpected-character-implies-table-voodoo
-(1,15): unexpected-cell-in-table-body
-(1,30): unexpected-implied-end-tag-in-table-view
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "1"
-|     <a>
-|       "3"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "2"
-
-#data
-<table>A<td>B</td>C</table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,8): unexpected-character-implies-table-voodoo
-(1,12): unexpected-cell-in-table-body
-(1,22): unexpected-character-implies-table-voodoo
-#document
-| <html>
-|   <head>
-|   <body>
-|     "AC"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "B"
-
-#data
-<a><svg><tr><input></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,23): unexpected-end-tag
-(1,23): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <svg svg>
-|         <svg tr>
-|           <svg input>
-
-#data
-<div><a><b><div><div><div><div><div><div><div><div><div><div></a>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): adoption-agency-1.3
-(1,65): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <a>
-|         <b>
-|       <b>
-|         <div>
-|           <a>
-|           <div>
-|             <a>
-|             <div>
-|               <a>
-|               <div>
-|                 <a>
-|                 <div>
-|                   <a>
-|                   <div>
-|                     <a>
-|                     <div>
-|                       <a>
-|                       <div>
-|                         <a>
-|                           <div>
-|                             <div>
-
-#data
-<div><a><b><u><i><code><div></a>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,32): adoption-agency-1.3
-(1,32): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <a>
-|         <b>
-|           <u>
-|             <i>
-|               <code>
-|       <u>
-|         <i>
-|           <code>
-|             <div>
-|               <a>
-
-#data
-<b><b><b><b>x</b></b></b></b>y
-#errors
-(1,3): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <b>
-|         <b>
-|           <b>
-|             "x"
-|     "y"
-
-#data
-<p><b><b><b><b><p>x
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,18): unexpected-end-tag
-(1,19): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <b>
-|         <b>
-|           <b>
-|             <b>
-|     <p>
-|       <b>
-|         <b>
-|           <b>
-|             "x"
-
-#data
-<b><em><foo><foob><fooc><aside></b></em>
-#errors
-(1,35): adoption-agency-1.3
-(1,40): adoption-agency-1.3
-(1,40): expected-closing-tag-but-got-eof
-#document-fragment
-div
-#document
-| <b>
-|   <em>
-|     <foo>
-|       <foob>
-|         <fooc>
-| <aside>
-|   <b>
diff --git a/html/testdata/webkit/adoption02.dat b/html/testdata/webkit/adoption02.dat
deleted file mode 100644 (file)
index e54d803..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-#data
-<b>1<i>2<p>3</b>4
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,16): adoption-agency-1.3
-(1,17): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "1"
-|       <i>
-|         "2"
-|     <i>
-|       <p>
-|         <b>
-|           "3"
-|         "4"
-
-#data
-<a><div><style></style><address><a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,35): unexpected-start-tag-implies-end-tag
-(1,35): adoption-agency-1.3
-(1,35): adoption-agency-1.3
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|     <div>
-|       <a>
-|         <style>
-|       <address>
-|         <a>
-|         <a>
diff --git a/html/testdata/webkit/blocks.dat b/html/testdata/webkit/blocks.dat
deleted file mode 100644 (file)
index 5d3871e..0000000
+++ /dev/null
@@ -1,719 +0,0 @@
-#data
-<!doctype html><p>foo<address>bar<p>baz
-#errors
-(1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “address”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <address>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><address><p>foo</address>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <address>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<article>bar<p>baz
-#errors
-(1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “article”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <article>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><article><p>foo</article>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <article>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<aside>bar<p>baz
-#errors
-(1,37): expected-closing-tag-but-got-eof
-28: Unclosed element “aside”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <aside>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><aside><p>foo</aside>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <aside>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<blockquote>bar<p>baz
-#errors
-(1,42): expected-closing-tag-but-got-eof
-33: Unclosed element “blockquote”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <blockquote>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><blockquote><p>foo</blockquote>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <blockquote>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<center>bar<p>baz
-#errors
-(1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “center”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <center>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><center><p>foo</center>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <center>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<details>bar<p>baz
-#errors
-(1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “details”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <details>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><details><p>foo</details>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <details>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<dialog>bar<p>baz
-#errors
-(1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “dialog”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <dialog>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><dialog><p>foo</dialog>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <dialog>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<dir>bar<p>baz
-#errors
-(1,35): expected-closing-tag-but-got-eof
-26: Unclosed element “dir”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <dir>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><dir><p>foo</dir>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <dir>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<div>bar<p>baz
-#errors
-(1,35): expected-closing-tag-but-got-eof
-26: Unclosed element “div”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <div>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><div><p>foo</div>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<dl>bar<p>baz
-#errors
-(1,34): expected-closing-tag-but-got-eof
-25: Unclosed element “dl”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <dl>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><dl><p>foo</dl>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <dl>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<fieldset>bar<p>baz
-#errors
-(1,40): expected-closing-tag-but-got-eof
-31: Unclosed element “fieldset”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <fieldset>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><fieldset><p>foo</fieldset>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <fieldset>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<figcaption>bar<p>baz
-#errors
-(1,42): expected-closing-tag-but-got-eof
-33: Unclosed element “figcaption”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <figcaption>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><figcaption><p>foo</figcaption>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <figcaption>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<figure>bar<p>baz
-#errors
-(1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “figure”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <figure>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><figure><p>foo</figure>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <figure>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<footer>bar<p>baz
-#errors
-(1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “footer”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <footer>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><footer><p>foo</footer>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <footer>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<header>bar<p>baz
-#errors
-(1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “header”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <header>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><header><p>foo</header>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <header>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<hgroup>bar<p>baz
-#errors
-(1,38): expected-closing-tag-but-got-eof
-29: Unclosed element “hgroup”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <hgroup>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><hgroup><p>foo</hgroup>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <hgroup>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<listing>bar<p>baz
-#errors
-(1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “listing”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <listing>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><listing><p>foo</listing>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <listing>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<menu>bar<p>baz
-#errors
-(1,36): expected-closing-tag-but-got-eof
-27: Unclosed element “menu”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <menu>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><menu><p>foo</menu>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menu>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<nav>bar<p>baz
-#errors
-(1,35): expected-closing-tag-but-got-eof
-26: Unclosed element “nav”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <nav>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><nav><p>foo</nav>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <nav>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<ol>bar<p>baz
-#errors
-(1,34): expected-closing-tag-but-got-eof
-25: Unclosed element “ol”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <ol>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><ol><p>foo</ol>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <ol>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<pre>bar<p>baz
-#errors
-(1,35): expected-closing-tag-but-got-eof
-26: Unclosed element “pre”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <pre>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><pre><p>foo</pre>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<section>bar<p>baz
-#errors
-(1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “section”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <section>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><section><p>foo</section>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <section>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<summary>bar<p>baz
-#errors
-(1,39): expected-closing-tag-but-got-eof
-30: Unclosed element “summary”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <summary>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><summary><p>foo</summary>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <summary>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!doctype html><p>foo<ul>bar<p>baz
-#errors
-(1,34): expected-closing-tag-but-got-eof
-25: Unclosed element “ul”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <ul>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><ul><p>foo</ul>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <ul>
-|       <p>
-|         "foo"
-|     "bar"
diff --git a/html/testdata/webkit/comments01.dat b/html/testdata/webkit/comments01.dat
deleted file mode 100644 (file)
index fa79c2b..0000000
+++ /dev/null
@@ -1,224 +0,0 @@
-#data
-FOO<!-- BAR -->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR  -->
-|     "BAZ"
-
-#data
-FOO<!-- BAR --!>BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-bang-after-double-dash-in-comment
-#new-errors
-(1:16) incorrectly-closed-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR  -->
-|     "BAZ"
-
-#data
-FOO<!-- BAR --! >BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-#new-errors
-(1:20) eof-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR --! >BAZ -->
-
-#data
-FOO<!-- BAR --!
->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-#new-errors
-(1:20) eof-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR --!
->BAZ -->
-
-#data
-FOO<!-- BAR --   >BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,21): eof-in-comment
-#new-errors
-(1:22) eof-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR --   >BAZ -->
-
-#data
-FOO<!-- BAR -- <QUX> -- MUX -->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,24): unexpected-char-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR -- <QUX> -- MUX  -->
-|     "BAZ"
-
-#data
-FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,24): unexpected-char-in-comment
-(1,31): unexpected-bang-after-double-dash-in-comment
-#new-errors
-(1:32) incorrectly-closed-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR -- <QUX> -- MUX  -->
-|     "BAZ"
-
-#data
-FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): unexpected-char-in-comment
-(1,24): unexpected-char-in-comment
-(1,31): unexpected-char-in-comment
-(1,35): eof-in-comment
-#new-errors
-(1:36) eof-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  BAR -- <QUX> -- MUX -- >BAZ -->
-
-#data
-FOO<!---->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  -->
-|     "BAZ"
-
-#data
-FOO<!--->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,9): incorrect-comment
-#new-errors
-(1:9) abrupt-closing-of-empty-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  -->
-|     "BAZ"
-
-#data
-FOO<!-->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,8): incorrect-comment
-#new-errors
-(1:8) abrupt-closing-of-empty-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!--  -->
-|     "BAZ"
-
-#data
-<?xml version="1.0">Hi
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,22): expected-doctype-but-got-chars
-#new-errors
-(1:2) unexpected-question-mark-instead-of-tag-name
-#document
-| <!-- ?xml version="1.0" -->
-| <html>
-|   <head>
-|   <body>
-|     "Hi"
-
-#data
-<?xml version="1.0">
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,20): expected-doctype-but-got-eof
-#new-errors
-(1:2) unexpected-question-mark-instead-of-tag-name
-#document
-| <!-- ?xml version="1.0" -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<?xml version
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,13): expected-doctype-but-got-eof
-#new-errors
-(1:2) unexpected-question-mark-instead-of-tag-name
-#document
-| <!-- ?xml version -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-FOO<!----->BAZ
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,10): unexpected-dash-after-double-dash-in-comment
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <!-- - -->
-|     "BAZ"
-
-#data
-<html><!-- comment --><title>Comment before head</title>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <!--  comment  -->
-|   <head>
-|     <title>
-|       "Comment before head"
-|   <body>
diff --git a/html/testdata/webkit/doctype01.dat b/html/testdata/webkit/doctype01.dat
deleted file mode 100644 (file)
index c845bec..0000000
+++ /dev/null
@@ -1,470 +0,0 @@
-#data
-<!DOCTYPE html>Hello
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!dOctYpE HtMl>Hello
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPEhtml>Hello
-#errors
-(1,9): need-space-after-doctype
-#new-errors
-(1:10) missing-whitespace-before-doctype-name
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE>Hello
-#errors
-(1,9): need-space-after-doctype
-(1,10): expected-doctype-name-but-got-right-bracket
-(1,10): unknown-doctype
-#new-errors
-(1:10) missing-doctype-name
-#document
-| <!DOCTYPE >
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE >Hello
-#errors
-(1,11): expected-doctype-name-but-got-right-bracket
-(1,11): unknown-doctype
-#new-errors
-(1:11) missing-doctype-name
-#document
-| <!DOCTYPE >
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato>Hello
-#errors
-(1,17): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato >Hello
-#errors
-(1,18): unknown-doctype
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato taco>Hello
-#errors
-(1,17): expected-space-or-right-bracket-in-doctype
-(1,22): unknown-doctype
-#new-errors
-(1:18) invalid-character-sequence-after-doctype-name
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato taco "ddd>Hello
-#errors
-(1,17): expected-space-or-right-bracket-in-doctype
-(1,27): unknown-doctype
-#new-errors
-(1:18) invalid-character-sequence-after-doctype-name
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato sYstEM>Hello
-#errors
-(1,24): unexpected-char-in-doctype
-(1,24): unknown-doctype
-#new-errors
-(1:24) missing-doctype-system-identifier
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato sYstEM    >Hello
-#errors
-(1,28): unexpected-char-in-doctype
-(1,28): unknown-doctype
-#new-errors
-(1:28) missing-doctype-system-identifier
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE   potato       sYstEM  ggg>Hello
-#errors
-(1,34): unexpected-char-in-doctype
-(1,37): unknown-doctype
-#new-errors
-(1:34) missing-quote-before-doctype-system-identifier
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM taco  >Hello
-#errors
-(1,25): unexpected-char-in-doctype
-(1,31): unknown-doctype
-#new-errors
-(1:25) missing-quote-before-doctype-system-identifier
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM 'taco"'>Hello
-#errors
-(1,32): unknown-doctype
-#document
-| <!DOCTYPE potato "" "taco"">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM "taco">Hello
-#errors
-(1,31): unknown-doctype
-#document
-| <!DOCTYPE potato "" "taco">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEM "tai'co">Hello
-#errors
-(1,33): unknown-doctype
-#document
-| <!DOCTYPE potato "" "tai'co">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato SYSTEMtaco "ddd">Hello
-#errors
-(1,24): unexpected-char-in-doctype
-(1,34): unknown-doctype
-#new-errors
-(1:24) missing-quote-before-doctype-system-identifier
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato grass SYSTEM taco>Hello
-#errors
-(1,17): expected-space-or-right-bracket-in-doctype
-(1,35): unknown-doctype
-#new-errors
-(1:18) invalid-character-sequence-after-doctype-name
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato pUbLIc>Hello
-#errors
-(1,24): unexpected-end-of-doctype
-(1,24): unknown-doctype
-#new-errors
-(1:24) missing-doctype-public-identifier
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato pUbLIc >Hello
-#errors
-(1,25): unexpected-end-of-doctype
-(1,25): unknown-doctype
-#new-errors
-(1:25) missing-doctype-public-identifier
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato pUbLIcgoof>Hello
-#errors
-(1,24): unexpected-char-in-doctype
-(1,28): unknown-doctype
-#new-errors
-(1:24) missing-quote-before-doctype-public-identifier
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC goof>Hello
-#errors
-(1,25): unexpected-char-in-doctype
-(1,29): unknown-doctype
-#new-errors
-(1:25) missing-quote-before-doctype-public-identifier
-#document
-| <!DOCTYPE potato>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC "go'of">Hello
-#errors
-(1,32): unknown-doctype
-#document
-| <!DOCTYPE potato "go'of" "">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC 'go'of'>Hello
-#errors
-(1,29): unexpected-char-in-doctype
-(1,32): unknown-doctype
-#new-errors
-(1:29) missing-quote-before-doctype-system-identifier
-#document
-| <!DOCTYPE potato "go" "">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC 'go:hh   of' >Hello
-#errors
-(1,38): unknown-doctype
-#document
-| <!DOCTYPE potato "go:hh   of" "">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
-#errors
-(1,38): unexpected-char-in-doctype
-(1,48): unknown-doctype
-#new-errors
-(1:38) missing-quote-before-doctype-system-identifier
-#document
-| <!DOCTYPE potato "W3C-//dfdf" "">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-   "http://www.w3.org/TR/html4/strict.dtd">Hello
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE ...>Hello
-#errors
-(1,14): unknown-doctype
-#document
-| <!DOCTYPE ...>
-| <html>
-|   <head>
-|   <body>
-|     "Hello"
-
-#data
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
-"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-#errors
-(2,58): unknown-doctype
-#document
-| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
-"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
-#errors
-(2,54): unknown-doctype
-#document
-| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [ 
-<!-- internal declarations -->
-]>
-#errors
-(1,23): expected-space-or-right-bracket-in-doctype
-(2,30): unknown-doctype
-#new-errors
-(1:24) invalid-character-sequence-after-doctype-name
-#document
-| <!DOCTYPE root-element>
-| <html>
-|   <head>
-|   <body>
-|     "]>"
-
-#data
-<!DOCTYPE html PUBLIC
-  "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
-    "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
-#errors
-(3,53): unknown-doctype
-#document
-| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
-#errors
-(1,63): unknown-doctype
-#document
-| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "Mine!"
-
-#data
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
-#errors
-(1,50): unexpected-char-in-doctype
-#new-errors
-(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
-#errors
-(1,50): unexpected-char-in-doctype
-#new-errors
-(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
-#errors
-(1,21): unexpected-char-in-doctype
-(1,49): unexpected-char-in-doctype
-#new-errors
-(1:22) missing-whitespace-after-doctype-public-keyword
-(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
-#errors
-(1,21): unexpected-char-in-doctype
-(1,49): unexpected-char-in-doctype
-#new-errors
-(1:22) missing-whitespace-after-doctype-public-keyword
-(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
-| <html>
-|   <head>
-|   <body>
diff --git a/html/testdata/webkit/domjs-unsafe.dat b/html/testdata/webkit/domjs-unsafe.dat
deleted file mode 100644 (file)
index 0a1b10f..0000000
Binary files a/html/testdata/webkit/domjs-unsafe.dat and /dev/null differ
diff --git a/html/testdata/webkit/entities01.dat b/html/testdata/webkit/entities01.dat
deleted file mode 100644 (file)
index 9bacebe..0000000
+++ /dev/null
@@ -1,943 +0,0 @@
-#data
-FOO&gt;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO>BAR"
-
-#data
-FOO&gtBAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,6): named-entity-without-semicolon
-#new-errors
-(1:7) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO>BAR"
-
-#data
-FOO&gt BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,6): named-entity-without-semicolon
-#new-errors
-(1:7) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO> BAR"
-
-#data
-FOO&gt;;;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO>;;BAR"
-
-#data
-I'm &notit; I tell you
-#errors
-(1,4): expected-doctype-but-got-chars
-(1,9): named-entity-without-semicolon
-#new-errors
-(1:9) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "I'm ¬it; I tell you"
-
-#data
-I'm &notin; I tell you
-#errors
-(1,4): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "I'm ∉ I tell you"
-
-#data
-&ammmp;
-#errors
-(1,1): expected-doctype-but-got-chars
-(1,7): unknown-named-character-reference
-#new-errors
-(1:7) unknown-named-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "&ammmp;"
-
-#data
-&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;
-#errors
-(1,1): expected-doctype-but-got-chars
-(1,950): unknown-named-character-reference
-#new-errors
-(1:950) unknown-named-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"
-
-#data
-FOO& BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO& BAR"
-
-#data
-FOO&<BAR>
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,9): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&"
-|     <bar>
-
-#data
-FOO&&&&gt;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&&&>BAR"
-
-#data
-FOO&#41;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO)BAR"
-
-#data
-FOO&#x41;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOABAR"
-
-#data
-FOO&#X41;BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOABAR"
-
-#data
-FOO&#BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,5): expected-numeric-entity
-#new-errors
-(1:6) absence-of-digits-in-numeric-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&#BAR"
-
-#data
-FOO&#ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,5): expected-numeric-entity
-#new-errors
-(1:6) absence-of-digits-in-numeric-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&#ZOO"
-
-#data
-FOO&#xBAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,7): expected-numeric-entity
-#new-errors
-(1:9) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOºR"
-
-#data
-FOO&#xZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,6): expected-numeric-entity
-#new-errors
-(1:7) absence-of-digits-in-numeric-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&#xZOO"
-
-#data
-FOO&#XZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,6): expected-numeric-entity
-#new-errors
-(1:7) absence-of-digits-in-numeric-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO&#XZOO"
-
-#data
-FOO&#41BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,7): numeric-entity-without-semicolon
-#new-errors
-(1:8) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO)BAR"
-
-#data
-FOO&#x41BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,10): numeric-entity-without-semicolon
-#new-errors
-(1:11) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO䆺R"
-
-#data
-FOO&#x41ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,8): numeric-entity-without-semicolon
-#new-errors
-(1:9) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOAZOO"
-
-#data
-FOO&#x0000;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) null-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
-
-#data
-FOO&#x0078;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOxZOO"
-
-#data
-FOO&#x0079;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOyZOO"
-
-#data
-FOO&#x0080;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO€ZOO"
-
-#data
-FOO&#x0081;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO\81ZOO"
-
-#data
-FOO&#x0082;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‚ZOO"
-
-#data
-FOO&#x0083;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOƒZOO"
-
-#data
-FOO&#x0084;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO„ZOO"
-
-#data
-FOO&#x0085;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO…ZOO"
-
-#data
-FOO&#x0086;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO†ZOO"
-
-#data
-FOO&#x0087;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‡ZOO"
-
-#data
-FOO&#x0088;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOˆZOO"
-
-#data
-FOO&#x0089;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‰ZOO"
-
-#data
-FOO&#x008A;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOŠZOO"
-
-#data
-FOO&#x008B;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‹ZOO"
-
-#data
-FOO&#x008C;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOŒZOO"
-
-#data
-FOO&#x008D;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO\8dZOO"
-
-#data
-FOO&#x008E;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOŽZOO"
-
-#data
-FOO&#x008F;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO\8fZOO"
-
-#data
-FOO&#x0090;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO\90ZOO"
-
-#data
-FOO&#x0091;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO‘ZOO"
-
-#data
-FOO&#x0092;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO’ZOO"
-
-#data
-FOO&#x0093;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO“ZOO"
-
-#data
-FOO&#x0094;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO”ZOO"
-
-#data
-FOO&#x0095;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO•ZOO"
-
-#data
-FOO&#x0096;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO–ZOO"
-
-#data
-FOO&#x0097;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO—ZOO"
-
-#data
-FOO&#x0098;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO˜ZOO"
-
-#data
-FOO&#x0099;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO™ZOO"
-
-#data
-FOO&#x009A;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOšZOO"
-
-#data
-FOO&#x009B;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO›ZOO"
-
-#data
-FOO&#x009C;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOœZOO"
-
-#data
-FOO&#x009D;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO\9dZOO"
-
-#data
-FOO&#x009E;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOžZOO"
-
-#data
-FOO&#x009F;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) control-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOŸZOO"
-
-#data
-FOO&#x00A0;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO ZOO"
-
-#data
-FOO&#xD7FF;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO퟿ZOO"
-
-#data
-FOO&#xD800;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) surrogate-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
-
-#data
-FOO&#xD801;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) surrogate-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
-
-#data
-FOO&#xDFFE;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) surrogate-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
-
-#data
-FOO&#xDFFF;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,11): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:12) surrogate-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
-
-#data
-FOO&#xE000;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOOZOO"
-
-#data
-FOO&#x10FFFE;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:14) noncharacter-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO􏿾ZOO"
-
-#data
-FOO&#x1087D4;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO􈟔ZOO"
-
-#data
-FOO&#x10FFFF;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:14) noncharacter-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO􏿿ZOO"
-
-#data
-FOO&#x110000;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:14) character-reference-outside-unicode-range
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
-
-#data
-FOO&#xFFFFFF;ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:14) character-reference-outside-unicode-range
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
-
-#data
-FOO&#11111111111
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-(1,13): eof-in-numeric-entity
-#new-errors
-(1:17) missing-semicolon-after-character-reference
-(1:17) character-reference-outside-unicode-range
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�"
-
-#data
-FOO&#1111111111
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-(1,13): eof-in-numeric-entity
-#new-errors
-(1:16) missing-semicolon-after-character-reference
-(1:16) character-reference-outside-unicode-range
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�"
-
-#data
-FOO&#111111111111
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,13): illegal-codepoint-for-numeric-entity
-(1,13): eof-in-numeric-entity
-#new-errors
-(1:18) missing-semicolon-after-character-reference
-(1:18) character-reference-outside-unicode-range
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�"
-
-#data
-FOO&#11111111111ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,16): numeric-entity-without-semicolon
-(1,16): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:17) missing-semicolon-after-character-reference
-(1:17) character-reference-outside-unicode-range
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
-
-#data
-FOO&#1111111111ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,15): numeric-entity-without-semicolon
-(1,15): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:16) missing-semicolon-after-character-reference
-(1:16) character-reference-outside-unicode-range
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
-
-#data
-FOO&#111111111111ZOO
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,17): numeric-entity-without-semicolon
-(1,17): illegal-codepoint-for-numeric-entity
-#new-errors
-(1:18) missing-semicolon-after-character-reference
-(1:18) character-reference-outside-unicode-range
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO�ZOO"
diff --git a/html/testdata/webkit/entities02.dat b/html/testdata/webkit/entities02.dat
deleted file mode 100644 (file)
index 0c6e898..0000000
+++ /dev/null
@@ -1,309 +0,0 @@
-#data
-<div bar="ZZ&gt;YY"></div>
-#errors
-(1,20): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ>YY"
-
-#data
-<div bar="ZZ&"></div>
-#errors
-(1,15): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&"
-
-#data
-<div bar='ZZ&'></div>
-#errors
-(1,15): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&"
-
-#data
-<div bar=ZZ&></div>
-#errors
-(1,13): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&"
-
-#data
-<div bar="ZZ&gt=YY"></div>
-#errors
-(1,15): named-entity-without-semicolon
-(1,20): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&gt=YY"
-
-#data
-<div bar="ZZ&gt0YY"></div>
-#errors
-(1,20): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&gt0YY"
-
-#data
-<div bar="ZZ&gt9YY"></div>
-#errors
-(1,20): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&gt9YY"
-
-#data
-<div bar="ZZ&gtaYY"></div>
-#errors
-(1,20): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&gtaYY"
-
-#data
-<div bar="ZZ&gtZYY"></div>
-#errors
-(1,20): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&gtZYY"
-
-#data
-<div bar="ZZ&gt YY"></div>
-#errors
-(1,15): named-entity-without-semicolon
-(1,20): expected-doctype-but-got-start-tag
-#new-errors
-(1:16) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ> YY"
-
-#data
-<div bar="ZZ&gt"></div>
-#errors
-(1,15): named-entity-without-semicolon
-(1,17): expected-doctype-but-got-start-tag
-#new-errors
-(1:16) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ>"
-
-#data
-<div bar='ZZ&gt'></div>
-#errors
-(1,15): named-entity-without-semicolon
-(1,17): expected-doctype-but-got-start-tag
-#new-errors
-(1:16) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ>"
-
-#data
-<div bar=ZZ&gt></div>
-#errors
-(1,14): named-entity-without-semicolon
-(1,15): expected-doctype-but-got-start-tag
-#new-errors
-(1:15) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ>"
-
-#data
-<div bar="ZZ&pound_id=23"></div>
-#errors
-(1,18): named-entity-without-semicolon
-(1,26): expected-doctype-but-got-start-tag
-#new-errors
-(1:19) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ£_id=23"
-
-#data
-<div bar="ZZ&prod_id=23"></div>
-#errors
-(1,25): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&prod_id=23"
-
-#data
-<div bar="ZZ&pound;_id=23"></div>
-#errors
-(1,27): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ£_id=23"
-
-#data
-<div bar="ZZ&prod;_id=23"></div>
-#errors
-(1,26): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ∏_id=23"
-
-#data
-<div bar="ZZ&pound=23"></div>
-#errors
-(1,18): named-entity-without-semicolon
-(1,23): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&pound=23"
-
-#data
-<div bar="ZZ&prod=23"></div>
-#errors
-(1,22): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       bar="ZZ&prod=23"
-
-#data
-<div>ZZ&pound_id=23</div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,13): named-entity-without-semicolon
-#new-errors
-(1:14) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "ZZ£_id=23"
-
-#data
-<div>ZZ&prod_id=23</div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "ZZ&prod_id=23"
-
-#data
-<div>ZZ&pound;_id=23</div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "ZZ£_id=23"
-
-#data
-<div>ZZ&prod;_id=23</div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "ZZ∏_id=23"
-
-#data
-<div>ZZ&pound=23</div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,13): named-entity-without-semicolon
-#new-errors
-(1:14) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "ZZ£=23"
-
-#data
-<div>ZZ&prod=23</div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "ZZ&prod=23"
-
-#data
-<div>ZZ&AElig=</div>
-#errors
-#new-errors
-(1:14) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "ZZÆ="
diff --git a/html/testdata/webkit/foreign-fragment.dat b/html/testdata/webkit/foreign-fragment.dat
deleted file mode 100644 (file)
index c81ae81..0000000
+++ /dev/null
@@ -1,559 +0,0 @@
-#data
-<nobr>X
-#errors
-6: HTML start tag “nobr” in a foreign namespace context.
-7: End of file seen and there were open elements.
-6: Unclosed element “nobr”.
-#document-fragment
-svg path
-#document
-| <svg nobr>
-|   "X"
-
-#data
-<font color></font>X
-#errors
-12: HTML start tag “font” in a foreign namespace context.
-#document-fragment
-svg path
-#document
-| <svg font>
-|   color=""
-| "X"
-
-#data
-<font></font>X
-#errors
-#document-fragment
-svg path
-#document
-| <svg font>
-| "X"
-
-#data
-<g></path>X
-#errors
-10: End tag “path” did not match the name of the current open element (“g”).
-11: End of file seen and there were open elements.
-3: Unclosed element “g”.
-#document-fragment
-svg path
-#document
-| <svg g>
-|   "X"
-
-#data
-</path>X
-#errors
-5: Stray end tag “path”.
-#document-fragment
-svg path
-#document
-| "X"
-
-#data
-</foreignObject>X
-#errors
-5: Stray end tag “foreignobject”.
-#document-fragment
-svg foreignObject
-#document
-| "X"
-
-#data
-</desc>X
-#errors
-5: Stray end tag “desc”.
-#document-fragment
-svg desc
-#document
-| "X"
-
-#data
-</title>X
-#errors
-5: Stray end tag “title”.
-#document-fragment
-svg title
-#document
-| "X"
-
-#data
-</svg>X
-#errors
-5: Stray end tag “svg”.
-#document-fragment
-svg svg
-#document
-| "X"
-
-#data
-</mfenced>X
-#errors
-5: Stray end tag “mfenced”.
-#document-fragment
-math mfenced
-#document
-| "X"
-
-#data
-</malignmark>X
-#errors
-5: Stray end tag “malignmark”.
-#document-fragment
-math malignmark
-#document
-| "X"
-
-#data
-</math>X
-#errors
-5: Stray end tag “math”.
-#document-fragment
-math math
-#document
-| "X"
-
-#data
-</annotation-xml>X
-#errors
-5: Stray end tag “annotation-xml”.
-#document-fragment
-math annotation-xml
-#document
-| "X"
-
-#data
-</mtext>X
-#errors
-5: Stray end tag “mtext”.
-#document-fragment
-math mtext
-#document
-| "X"
-
-#data
-</mi>X
-#errors
-5: Stray end tag “mi”.
-#document-fragment
-math mi
-#document
-| "X"
-
-#data
-</mo>X
-#errors
-5: Stray end tag “mo”.
-#document-fragment
-math mo
-#document
-| "X"
-
-#data
-</mn>X
-#errors
-5: Stray end tag “mn”.
-#document-fragment
-math mn
-#document
-| "X"
-
-#data
-</ms>X
-#errors
-5: Stray end tag “ms”.
-#document-fragment
-math ms
-#document
-| "X"
-
-#data
-<b></b><mglyph/><i></i><malignmark/><u></u><ms/>X
-#errors
-51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
-52: End of file seen and there were open elements.
-51: Unclosed element “ms”.
-#new-errors
-(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
-#document-fragment
-math ms
-#document
-| <b>
-| <math mglyph>
-| <i>
-| <math malignmark>
-| <u>
-| <ms>
-|   "X"
-
-#data
-<malignmark></malignmark>
-#errors
-#document-fragment
-math ms
-#document
-| <math malignmark>
-
-#data
-<div></div>
-#errors
-#document-fragment
-math ms
-#document
-| <div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-math ms
-#document
-| <figure>
-
-#data
-<b></b><mglyph/><i></i><malignmark/><u></u><mn/>X
-#errors
-51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
-52: End of file seen and there were open elements.
-51: Unclosed element “mn”.
-#new-errors
-(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
-#document-fragment
-math mn
-#document
-| <b>
-| <math mglyph>
-| <i>
-| <math malignmark>
-| <u>
-| <mn>
-|   "X"
-
-#data
-<malignmark></malignmark>
-#errors
-#document-fragment
-math mn
-#document
-| <math malignmark>
-
-#data
-<div></div>
-#errors
-#document-fragment
-math mn
-#document
-| <div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-math mn
-#document
-| <figure>
-
-#data
-<b></b><mglyph/><i></i><malignmark/><u></u><mo/>X
-#errors
-51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
-52: End of file seen and there were open elements.
-51: Unclosed element “mo”.
-#new-errors
-(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
-#document-fragment
-math mo
-#document
-| <b>
-| <math mglyph>
-| <i>
-| <math malignmark>
-| <u>
-| <mo>
-|   "X"
-
-#data
-<malignmark></malignmark>
-#errors
-#document-fragment
-math mo
-#document
-| <math malignmark>
-
-#data
-<div></div>
-#errors
-#document-fragment
-math mo
-#document
-| <div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-math mo
-#document
-| <figure>
-
-#data
-<b></b><mglyph/><i></i><malignmark/><u></u><mi/>X
-#errors
-51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
-52: End of file seen and there were open elements.
-51: Unclosed element “mi”.
-#new-errors
-(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
-#document-fragment
-math mi
-#document
-| <b>
-| <math mglyph>
-| <i>
-| <math malignmark>
-| <u>
-| <mi>
-|   "X"
-
-#data
-<malignmark></malignmark>
-#errors
-#document-fragment
-math mi
-#document
-| <math malignmark>
-
-#data
-<div></div>
-#errors
-#document-fragment
-math mi
-#document
-| <div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-math mi
-#document
-| <figure>
-
-#data
-<b></b><mglyph/><i></i><malignmark/><u></u><mtext/>X
-#errors
-51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
-52: End of file seen and there were open elements.
-51: Unclosed element “mtext”.
-#new-errors
-(1:44-1:52) non-void-html-element-start-tag-with-trailing-solidus
-#document-fragment
-math mtext
-#document
-| <b>
-| <math mglyph>
-| <i>
-| <math malignmark>
-| <u>
-| <mtext>
-|   "X"
-
-#data
-<malignmark></malignmark>
-#errors
-#document-fragment
-math mtext
-#document
-| <math malignmark>
-
-#data
-<div></div>
-#errors
-#document-fragment
-math mtext
-#document
-| <div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-math mtext
-#document
-| <figure>
-
-#data
-<div></div>
-#errors
-5: HTML start tag “div” in a foreign namespace context.
-#document-fragment
-math annotation-xml
-#document
-| <math div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-math annotation-xml
-#document
-| <math figure>
-
-#data
-<div></div>
-#errors
-5: HTML start tag “div” in a foreign namespace context.
-#document-fragment
-math math
-#document
-| <math div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-math math
-#document
-| <math figure>
-
-#data
-<div></div>
-#errors
-#document-fragment
-svg foreignObject
-#document
-| <div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-svg foreignObject
-#document
-| <figure>
-
-#data
-<div></div>
-#errors
-#document-fragment
-svg title
-#document
-| <div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-svg title
-#document
-| <figure>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-svg desc
-#document
-| <figure>
-
-#data
-<div><h1>X</h1></div>
-#errors
-5: HTML start tag “div” in a foreign namespace context.
-9: HTML start tag “h1” in a foreign namespace context.
-#document-fragment
-svg svg
-#document
-| <svg div>
-|   <svg h1>
-|     "X"
-
-#data
-<div></div>
-#errors
-5: HTML start tag “div” in a foreign namespace context.
-#document-fragment
-svg svg
-#document
-| <svg div>
-
-#data
-<div></div>
-#errors
-#document-fragment
-svg desc
-#document
-| <div>
-
-#data
-<figure></figure>
-#errors
-#document-fragment
-svg desc
-#document
-| <figure>
-
-#data
-<plaintext><foo>
-#errors
-(1,16): expected-closing-tag-but-got-eof
-#document-fragment
-svg desc
-#document
-| <plaintext>
-|   "<foo>"
-
-#data
-<frameset>X
-#errors
-6: Stray start tag “frameset”.
-#document-fragment
-svg desc
-#document
-| "X"
-
-#data
-<head>X
-#errors
-6: Stray start tag “head”.
-#document-fragment
-svg desc
-#document
-| "X"
-
-#data
-<body>X
-#errors
-6: Stray start tag “body”.
-#document-fragment
-svg desc
-#document
-| "X"
-
-#data
-<html>X
-#errors
-6: Stray start tag “html”.
-#document-fragment
-svg desc
-#document
-| "X"
-
-#data
-<html class="foo">X
-#errors
-6: Stray start tag “html”.
-#document-fragment
-svg desc
-#document
-| "X"
-
-#data
-<body class="foo">X
-#errors
-6: Stray start tag “body”.
-#document-fragment
-svg desc
-#document
-| "X"
diff --git a/html/testdata/webkit/html5test-com.dat b/html/testdata/webkit/html5test-com.dat
deleted file mode 100644 (file)
index f738010..0000000
+++ /dev/null
@@ -1,302 +0,0 @@
-#data
-<div<div>
-#errors
-(1,9): expected-doctype-but-got-start-tag
-(1,9): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div<div>
-
-#data
-<div foo<bar=''>
-#errors
-(1,9): invalid-character-in-attribute-name
-(1,16): expected-doctype-but-got-start-tag
-(1,16): expected-closing-tag-but-got-eof
-#new-errors
-(1:9) unexpected-character-in-attribute-name
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       foo<bar=""
-
-#data
-<div foo=`bar`>
-#errors
-(1,10): equals-in-unquoted-attribute-value
-(1,14): unexpected-character-in-unquoted-attribute-value
-(1,15): expected-doctype-but-got-start-tag
-(1,15): expected-closing-tag-but-got-eof
-#new-errors
-(1:10) unexpected-character-in-unquoted-attribute-value
-(1:14) unexpected-character-in-unquoted-attribute-value
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       foo="`bar`"
-
-#data
-<div \"foo=''>
-#errors
-(1,7): invalid-character-in-attribute-name
-(1,14): expected-doctype-but-got-start-tag
-(1,14): expected-closing-tag-but-got-eof
-#new-errors
-(1:7) unexpected-character-in-attribute-name
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       \"foo=""
-
-#data
-<a href='\nbar'></a>
-#errors
-(1,16): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       href="\nbar"
-
-#data
-<!DOCTYPE html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-&lang;&rang;
-#errors
-(1,6): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "⟨⟩"
-
-#data
-&apos;
-#errors
-(1,6): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "'"
-
-#data
-&ImaginaryI;
-#errors
-(1,12): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "ⅈ"
-
-#data
-&Kopf;
-#errors
-(1,6): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "𝕂"
-
-#data
-&notinva;
-#errors
-(1,9): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "∉"
-
-#data
-<?import namespace="foo" implementation="#bar">
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,47): expected-doctype-but-got-eof
-#new-errors
-(1:2) unexpected-question-mark-instead-of-tag-name
-#document
-| <!-- ?import namespace="foo" implementation="#bar" -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!--foo--bar-->
-#errors
-(1,10): unexpected-char-in-comment
-(1,15): expected-doctype-but-got-eof
-#document
-| <!-- foo--bar -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<![CDATA[x]]>
-#errors
-(1,2): expected-dashes-or-doctype
-(1,13): expected-doctype-but-got-eof
-#new-errors
-(1:9) cdata-in-html-content
-#document
-| <!-- [CDATA[x]] -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<textarea><!--</textarea>--></textarea>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,39): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "<!--"
-|     "-->"
-
-#data
-<textarea><!--</textarea>-->
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "<!--"
-|     "-->"
-
-#data
-<style><!--</style>--></style>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,30): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "<!--"
-|   <body>
-|     "-->"
-
-#data
-<style><!--</style>-->
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "<!--"
-|   <body>
-|     "-->"
-
-#data
-<ul><li>A </li> <li>B</li></ul>
-#errors
-(1,4): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ul>
-|       <li>
-|         "A "
-|       " "
-|       <li>
-|         "B"
-
-#data
-<table><form><input type=hidden><input></form><div></div></table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,13): unexpected-form-in-table
-(1,32): unexpected-hidden-input-in-table
-(1,39): unexpected-start-tag-implies-table-voodoo
-(1,46): unexpected-end-tag-implies-table-voodoo
-(1,46): unexpected-end-tag
-(1,51): unexpected-start-tag-implies-table-voodoo
-(1,57): unexpected-end-tag-implies-table-voodoo
-#document
-| <html>
-|   <head>
-|   <body>
-|     <input>
-|     <div>
-|     <table>
-|       <form>
-|       <input>
-|         type="hidden"
-
-#data
-<i>A<b>B<p></i>C</b>D
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,15): adoption-agency-1.3
-(1,20): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <i>
-|       "A"
-|       <b>
-|         "B"
-|     <b>
-|     <p>
-|       <b>
-|         <i>
-|         "C"
-|       "D"
-
-#data
-<div></div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-
-#data
-<svg></svg>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-
-#data
-<math></math>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
diff --git a/html/testdata/webkit/inbody01.dat b/html/testdata/webkit/inbody01.dat
deleted file mode 100644 (file)
index 10f6520..0000000
+++ /dev/null
@@ -1,54 +0,0 @@
-#data
-<button>1</foo>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,15): unexpected-end-tag
-(1,15): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <button>
-|       "1"
-
-#data
-<foo>1<p>2</foo>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,16): unexpected-end-tag
-(1,16): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <foo>
-|       "1"
-|       <p>
-|         "2"
-
-#data
-<dd>1</foo>
-#errors
-(1,4): expected-doctype-but-got-start-tag
-(1,11): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <dd>
-|       "1"
-
-#data
-<foo>1<dd>2</foo>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,17): unexpected-end-tag
-(1,17): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <foo>
-|       "1"
-|       <dd>
-|         "2"
diff --git a/html/testdata/webkit/isindex.dat b/html/testdata/webkit/isindex.dat
deleted file mode 100644 (file)
index 733f82e..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-#data
-<isindex>
-#errors
-(1,9): expected-doctype-but-got-start-tag
-(1,9): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <isindex>
-
-#data
-<isindex name="A" action="B" prompt="C" foo="D">
-#errors
-(1,48): expected-doctype-but-got-start-tag
-(1,48): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <isindex>
-|       action="B"
-|       foo="D"
-|       name="A"
-|       prompt="C"
-
-#data
-<form><isindex>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,15): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <form>
-|       <isindex>
-
-#data
-<!doctype html><isindex>x</isindex>x
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <isindex>
-|       "x"
-|     "x"
diff --git a/html/testdata/webkit/main-element.dat b/html/testdata/webkit/main-element.dat
deleted file mode 100644 (file)
index 4b103bb..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-#data
-<!doctype html><p>foo<main>bar<p>baz
-#errors
-(1,36): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|     <main>
-|       "bar"
-|       <p>
-|         "baz"
-
-#data
-<!doctype html><main><p>foo</main>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <main>
-|       <p>
-|         "foo"
-|     "bar"
-
-#data
-<!DOCTYPE html>xxx<svg><x><g><a><main><b>
-#errors
- * (1,42) unexpected HTML-like start tag token in foreign content
- * (1,42) unexpected end of file
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "xxx"
-|     <svg svg>
-|       <svg x>
-|         <svg g>
-|           <svg a>
-|             <svg main>
-|     <b>
diff --git a/html/testdata/webkit/math.dat b/html/testdata/webkit/math.dat
deleted file mode 100644 (file)
index ae9cd7c..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-#data
-<math><tr><td><mo><tr>
-#errors
-#document-fragment
-td
-#document
-| <math math>
-|   <math tr>
-|     <math td>
-|       <math mo>
-
-#data
-<math><tr><td><mo><tr>
-#errors
-#document-fragment
-tr
-#document
-| <math math>
-|   <math tr>
-|     <math td>
-|       <math mo>
-
-#data
-<math><thead><mo><tbody>
-#errors
-#document-fragment
-thead
-#document
-| <math math>
-|   <math thead>
-|     <math mo>
-
-#data
-<math><tfoot><mo><tbody>
-#errors
-#document-fragment
-tfoot
-#document
-| <math math>
-|   <math tfoot>
-|     <math mo>
-
-#data
-<math><tbody><mo><tfoot>
-#errors
-#document-fragment
-tbody
-#document
-| <math math>
-|   <math tbody>
-|     <math mo>
-
-#data
-<math><tbody><mo></table>
-#errors
-#document-fragment
-tbody
-#document
-| <math math>
-|   <math tbody>
-|     <math mo>
-
-#data
-<math><thead><mo></table>
-#errors
-#document-fragment
-tbody
-#document
-| <math math>
-|   <math thead>
-|     <math mo>
-
-#data
-<math><tfoot><mo></table>
-#errors
-#document-fragment
-tbody
-#document
-| <math math>
-|   <math tfoot>
-|     <math mo>
diff --git a/html/testdata/webkit/menuitem-element.dat b/html/testdata/webkit/menuitem-element.dat
deleted file mode 100644 (file)
index 43aa0c6..0000000
+++ /dev/null
@@ -1,257 +0,0 @@
-#data
-<menuitem>
-#errors
-10: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-10: End of file seen and there were open elements.
-10: Unclosed element “menuitem”.
-#document
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-
-#data
-</menuitem>
-#errors
-11: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-11: Stray end tag “menuitem”.
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><body><menuitem>A
-#errors
-32: End of file seen and there were open elements.
-31: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-|       "A"
-
-#data
-<!DOCTYPE html><body><menuitem>A<menuitem>B
-#errors
-43: End of file seen and there were open elements.
-42: Unclosed element “menuitem”.
-31: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-|       "A"
-|       <menuitem>
-|         "B"
-
-#data
-<!DOCTYPE html><body><menuitem>A<menu>B</menu>
-#errors
-46: End of file seen and there were open elements.
-31: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-|       "A"
-|       <menu>
-|         "B"
-
-#data
-<!DOCTYPE html><body><menuitem>A<hr>B
-#errors
-37: End of file seen and there were open elements.
-31: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-|       "A"
-|       <hr>
-|       "B"
-
-#data
-<!DOCTYPE html><li><menuitem><li>
-#errors
-33: End tag “li” implied, but there were open elements.
-29: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <li>
-|       <menuitem>
-|     <li>
-
-#data
-<!DOCTYPE html><menuitem><p></menuitem>x
-#errors
-39: Stray end tag “menuitem”.
-40: End of file seen and there were open elements.
-25: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-|       <p>
-|         "x"
-
-#data
-<!DOCTYPE html><p><b></p><menuitem>
-#errors
-25: End tag “p” seen, but there were open elements.
-21: Unclosed element “b”.
-35: End of file seen and there were open elements.
-35: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <b>
-|     <b>
-|       <menuitem>
-
-#data
-<!DOCTYPE html><menuitem><asdf></menuitem>x
-#errors
-42: End tag “menuitem” seen, but there were open elements.
-31: Unclosed element “asdf”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-|       <asdf>
-|     "x"
-
-#data
-<!DOCTYPE html></menuitem>
-#errors
-26: Stray end tag “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><html></menuitem>
-#errors
-26: Stray end tag “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><head></menuitem>
-#errors
-26: Stray end tag “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><select><menuitem></select>
-#errors
-33: Stray start tag “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!DOCTYPE html><option><menuitem>
-#errors
-33: End of file seen and there were open elements.
-33: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <option>
-|       <menuitem>
-
-#data
-<!DOCTYPE html><menuitem><option>
-#errors
-33: End of file seen and there were open elements.
-25: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-|       <option>
-
-#data
-<!DOCTYPE html><menuitem></body>
-#errors
-32: End tag for  “body” seen, but there were unclosed elements.
-25: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-
-#data
-<!DOCTYPE html><menuitem></html>
-#errors
-32: End tag for  “html” seen, but there were unclosed elements.
-25: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-
-#data
-<!DOCTYPE html><menuitem><p>
-#errors
-28: End of file seen and there were open elements.
-25: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-|       <p>
-
-#data
-<!DOCTYPE html><menuitem><li>
-#errors
-29: End of file seen and there were open elements.
-25: Unclosed element “menuitem”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <menuitem>
-|       <li>
diff --git a/html/testdata/webkit/namespace-sensitivity.dat b/html/testdata/webkit/namespace-sensitivity.dat
deleted file mode 100644 (file)
index ca35c0e..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#data
-<body><table><tr><td><svg><td><foreignObject><span></td>Foo
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     "Foo"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <svg svg>
-|               <svg td>
-|                 <svg foreignObject>
-|                   <span>
diff --git a/html/testdata/webkit/noscript01.dat b/html/testdata/webkit/noscript01.dat
deleted file mode 100644 (file)
index ec3496c..0000000
+++ /dev/null
@@ -1,237 +0,0 @@
-#data
-<head><noscript><!doctype html><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 31 Unexpected DOCTYPE. Ignored.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript><html class="foo"><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 34 html needs to be the first start tag.
-#script-off
-#document
-| <html>
-|   class="foo"
-|   <head>
-|     <noscript>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript></noscript>
-#errors
-(1,6): expected-doctype-but-got-tag
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|   <body>
-
-#data
-<head><noscript>   </noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       "   "
-|   <body>
-
-#data
-<head><noscript><!--foo--></noscript>
-#errors
-(1,6): expected-doctype-but-got-tag
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript><basefont><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <basefont>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript><bgsound><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <bgsound>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript><link><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <link>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript><meta><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <meta>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript><noframes>XXX</noscript></noframes></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <noframes>
-|         "XXX</noscript>"
-|   <body>
-
-#data
-<head><noscript><style>XXX</style></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <style>
-|         "XXX"
-|   <body>
-
-#data
-<head><noscript></br><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 21 Element br not allowed in a inhead-noscript context
-Line: 1 Col: 21 Unexpected end tag (br). Treated as br element.
-Line: 1 Col: 42 Unexpected end tag (noscript). Ignored.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|   <body>
-|     <br>
-|     <!-- foo -->
-
-#data
-<head><noscript><head class="foo"><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 34 Unexpected start tag (head).
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript><noscript class="foo"><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 34 Unexpected start tag (noscript).
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript></p><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected end tag (p). Ignored.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- foo -->
-|   <body>
-
-#data
-<head><noscript><p><!--foo--></noscript>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 19 Element p not allowed in a inhead-noscript context
-Line: 1 Col: 40 Unexpected end tag (noscript). Ignored.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|   <body>
-|     <p>
-|       <!-- foo -->
-
-#data
-<head><noscript>XXX<!--foo--></noscript></head>
-#errors
-Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
-Line: 1 Col: 19 Unexpected non-space character. Expected inhead-noscript content
-Line: 1 Col: 30 Unexpected end tag (noscript). Ignored.
-Line: 1 Col: 37 Unexpected end tag (head). Ignored.
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|   <body>
-|     "XXX"
-|     <!-- foo -->
-
-#data
-<head><noscript>
-#errors
-(1,6): expected-doctype-but-got-tag
-(1,6): eof-in-head-noscript
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|   <body>
diff --git a/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat b/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat
deleted file mode 100644 (file)
index 2c546d4..0000000
Binary files a/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat and /dev/null differ
diff --git a/html/testdata/webkit/pending-spec-changes.dat b/html/testdata/webkit/pending-spec-changes.dat
deleted file mode 100644 (file)
index 1647d7f..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-#data
-<input type="hidden"><frameset>
-#errors
-(1,21): expected-doctype-but-got-start-tag
-(1,31): unexpected-start-tag
-(1,31): eof-in-frameset
-#document
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!DOCTYPE html><table><caption><svg>foo</table>bar
-#errors
-(1,47): unexpected-end-tag
-(1,47): end-table-tag-in-caption
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <svg svg>
-|           "foo"
-|     "bar"
-
-#data
-<table><tr><td><svg><desc><td></desc><circle>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,30): unexpected-cell-end-tag
-(1,37): unexpected-end-tag
-(1,45): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <svg svg>
-|               <svg desc>
-|           <td>
-|             <circle>
diff --git a/html/testdata/webkit/plain-text-unsafe.dat b/html/testdata/webkit/plain-text-unsafe.dat
deleted file mode 100644 (file)
index dfb5cb6..0000000
Binary files a/html/testdata/webkit/plain-text-unsafe.dat and /dev/null differ
diff --git a/html/testdata/webkit/ruby.dat b/html/testdata/webkit/ruby.dat
deleted file mode 100644 (file)
index 696782f..0000000
+++ /dev/null
@@ -1,301 +0,0 @@
-#data
-<html><ruby>a<rb>b<rb></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rb>
-|         "b"
-|       <rb>
-
-#data
-<html><ruby>a<rb>b<rt></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rb>
-|         "b"
-|       <rt>
-
-#data
-<html><ruby>a<rb>b<rtc></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rb>
-|         "b"
-|       <rtc>
-
-#data
-<html><ruby>a<rb>b<rp></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rb>
-|         "b"
-|       <rp>
-
-#data
-<html><ruby>a<rb>b<span></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,31): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rb>
-|         "b"
-|         <span>
-
-#data
-<html><ruby>a<rt>b<rb></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rt>
-|         "b"
-|       <rb>
-
-#data
-<html><ruby>a<rt>b<rt></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rt>
-|         "b"
-|       <rt>
-
-#data
-<html><ruby>a<rt>b<rtc></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rt>
-|         "b"
-|       <rtc>
-
-#data
-<html><ruby>a<rt>b<rp></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rt>
-|         "b"
-|       <rp>
-
-#data
-<html><ruby>a<rt>b<span></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,31): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rt>
-|         "b"
-|         <span>
-
-#data
-<html><ruby>a<rtc>b<rb></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rtc>
-|         "b"
-|       <rb>
-
-#data
-<html><ruby>a<rtc>b<rt>c<rt>d</ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rtc>
-|         "b"
-|         <rt>
-|           "c"
-|         <rt>
-|           "d"
-
-#data
-<html><ruby>a<rtc>b<rtc></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rtc>
-|         "b"
-|       <rtc>
-
-#data
-<html><ruby>a<rtc>b<rp></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rtc>
-|         "b"
-|         <rp>
-
-#data
-<html><ruby>a<rtc>b<span></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rtc>
-|         "b"
-|         <span>
-
-#data
-<html><ruby>a<rp>b<rb></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rp>
-|         "b"
-|       <rb>
-
-#data
-<html><ruby>a<rp>b<rt></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rp>
-|         "b"
-|       <rt>
-
-#data
-<html><ruby>a<rp>b<rtc></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rp>
-|         "b"
-|       <rtc>
-
-#data
-<html><ruby>a<rp>b<rp></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rp>
-|         "b"
-|       <rp>
-
-#data
-<html><ruby>a<rp>b<span></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,31): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rp>
-|         "b"
-|         <span>
-
-#data
-<html><ruby><rtc><ruby>a<rb>b<rt></ruby></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       <rtc>
-|         <ruby>
-|           "a"
-|           <rb>
-|             "b"
-|           <rt>
diff --git a/html/testdata/webkit/scriptdata01.dat b/html/testdata/webkit/scriptdata01.dat
deleted file mode 100644 (file)
index e570858..0000000
+++ /dev/null
@@ -1,385 +0,0 @@
-#data
-FOO<script>'Hello'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'Hello'"
-|     "BAR"
-
-#data
-FOO<script></script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|     "BAR"
-
-#data
-FOO<script></script >BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|     "BAR"
-
-#data
-FOO<script></script/>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,21): self-closing-flag-on-end-tag
-#new-errors
-(1:21) end-tag-with-trailing-solidus
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|     "BAR"
-
-#data
-FOO<script></script/ >BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,20): unexpected-character-after-solidus-in-tag
-#new-errors
-(1:21) unexpected-solidus-in-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|     "BAR"
-
-#data
-FOO<script type="text/plain"></scriptx>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,42): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "</scriptx>BAR"
-
-#data
-FOO<script></script foo=">" dd>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,31): attributes-in-end-tag
-#new-errors
-(1:31) end-tag-with-attributes
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|     "BAR"
-
-#data
-FOO<script>'<'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<'"
-|     "BAR"
-
-#data
-FOO<script>'<!'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!'"
-|     "BAR"
-
-#data
-FOO<script>'<!-'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!-'"
-|     "BAR"
-
-#data
-FOO<script>'<!--'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!--'"
-|     "BAR"
-
-#data
-FOO<script>'<!---'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!---'"
-|     "BAR"
-
-#data
-FOO<script>'<!-->'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!-->'"
-|     "BAR"
-
-#data
-FOO<script>'<!-->'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!-->'"
-|     "BAR"
-
-#data
-FOO<script>'<!-- potato'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!-- potato'"
-|     "BAR"
-
-#data
-FOO<script>'<!-- <sCrIpt'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!-- <sCrIpt'"
-|     "BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,56): expected-script-data-but-got-eof
-(1,56): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:57) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "'<!-- <sCrIpt>'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,58): expected-script-data-but-got-eof
-(1,58): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:59) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "'<!-- <sCrIpt> -'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,59): expected-script-data-but-got-eof
-(1,59): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:60) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "'<!-- <sCrIpt> --'</script>BAR"
-
-#data
-FOO<script>'<!-- <sCrIpt> -->'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "'<!-- <sCrIpt> -->'"
-|     "BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,61): expected-script-data-but-got-eof
-(1,61): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:62) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "'<!-- <sCrIpt> --!>'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,61): expected-script-data-but-got-eof
-(1,61): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:62) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "'<!-- <sCrIpt> -- >'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,56): expected-script-data-but-got-eof
-(1,56): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:57) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "'<!-- <sCrIpt '</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-(1,56): expected-script-data-but-got-eof
-(1,56): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:57) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "'<!-- <sCrIpt/'</script>BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "'<!-- <sCrIpt\'"
-|     "BAR"
-
-#data
-FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       type="text/plain"
-|       "'<!-- <sCrIpt/'</script>BAR"
-|     "QUX"
-
-#data
-FOO<script><!--<script>-></script>--></script>QUX
-#errors
-(1,3): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "FOO"
-|     <script>
-|       "<!--<script>-></script>-->"
-|     "QUX"
diff --git a/html/testdata/webkit/scripted/adoption01.dat b/html/testdata/webkit/scripted/adoption01.dat
deleted file mode 100644 (file)
index 5cc0f07..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#data
-<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
-#errors
-#script-on
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <b>
-|         id="B"
-|         <script>
-|           "document.getElementById("A").id = "B""
-|     <b>
-|       id="A"
-|       "TEXT"
diff --git a/html/testdata/webkit/scripted/ark.dat b/html/testdata/webkit/scripted/ark.dat
deleted file mode 100644 (file)
index feebead..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-#data
-<p><font size=4><font size=4><font size=4><script>document.getElementsByTagName("font")[2].setAttribute("size", "5");</script><font size=4><p>X
-#errors
-#script-on
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <font>
-|         size="4"
-|         <font>
-|           size="4"
-|           <font>
-|             size="5"
-|             <script>
-|               "document.getElementsByTagName("font")[2].setAttribute("size", "5");"
-|             <font>
-|               size="4"
-|     <p>
-|       <font>
-|         size="4"
-|         <font>
-|           size="4"
-|           <font>
-|             size="4"
-|             "X"
diff --git a/html/testdata/webkit/scripted/webkit01.dat b/html/testdata/webkit/scripted/webkit01.dat
deleted file mode 100644 (file)
index 3e71c1b..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-#data
-1<script>document.write("2")</script>3
-#errors
-#script-on
-#document
-| <html>
-|   <head>
-|   <body>
-|     "1"
-|     <script>
-|       "document.write("2")"
-|     "23"
-
-#data
-1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
-#errors
-#script-on
-#document
-| <html>
-|   <head>
-|   <body>
-|     "1"
-|     <script>
-|       "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
-|     <script>
-|       "document.write('2')"
-|     "2"
-|     <script>
-|       "document.write('3')"
-|     "34"
diff --git a/html/testdata/webkit/svg.dat b/html/testdata/webkit/svg.dat
deleted file mode 100644 (file)
index 8e9a2bb..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-#data
-<svg><tr><td><title><tr>
-#errors
-#document-fragment
-td
-#document
-| <svg svg>
-|   <svg tr>
-|     <svg td>
-|       <svg title>
-
-#data
-<svg><tr><td><title><tr>
-#errors
-#document-fragment
-tr
-#document
-| <svg svg>
-|   <svg tr>
-|     <svg td>
-|       <svg title>
-
-#data
-<svg><thead><title><tbody>
-#errors
-#document-fragment
-thead
-#document
-| <svg svg>
-|   <svg thead>
-|     <svg title>
-
-#data
-<svg><tfoot><title><tbody>
-#errors
-#document-fragment
-tfoot
-#document
-| <svg svg>
-|   <svg tfoot>
-|     <svg title>
-
-#data
-<svg><tbody><title><tfoot>
-#errors
-#document-fragment
-tbody
-#document
-| <svg svg>
-|   <svg tbody>
-|     <svg title>
-
-#data
-<svg><tbody><title></table>
-#errors
-#document-fragment
-tbody
-#document
-| <svg svg>
-|   <svg tbody>
-|     <svg title>
-
-#data
-<svg><thead><title></table>
-#errors
-#document-fragment
-tbody
-#document
-| <svg svg>
-|   <svg thead>
-|     <svg title>
-
-#data
-<svg><tfoot><title></table>
-#errors
-#document-fragment
-tbody
-#document
-| <svg svg>
-|   <svg tfoot>
-|     <svg title>
diff --git a/html/testdata/webkit/tables01.dat b/html/testdata/webkit/tables01.dat
deleted file mode 100644 (file)
index f0caaa3..0000000
+++ /dev/null
@@ -1,286 +0,0 @@
-#data
-<table><th>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-(1,11): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <th>
-
-#data
-<table><td>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-(1,11): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-
-#data
-<table><col foo='bar'>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,22): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <colgroup>
-|         <col>
-|           foo="bar"
-
-#data
-<table><colgroup></html>foo
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,24): unexpected-end-tag
-(1,27): foster-parenting-character-in-table
-(1,27): foster-parenting-character-in-table
-(1,27): foster-parenting-character-in-table
-(1,27): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     "foo"
-|     <table>
-|       <colgroup>
-
-#data
-<table></table><p>foo
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|     <p>
-|       "foo"
-
-#data
-<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,14): unexpected-end-tag
-(1,24): unexpected-end-tag
-(1,30): unexpected-end-tag
-(1,41): unexpected-end-tag
-(1,48): unexpected-end-tag
-(1,56): unexpected-end-tag
-(1,61): unexpected-end-tag
-(1,69): unexpected-end-tag
-(1,74): unexpected-end-tag
-(1,82): unexpected-end-tag
-(1,87): unexpected-end-tag
-(1,91): unexpected-cell-in-table-body
-(1,91): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-
-#data
-<table><select><option>3</select></table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-implies-table-voodoo
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|         "3"
-|     <table>
-
-#data
-<table><select><table></table></select></table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-implies-table-voodoo
-(1,22): unexpected-table-element-start-tag-in-select-in-table
-(1,22): unexpected-start-tag-implies-end-tag
-(1,39): unexpected-end-tag
-(1,47): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|     <table>
-|     <table>
-
-#data
-<table><select></table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-implies-table-voodoo
-(1,23): unexpected-table-element-end-tag-in-select-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|     <table>
-
-#data
-<table><select><option>A<tr><td>B</td></tr></table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-implies-table-voodoo
-(1,28): unexpected-table-element-start-tag-in-select-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|         "A"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "B"
-
-#data
-<table><td></body></caption></col></colgroup></html>foo
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-(1,18): unexpected-end-tag
-(1,28): unexpected-end-tag
-(1,34): unexpected-end-tag
-(1,45): unexpected-end-tag
-(1,52): unexpected-end-tag
-(1,55): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "foo"
-
-#data
-<table><td>A</table>B
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "A"
-|     "B"
-
-#data
-<table><tr><caption>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,20): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|       <caption>
-
-#data
-<table><tr></body></caption></col></colgroup></html></td></th><td>foo
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,18): unexpected-end-tag-in-table-row
-(1,28): unexpected-end-tag-in-table-row
-(1,34): unexpected-end-tag-in-table-row
-(1,45): unexpected-end-tag-in-table-row
-(1,52): unexpected-end-tag-in-table-row
-(1,57): unexpected-end-tag-in-table-row
-(1,62): unexpected-end-tag-in-table-row
-(1,69): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "foo"
-
-#data
-<table><td><tr>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-(1,15): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|         <tr>
-
-#data
-<table><td><button><td>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-(1,23): unexpected-cell-end-tag
-(1,23): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <button>
-|           <td>
-
-#data
-<table><tr><td><svg><desc><td>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,30): unexpected-cell-end-tag
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <svg svg>
-|               <svg desc>
-|           <td>
diff --git a/html/testdata/webkit/template.dat b/html/testdata/webkit/template.dat
deleted file mode 100644 (file)
index b38d4f5..0000000
+++ /dev/null
@@ -1,1604 +0,0 @@
-#data
-<body><template>Hello</template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         "Hello"
-
-#data
-<template>Hello</template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         "Hello"
-|   <body>
-
-#data
-<template></template><div></div>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|   <body>
-|     <div>
-
-#data
-<html><template>Hello</template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         "Hello"
-|   <body>
-
-#data
-<head><template><div></div></template></head>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <div>
-|   <body>
-
-#data
-<div><template><div><span></template><b>
-#errors
- * (1,6) missing DOCTYPE
- * (1,38) mismatched template end tag
- * (1,41) unexpected end of file
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <template>
-|         content
-|           <div>
-|             <span>
-|       <b>
-
-#data
-<div><template></div>Hello
-#errors
- * (1,6) missing DOCTYPE
- * (1,22) unexpected token in template
- * (1,27) unexpected end of file in template
- * (1,27) unexpected end of file
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <template>
-|         content
-|           "Hello"
-
-#data
-<div></template></div>
-#errors
- * (1,6) missing DOCTYPE
- * (1,17) unexpected template end tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-
-#data
-<table><template></template></table>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <template>
-|         content
-
-#data
-<table><template></template></div>
-#errors
- * (1,8) missing DOCTYPE
- * (1,35) unexpected token in table - foster parenting
- * (1,35) unexpected end tag
- * (1,35) unexpected end of file
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <template>
-|         content
-
-#data
-<table><div><template></template></div>
-#errors
- * (1,8) missing DOCTYPE
- * (1,13) unexpected token in table - foster parenting
- * (1,40) unexpected token in table - foster parenting
- * (1,40) unexpected end of file
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <template>
-|         content
-|     <table>
-
-#data
-<table><template></template><div></div>
-#errors
-no doctype
-bad div in table
-bad /div in table
-eof in table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|     <table>
-|       <template>
-|         content
-
-#data
-<table>   <template></template></table>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       "   "
-|       <template>
-|         content
-
-#data
-<table><tbody><template></template></tbody>
-#errors
-no doctype
-eof in table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <template>
-|           content
-
-#data
-<table><tbody><template></tbody></template>
-#errors
-no doctype
-bad /tbody
-eof in table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <template>
-|           content
-
-#data
-<table><tbody><template></template></tbody></table>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <template>
-|           content
-
-#data
-<table><thead><template></template></thead>
-#errors
-no doctype
-eof in table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <thead>
-|         <template>
-|           content
-
-#data
-<table><tfoot><template></template></tfoot>
-#errors
-no doctype
-eof in table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tfoot>
-|         <template>
-|           content
-
-#data
-<select><template></template></select>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <template>
-|         content
-
-#data
-<select><template><option></option></template></select>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <template>
-|         content
-|           <option>
-
-#data
-<template><option></option></select><option></option></template>
-#errors
-no doctype
-bad /select
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <option>
-|         <option>
-|   <body>
-
-#data
-<select><template></template><option></select>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <template>
-|         content
-|       <option>
-
-#data
-<select><option><template></template></select>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|         <template>
-|           content
-
-#data
-<select><template>
-#errors
-no doctype
-eof in template
-eof in select
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <template>
-|         content
-
-#data
-<select><option></option><template>
-#errors
-no doctype
-eof in template
-eof in select
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|       <template>
-|         content
-
-#data
-<select><option></option><template><option>
-#errors
-no doctype
-eof in template
-eof in select
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|       <template>
-|         content
-|           <option>
-
-#data
-<table><thead><template><td></template></table>
-#errors
- * (1,8) missing DOCTYPE
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <thead>
-|         <template>
-|           content
-|             <td>
-
-#data
-<table><template><thead></template></table>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <template>
-|         content
-|           <thead>
-
-#data
-<body><table><template><td></tr><div></template></table>
-#errors
-no doctype
-bad </tr>
-missing </div>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <template>
-|         content
-|           <td>
-|             <div>
-
-#data
-<table><template><thead></template></thead></table>
-#errors
-no doctype
-bad /thead after /template
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <template>
-|         content
-|           <thead>
-
-#data
-<table><thead><template><tr></template></table>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <thead>
-|         <template>
-|           content
-|             <tr>
-
-#data
-<table><template><tr></template></table>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <template>
-|         content
-|           <tr>
-
-#data
-<table><tr><template><td>
-#errors
-no doctype
-eof in template
-eof in table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <template>
-|             content
-|               <td>
-
-#data
-<table><template><tr><template><td></template></tr></template></table>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <template>
-|         content
-|           <tr>
-|             <template>
-|               content
-|                 <td>
-
-#data
-<table><template><tr><template><td></td></template></tr></template></table>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <template>
-|         content
-|           <tr>
-|             <template>
-|               content
-|                 <td>
-
-#data
-<table><template><td></template>
-#errors
-no doctype
-eof in table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <template>
-|         content
-|           <td>
-
-#data
-<body><template><td></td></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <td>
-
-#data
-<body><template><template><tr></tr></template><td></td></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <tr>
-|         <td>
-
-#data
-<table><colgroup><template><col>
-#errors
-no doctype
-eof in template
-eof in table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <colgroup>
-|         <template>
-|           content
-|             <col>
-
-#data
-<frameset><template><frame></frame></template></frameset>
-#errors
- * (1,11) missing DOCTYPE
- * (1,21) unexpected start tag token
- * (1,36) unexpected end tag token
- * (1,47) unexpected end tag token
-#document
-| <html>
-|   <head>
-|   <frameset>
-|     <frame>
-
-#data
-<template><frame></frame></frameset><frame></frame></template>
-#errors
- * (1,11) missing DOCTYPE
- * (1,18) unexpected start tag
- * (1,26) unexpected end tag
- * (1,37) unexpected end tag
- * (1,44) unexpected start tag
- * (1,52) unexpected end tag
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|   <body>
-
-#data
-<template><div><frameset><span></span></div><span></span></template>
-#errors
-no doctype
-bad frameset
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <div>
-|           <span>
-|         <span>
-|   <body>
-
-#data
-<body><template><div><frameset><span></span></div><span></span></template></body>
-#errors
-no doctype
-bad frameset
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <div>
-|           <span>
-|         <span>
-
-#data
-<body><template><script>var i = 1;</script><td></td></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <script>
-|           "var i = 1;"
-|         <td>
-
-#data
-<body><template><tr><div></div></tr></template>
-#errors
-no doctype
-foster-parented div
-foster-parented /div
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <tr>
-|         <div>
-
-#data
-<body><template><tr></tr><td></td></template>
-#errors
-no doctype
-unexpected <td>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <tr>
-|         <tr>
-|           <td>
-
-#data
-<body><template><td></td></tr><td></td></template>
-#errors
-no doctype
-bad </tr>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <td>
-|         <td>
-
-#data
-<body><template><td></td><tbody><td></td></template>
-#errors
-no doctype
-bad <tbody>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <td>
-|         <td>
-
-#data
-<body><template><td></td><caption></caption><td></td></template>
-#errors
- * (1,7) missing DOCTYPE
- * (1,35) unexpected start tag in table row
- * (1,45) unexpected end tag in table row
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <td>
-|         <td>
-
-#data
-<body><template><td></td><colgroup></caption><td></td></template>
-#errors
- * (1,7) missing DOCTYPE
- * (1,36) unexpected start tag in table row
- * (1,46) unexpected end tag in table row
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <td>
-|         <td>
-
-#data
-<body><template><td></td></table><td></td></template>
-#errors
-no doctype
-bad </table>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <td>
-|         <td>
-
-#data
-<body><template><tr></tr><tbody><tr></tr></template>
-#errors
-no doctype
-bad <tbody>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <tr>
-|         <tr>
-
-#data
-<body><template><tr></tr><caption><tr></tr></template>
-#errors
-no doctype
-bad <caption>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <tr>
-|         <tr>
-
-#data
-<body><template><tr></tr></table><tr></tr></template>
-#errors
-no doctype
-bad </table>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <tr>
-|         <tr>
-
-#data
-<body><template><thead></thead><caption></caption><tbody></tbody></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <thead>
-|         <caption>
-|         <tbody>
-
-#data
-<body><template><thead></thead></table><tbody></tbody></template></body>
-#errors
-no doctype
-bad </table>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <thead>
-|         <tbody>
-
-#data
-<body><template><div><tr></tr></div></template>
-#errors
-no doctype
-bad tr
-bad /tr
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <div>
-
-#data
-<body><template><em>Hello</em></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <em>
-|           "Hello"
-
-#data
-<body><template><!--comment--></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <!-- comment -->
-
-#data
-<body><template><style></style><td></td></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <style>
-|         <td>
-
-#data
-<body><template><meta><td></td></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <meta>
-|         <td>
-
-#data
-<body><template><link><td></td></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <link>
-|         <td>
-
-#data
-<body><template><template><tr></tr></template><td></td></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <tr>
-|         <td>
-
-#data
-<body><table><colgroup><template><col></col></template></colgroup></table></body>
-#errors
-no doctype
-bad /col
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <colgroup>
-|         <template>
-|           content
-|             <col>
-
-#data
-<body a=b><template><div></div><body c=d><div></div></body></template></body>
-#errors
-no doctype
-bad <body>
-bad </body>
-#document
-| <html>
-|   <head>
-|   <body>
-|     a="b"
-|     <template>
-|       content
-|         <div>
-|         <div>
-
-#data
-<html a=b><template><div><html b=c><span></template>
-#errors
-no doctype
-bad <html>
-missing end tags in template
-#document
-| <html>
-|   a="b"
-|   <head>
-|     <template>
-|       content
-|         <div>
-|           <span>
-|   <body>
-
-#data
-<html a=b><template><col></col><html b=c><col></col></template>
-#errors
-no doctype
-bad /col
-bad html
-bad /col
-#document
-| <html>
-|   a="b"
-|   <head>
-|     <template>
-|       content
-|         <col>
-|         <col>
-|   <body>
-
-#data
-<html a=b><template><frame></frame><html b=c><frame></frame></template>
-#errors
-no doctype
-bad frame
-bad /frame
-bad html
-bad frame
-bad /frame
-#document
-| <html>
-|   a="b"
-|   <head>
-|     <template>
-|       content
-|   <body>
-
-#data
-<body><template><tr></tr><template></template><td></td></template>
-#errors
-no doctype
-unexpected <td>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <tr>
-|         <template>
-|           content
-|         <tr>
-|           <td>
-
-#data
-<body><template><thead></thead><template><tr></tr></template><tr></tr><tfoot></tfoot></template>
-#errors
-no doctype
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <thead>
-|         <template>
-|           content
-|             <tr>
-|         <tbody>
-|           <tr>
-|         <tfoot>
-
-#data
-<body><template><template><b><template></template></template>text</template>
-#errors
-no doctype
-missing </b>
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <b>
-|               <template>
-|                 content
-|         "text"
-
-#data
-<body><template><col><colgroup>
-#errors
-no doctype
-bad colgroup
-eof in template
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <col>
-
-#data
-<body><template><col></colgroup>
-#errors
-no doctype
-bogus /colgroup
-eof in template
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <col>
-
-#data
-<body><template><col><colgroup></template></body>
-#errors
-no doctype
-bad colgroup
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <col>
-
-#data
-<body><template><col><div>
-#errors
- * (1,7) missing DOCTYPE
- * (1,27) unexpected token
- * (1,27) unexpected end of file in template
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <col>
-
-#data
-<body><template><col></div>
-#errors
-no doctype
-bad /div
-eof in template
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <col>
-
-#data
-<body><template><col>Hello
-#errors
-no doctype
-unexpected text
-eof in template
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <col>
-
-#data
-<body><template><i><menu>Foo</i>
-#errors
-no doctype
-mising /menu
-eof in template
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <i>
-|         <menu>
-|           <i>
-|             "Foo"
-
-#data
-<body><template></div><div>Foo</div><template></template><tr></tr>
-#errors
-no doctype
-bogus /div
-bogus tr
-bogus /tr
-eof in template
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-|         <div>
-|           "Foo"
-|         <template>
-|           content
-
-#data
-<body><div><template></div><tr><td>Foo</td></tr></template>
-#errors
- * (1,7) missing DOCTYPE
- * (1,28) unexpected token in template
- * (1,60) unexpected end of file
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <template>
-|         content
-|           <tr>
-|             <td>
-|               "Foo"
-
-#data
-<template></figcaption><sub><table></table>
-#errors
-no doctype
-bad /figcaption
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <sub>
-|           <table>
-|   <body>
-
-#data
-<template><template>
-#errors
-no doctype
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|   <body>
-
-#data
-<template><div>
-#errors
-no doctype
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <div>
-|   <body>
-
-#data
-<template><template><div>
-#errors
-no doctype
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <div>
-|   <body>
-
-#data
-<template><template><table>
-#errors
-no doctype
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <table>
-|   <body>
-
-#data
-<template><template><tbody>
-#errors
-no doctype
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <tbody>
-|   <body>
-
-#data
-<template><template><tr>
-#errors
-no doctype
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <tr>
-|   <body>
-
-#data
-<template><template><td>
-#errors
-no doctype
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <td>
-|   <body>
-
-#data
-<template><template><caption>
-#errors
-no doctype
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <caption>
-|   <body>
-
-#data
-<template><template><colgroup>
-#errors
-no doctype
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <colgroup>
-|   <body>
-
-#data
-<template><template><col>
-#errors
-no doctype
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <col>
-|   <body>
-
-#data
-<template><template><tbody><select>
-#errors
- * (1,11) missing DOCTYPE
- * (1,36) unexpected token in table - foster parenting
- * (1,36) unexpected end of file in template
- * (1,36) unexpected end of file in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <tbody>
-|             <select>
-|   <body>
-
-#data
-<template><template><table>Foo
-#errors
-no doctype
-foster-parenting text F
-foster-parenting text o
-foster-parenting text o
-eof
-eof
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             "Foo"
-|             <table>
-|   <body>
-
-#data
-<template><template><frame>
-#errors
-no doctype
-bad tag
-eof
-eof
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|   <body>
-
-#data
-<template><template><script>var i
-#errors
-no doctype
-eof in script
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <script>
-|               "var i"
-|   <body>
-
-#data
-<template><template><style>var i
-#errors
-no doctype
-eof in style
-eof in template
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <template>
-|           content
-|             <style>
-|               "var i"
-|   <body>
-
-#data
-<template><table></template><body><span>Foo
-#errors
-no doctype
-missing /table
-bad eof
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <table>
-|   <body>
-|     <span>
-|       "Foo"
-
-#data
-<template><td></template><body><span>Foo
-#errors
-no doctype
-bad eof
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <td>
-|   <body>
-|     <span>
-|       "Foo"
-
-#data
-<template><object></template><body><span>Foo
-#errors
-no doctype
-missing /object
-bad eof
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <object>
-|   <body>
-|     <span>
-|       "Foo"
-
-#data
-<template><svg><template>
-#errors
-no doctype
-eof in template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <svg svg>
-|           <svg template>
-|   <body>
-
-#data
-<template><svg><foo><template><foreignObject><div></template><div>
-#errors
-no doctype
-ugly template closure
-bad eof
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <svg svg>
-|           <svg foo>
-|             <svg template>
-|               <svg foreignObject>
-|                 <div>
-|   <body>
-|     <div>
-
-#data
-<dummy><template><span></dummy>
-#errors
-no doctype
-bad end tag </dummy>
-eof in template
-eof in dummy
-#document
-| <html>
-|   <head>
-|   <body>
-|     <dummy>
-|       <template>
-|         content
-|           <span>
-
-#data
-<body><table><tr><td><select><template>Foo</template><caption>A</table>
-#errors
-no doctype
-(1,62): unexpected-caption-in-select-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <select>
-|               <template>
-|                 content
-|                   "Foo"
-|       <caption>
-|         "A"
-
-#data
-<body></body><template>
-#errors
-no doctype
-(1,23): template-after-body
-(1,24): eof-in-template
-#document
-| <html>
-|   <head>
-|   <body>
-|     <template>
-|       content
-
-#data
-<head></head><template>
-#errors
-no doctype
-(1,23): template-after-head
-(1,24): eof-in-template
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|   <body>
-
-#data
-<head></head><template>Foo</template>
-#errors
-no doctype
-(1,23): template-after-head
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         "Foo"
-|   <body>
-
-#data
-<!DOCTYPE HTML><dummy><table><template><table><template><table><script>
-#errors
-eof script
-eof template
-eof template
-eof table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <dummy>
-|       <table>
-|         <template>
-|           content
-|             <table>
-|               <template>
-|                 content
-|                   <table>
-|                     <script>
-
-#data
-<template><a><table><a>
-#errors
-#document
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <a>
-|           <a>
-|           <table>
-|   <body>
diff --git a/html/testdata/webkit/tests1.dat b/html/testdata/webkit/tests1.dat
deleted file mode 100644 (file)
index 1c36c1b..0000000
+++ /dev/null
@@ -1,1988 +0,0 @@
-#data
-Test
-#errors
-(1,0): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "Test"
-
-#data
-<p>One<p>Two
-#errors
-(1,3): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "One"
-|     <p>
-|       "Two"
-
-#data
-Line1<br>Line2<br>Line3<br>Line4
-#errors
-(1,0): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "Line1"
-|     <br>
-|     "Line2"
-|     <br>
-|     "Line3"
-|     <br>
-|     "Line4"
-
-#data
-<html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<head>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<body>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<html><head>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<html><head></head>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<html><head></head><body>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<html><head></head><body></body>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<html><head><body></body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<html><head></body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<html><head><body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<html><body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<head></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-</head>
-#errors
-(1,7): expected-doctype-but-got-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-</body>
-#errors
-(1,7): expected-doctype-but-got-end-tag element.
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-</html>
-#errors
-(1,7): expected-doctype-but-got-end-tag element.
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<b><table><td><i></table>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,14): unexpected-cell-in-table-body
-(1,25): unexpected-cell-end-tag
-(1,25): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|               <i>
-
-#data
-<b><table><td></b><i></table>X
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,14): unexpected-cell-in-table-body
-(1,18): unexpected-end-tag
-(1,29): unexpected-cell-end-tag
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|               <i>
-|       "X"
-
-#data
-<h1>Hello<h2>World
-#errors
-(1,4): expected-doctype-but-got-start-tag
-(1,13): unexpected-start-tag
-(1,18): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <h1>
-|       "Hello"
-|     <h2>
-|       "World"
-
-#data
-<a><p>X<a>Y</a>Z</p></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,10): unexpected-start-tag-implies-end-tag
-(1,10): adoption-agency-1.3
-(1,24): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|     <p>
-|       <a>
-|         "X"
-|       <a>
-|         "Y"
-|       "Z"
-
-#data
-<b><button>foo</b>bar
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,18): adoption-agency-1.3
-(1,21): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|     <button>
-|       <b>
-|         "foo"
-|       "bar"
-
-#data
-<!DOCTYPE html><span><button>foo</span>bar
-#errors
-(1,39): unexpected-end-tag
-(1,42): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <span>
-|       <button>
-|         "foobar"
-
-#data
-<p><b><div><marquee></p></b></div>X
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,11): unexpected-end-tag
-(1,24): unexpected-end-tag
-(1,28): unexpected-end-tag
-(1,34): end-tag-too-early
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <b>
-|     <div>
-|       <b>
-|         <marquee>
-|           <p>
-|           "X"
-
-#data
-<script><div></script></div><title><p></title><p><p>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,28): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<div>"
-|     <title>
-|       "<p>"
-|   <body>
-|     <p>
-|     <p>
-
-#data
-<!--><div>--<!-->
-#errors
-(1,5): incorrect-comment
-(1,10): expected-doctype-but-got-start-tag
-(1,17): incorrect-comment
-(1,17): expected-closing-tag-but-got-eof
-#new-errors
-(1:5) abrupt-closing-of-empty-comment
-(1:17) abrupt-closing-of-empty-comment
-#document
-| <!--  -->
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "--"
-|       <!--  -->
-
-#data
-<p><hr></p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,11): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <hr>
-|     <p>
-
-#data
-<select><b><option><select><option></b></select>X
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,11): unexpected-start-tag-in-select
-(1,27): unexpected-select-in-select
-(1,39): unexpected-end-tag
-(1,48): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|     <option>
-|       "X"
-
-#data
-<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,14): unexpected-cell-in-table-body
-(1,35): unexpected-start-tag-implies-end-tag
-(1,40): unexpected-cell-end-tag
-(1,43): unexpected-start-tag-implies-table-voodoo
-(1,43): unexpected-start-tag-implies-end-tag
-(1,43): unexpected-end-tag
-(1,63): unexpected-start-tag-implies-end-tag
-(1,64): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <a>
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|               <a>
-|                 <table>
-|               <a>
-|     <a>
-|       <b>
-|         "X"
-|       "C"
-|     <a>
-|       "Y"
-
-#data
-<a X>0<b>1<a Y>2
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-implies-end-tag
-(1,15): adoption-agency-1.3
-(1,16): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       x=""
-|       "0"
-|       <b>
-|         "1"
-|     <b>
-|       <a>
-|         y=""
-|         "2"
-
-#data
-<!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->
-#errors
-(1,7): unexpected-dash-after-double-dash-in-comment
-(1,14): expected-doctype-but-got-start-tag
-(1,41): unexpected-start-tag-implies-table-voodoo
-(1,48): foster-parenting-character-in-table
-(1,48): foster-parenting-character-in-table
-(1,48): foster-parenting-character-in-table
-(1,48): foster-parenting-character-in-table
-(1,48): foster-parenting-character-in-table
-(1,48): foster-parenting-character-in-table
-(1,48): foster-parenting-character-in-table
-(1,48): foster-parenting-character-in-table
-(1,48): foster-parenting-character-in-table
-(1,48): foster-parenting-character-in-table
-(1,48): unexpected-cell-in-table-body
-(1,63): unexpected-cell-end-tag
-(1,71): eof-in-table
-#document
-| <!-- - -->
-| <html>
-|   <head>
-|   <body>
-|     <font>
-|       <div>
-|         "helloexcite!"
-|         <b>
-|           "me!"
-|         <table>
-|           <tbody>
-|             <tr>
-|               <th>
-|                 <i>
-|                   "please!"
-|             <!-- X -->
-
-#data
-<!DOCTYPE html><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <li>
-|       "hello"
-|     <li>
-|       "world"
-|       <ul>
-|         "how"
-|         <li>
-|           "do"
-|       "you"
-|   <!-- do -->
-
-#data
-<!DOCTYPE html>A<option>B<optgroup>C<select>D</option>E
-#errors
-(1,54): unexpected-end-tag-in-select
-(1,55): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "A"
-|     <option>
-|       "B"
-|     <optgroup>
-|       "C"
-|       <select>
-|         "DE"
-
-#data
-<
-#errors
-(1,1): expected-tag-name
-(1,1): expected-doctype-but-got-chars
-#new-errors
-(1:2) eof-before-tag-name
-#document
-| <html>
-|   <head>
-|   <body>
-|     "<"
-
-#data
-<#
-#errors
-(1,1): expected-tag-name
-(1,1): expected-doctype-but-got-chars
-#new-errors
-(1:2) invalid-first-character-of-tag-name
-#document
-| <html>
-|   <head>
-|   <body>
-|     "<#"
-
-#data
-</
-#errors
-(1,2): expected-closing-tag-but-got-eof
-(1,2): expected-doctype-but-got-chars
-#new-errors
-(1:3) eof-before-tag-name
-#document
-| <html>
-|   <head>
-|   <body>
-|     "</"
-
-#data
-</#
-#errors
-(1,2): expected-closing-tag-but-got-char
-(1,3): expected-doctype-but-got-eof
-#new-errors
-(1:3) invalid-first-character-of-tag-name
-#document
-| <!-- # -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<?
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,2): expected-doctype-but-got-eof
-#new-errors
-(1:2) unexpected-question-mark-instead-of-tag-name
-#document
-| <!-- ? -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<?#
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,3): expected-doctype-but-got-eof
-#new-errors
-(1:2) unexpected-question-mark-instead-of-tag-name
-#document
-| <!-- ?# -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!
-#errors
-(1,2): expected-dashes-or-doctype
-(1,2): expected-doctype-but-got-eof
-#new-errors
-(1:3) incorrectly-opened-comment
-#document
-| <!--  -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!#
-#errors
-(1,2): expected-dashes-or-doctype
-(1,3): expected-doctype-but-got-eof
-#new-errors
-(1:3) incorrectly-opened-comment
-#document
-| <!-- # -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<?COMMENT?>
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,11): expected-doctype-but-got-eof
-#new-errors
-(1:2) unexpected-question-mark-instead-of-tag-name
-#document
-| <!-- ?COMMENT? -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!COMMENT>
-#errors
-(1,2): expected-dashes-or-doctype
-(1,10): expected-doctype-but-got-eof
-#new-errors
-(1:3) incorrectly-opened-comment
-#document
-| <!-- COMMENT -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-</ COMMENT >
-#errors
-(1,2): expected-closing-tag-but-got-char
-(1,12): expected-doctype-but-got-eof
-#new-errors
-(1:3) invalid-first-character-of-tag-name
-#document
-| <!--  COMMENT  -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<?COM--MENT?>
-#errors
-(1,1): expected-tag-name-but-got-question-mark
-(1,13): expected-doctype-but-got-eof
-#new-errors
-(1:2) unexpected-question-mark-instead-of-tag-name
-#document
-| <!-- ?COM--MENT? -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!COM--MENT>
-#errors
-(1,2): expected-dashes-or-doctype
-(1,12): expected-doctype-but-got-eof
-#new-errors
-(1:3) incorrectly-opened-comment
-#document
-| <!-- COM--MENT -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-</ COM--MENT >
-#errors
-(1,2): expected-closing-tag-but-got-char
-(1,14): expected-doctype-but-got-eof
-#new-errors
-(1:3) invalid-first-character-of-tag-name
-#document
-| <!--  COM--MENT  -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><style> EOF
-#errors
-(1,26): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <style>
-|       " EOF"
-|   <body>
-
-#data
-<!DOCTYPE html><script> <!-- </script> --> </script> EOF
-#errors
-(1,52): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       " <!-- "
-|     " "
-|   <body>
-|     "-->  EOF"
-
-#data
-<b><p></b>TEST
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,10): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|     <p>
-|       <b>
-|       "TEST"
-
-#data
-<p id=a><b><p id=b></b>TEST
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,19): unexpected-end-tag
-(1,23): adoption-agency-1.2
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       id="a"
-|       <b>
-|     <p>
-|       id="b"
-|       "TEST"
-
-#data
-<b id=a><p><b id=b></p></b>TEST
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,23): unexpected-end-tag
-(1,27): adoption-agency-1.2
-(1,31): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       id="a"
-|       <p>
-|         <b>
-|           id="b"
-|       "TEST"
-
-#data
-<!DOCTYPE html><title>U-test</title><body><div><p>Test<u></p></div></body>
-#errors
-(1,61): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <title>
-|       "U-test"
-|   <body>
-|     <div>
-|       <p>
-|         "Test"
-|         <u>
-
-#data
-<!DOCTYPE html><font><table></font></table></font>
-#errors
-(1,35): unexpected-end-tag-implies-table-voodoo
-(1,35): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <font>
-|       <table>
-
-#data
-<font><p>hello<b>cruel</font>world
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,29): adoption-agency-1.3
-(1,29): adoption-agency-1.3
-(1,34): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <font>
-|     <p>
-|       <font>
-|         "hello"
-|         <b>
-|           "cruel"
-|       <b>
-|         "world"
-
-#data
-<b>Test</i>Test
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,11): unexpected-end-tag
-(1,15): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "TestTest"
-
-#data
-<b>A<cite>B<div>C
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,17): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "A"
-|       <cite>
-|         "B"
-|         <div>
-|           "C"
-
-#data
-<b>A<cite>B<div>C</cite>D
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,24): unexpected-end-tag
-(1,25): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "A"
-|       <cite>
-|         "B"
-|         <div>
-|           "CD"
-
-#data
-<b>A<cite>B<div>C</b>D
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,21): adoption-agency-1.3
-(1,22): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "A"
-|       <cite>
-|         "B"
-|     <div>
-|       <b>
-|         "C"
-|       "D"
-
-#data
-
-#errors
-(1,0): expected-doctype-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<DIV>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,5): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-
-#data
-<DIV> abc
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,9): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc"
-
-#data
-<DIV> abc <B>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,13): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-
-#data
-<DIV> abc <B> def
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,17): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def"
-
-#data
-<DIV> abc <B> def <I>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,21): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-
-#data
-<DIV> abc <B> def <I> ghi
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,25): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-|           " ghi"
-
-#data
-<DIV> abc <B> def <I> ghi <P>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,29): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-|           " ghi "
-|           <p>
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-|           " ghi "
-|           <p>
-|             " jkl"
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,38): adoption-agency-1.3
-(1,38): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-|           " ghi "
-|       <i>
-|         <p>
-|           <b>
-|             " jkl "
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,38): adoption-agency-1.3
-(1,42): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-|           " ghi "
-|       <i>
-|         <p>
-|           <b>
-|             " jkl "
-|           " mno"
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,38): adoption-agency-1.3
-(1,47): adoption-agency-1.3
-(1,47): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-|           " ghi "
-|       <i>
-|       <p>
-|         <i>
-|           <b>
-|             " jkl "
-|           " mno "
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,38): adoption-agency-1.3
-(1,47): adoption-agency-1.3
-(1,51): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-|           " ghi "
-|       <i>
-|       <p>
-|         <i>
-|           <b>
-|             " jkl "
-|           " mno "
-|         " pqr"
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr </P>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,38): adoption-agency-1.3
-(1,47): adoption-agency-1.3
-(1,56): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-|           " ghi "
-|       <i>
-|       <p>
-|         <i>
-|           <b>
-|             " jkl "
-|           " mno "
-|         " pqr "
-
-#data
-<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr </P> stu
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,38): adoption-agency-1.3
-(1,47): adoption-agency-1.3
-(1,60): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       " abc "
-|       <b>
-|         " def "
-|         <i>
-|           " ghi "
-|       <i>
-|       <p>
-|         <i>
-|           <b>
-|             " jkl "
-|           " mno "
-|         " pqr "
-|       " stu"
-
-#data
-<test attribute---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------->
-#errors
-(1,1040): expected-doctype-but-got-start-tag
-(1,1040): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <test>
-|       attribute----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------=""
-
-#data
-<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe
-#errors
-(1,15): expected-doctype-but-got-start-tag
-(1,39): unexpected-start-tag-implies-table-voodoo
-(1,39): unexpected-start-tag-implies-end-tag
-(1,39): unexpected-end-tag
-(1,45): foster-parenting-character-in-table
-(1,45): foster-parenting-character-in-table
-(1,68): foster-parenting-character-in-table
-(1,71): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       href="blah"
-|       "aba"
-|       <a>
-|         href="foo"
-|         "br"
-|       <a>
-|         href="foo"
-|         "x"
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|     <a>
-|       href="foo"
-|       "aoe"
-
-#data
-<a href="blah">aba<table><tr><td><a href="foo">br</td></tr>x</table>aoe
-#errors
-(1,15): expected-doctype-but-got-start-tag
-(1,54): unexpected-cell-end-tag
-(1,68): unexpected text in table
-(1,71): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       href="blah"
-|       "abax"
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|               <a>
-|                 href="foo"
-|                 "br"
-|       "aoe"
-
-#data
-<table><a href="blah">aba<tr><td><a href="foo">br</td></tr>x</table>aoe
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,22): unexpected-start-tag-implies-table-voodoo
-(1,29): foster-parenting-character-in-table
-(1,29): foster-parenting-character-in-table
-(1,29): foster-parenting-character-in-table
-(1,54): unexpected-cell-end-tag
-(1,68): foster-parenting-character-in-table
-(1,71): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       href="blah"
-|       "aba"
-|     <a>
-|       href="blah"
-|       "x"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <a>
-|               href="foo"
-|               "br"
-|     <a>
-|       href="blah"
-|       "aoe"
-
-#data
-<a href=a>aa<marquee>aa<a href=b>bb</marquee>aa
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,45): end-tag-too-early
-(1,47): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       href="a"
-|       "aa"
-|       <marquee>
-|         "aa"
-|         <a>
-|           href="b"
-|           "bb"
-|       "aa"
-
-#data
-<wbr><strike><code></strike><code><strike></code>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,28): adoption-agency-1.3
-(1,49): adoption-agency-1.3
-(1,49): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <wbr>
-|     <strike>
-|       <code>
-|     <code>
-|       <code>
-|         <strike>
-
-#data
-<!DOCTYPE html><spacer>foo
-#errors
-(1,26): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <spacer>
-|       "foo"
-
-#data
-<title><meta></title><link><title><meta></title>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <title>
-|       "<meta>"
-|     <link>
-|     <title>
-|       "<meta>"
-|   <body>
-
-#data
-<style><!--</style><meta><script>--><link></script>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "<!--"
-|     <meta>
-|     <script>
-|       "--><link>"
-|   <body>
-
-#data
-<head><meta></head><link>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,25): unexpected-start-tag-out-of-my-head
-#document
-| <html>
-|   <head>
-|     <meta>
-|     <link>
-|   <body>
-
-#data
-<table><tr><tr><td><td><span><th><span>X</table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,33): unexpected-cell-end-tag
-(1,48): unexpected-cell-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|         <tr>
-|           <td>
-|           <td>
-|             <span>
-|           <th>
-|             <span>
-|               "X"
-
-#data
-<body><body><base><link><meta><title><p></title><body><p></body>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,12): unexpected-start-tag
-(1,54): unexpected-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <base>
-|     <link>
-|     <meta>
-|     <title>
-|       "<p>"
-|     <p>
-
-#data
-<textarea><p></textarea>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "<p>"
-
-#data
-<p><image></p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,10): unexpected-start-tag-treated-as
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <img>
-
-#data
-<a><table><a></table><p><a><div><a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,13): unexpected-start-tag-implies-table-voodoo
-(1,13): unexpected-start-tag-implies-end-tag
-(1,13): adoption-agency-1.3
-(1,27): unexpected-start-tag-implies-end-tag
-(1,27): adoption-agency-1.2
-(1,32): unexpected-end-tag
-(1,35): unexpected-start-tag-implies-end-tag
-(1,35): adoption-agency-1.2
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <a>
-|       <table>
-|     <p>
-|       <a>
-|     <div>
-|       <a>
-
-#data
-<head></p><meta><p>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,10): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <meta>
-|   <body>
-|     <p>
-
-#data
-<head></html><meta><p>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,19): expected-eof-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <meta>
-|     <p>
-
-#data
-<b><table><td><i></table>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,14): unexpected-cell-in-table-body
-(1,25): unexpected-cell-end-tag
-(1,25): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|               <i>
-
-#data
-<b><table><td></b><i></table>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,14): unexpected-cell-in-table-body
-(1,18): unexpected-end-tag
-(1,29): unexpected-cell-end-tag
-(1,29): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|               <i>
-
-#data
-<h1><h2>
-#errors
-(1,4): expected-doctype-but-got-start-tag
-(1,8): unexpected-start-tag
-(1,8): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <h1>
-|     <h2>
-
-#data
-<a><p><a></a></p></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,9): unexpected-start-tag-implies-end-tag
-(1,9): adoption-agency-1.3
-(1,21): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|     <p>
-|       <a>
-|       <a>
-
-#data
-<b><button></b></button></b>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,15): adoption-agency-1.3
-(1,28): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|     <button>
-|       <b>
-
-#data
-<p><b><div><marquee></p></b></div>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,11): unexpected-end-tag
-(1,24): unexpected-end-tag
-(1,28): unexpected-end-tag
-(1,34): end-tag-too-early
-(1,34): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <b>
-|     <div>
-|       <b>
-|         <marquee>
-|           <p>
-
-#data
-<script></script></div><title></title><p><p>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,23): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|     <title>
-|   <body>
-|     <p>
-|     <p>
-
-#data
-<p><hr></p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,11): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <hr>
-|     <p>
-
-#data
-<select><b><option><select><option></b></select>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,11): unexpected-start-tag-in-select
-(1,27): unexpected-select-in-select
-(1,39): unexpected-end-tag
-(1,48): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|     <option>
-
-#data
-<html><head><title></title><body></body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <title>
-|   <body>
-
-#data
-<a><table><td><a><table></table><a></tr><a></table><a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,14): unexpected-cell-in-table-body
-(1,35): unexpected-start-tag-implies-end-tag
-(1,40): unexpected-cell-end-tag
-(1,43): unexpected-start-tag-implies-table-voodoo
-(1,43): unexpected-start-tag-implies-end-tag
-(1,43): unexpected-end-tag
-(1,54): unexpected-start-tag-implies-end-tag
-(1,54): adoption-agency-1.2
-(1,54): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <a>
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|               <a>
-|                 <table>
-|               <a>
-|     <a>
-
-#data
-<ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
-#errors
-(1,4): expected-doctype-but-got-start-tag
-(1,45): end-tag-too-early
-(1,58): end-tag-too-early
-(1,69): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ul>
-|       <li>
-|       <div>
-|         <li>
-|       <li>
-|       <li>
-|         <div>
-|       <li>
-|         <address>
-|       <li>
-|         <b>
-|           <em>
-|       <li>
-
-#data
-<ul><li><ul></li><li>a</li></ul></li></ul>
-#errors
-(1,4): expected-doctype-but-got-start-tag
-(1,17): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ul>
-|       <li>
-|         <ul>
-|           <li>
-|             "a"
-
-#data
-<frameset><frame><frameset><frame></frameset><noframes></noframes></frameset>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <frameset>
-|     <frame>
-|     <frameset>
-|       <frame>
-|     <noframes>
-
-#data
-<h1><table><td><h3></table><h3></h1>
-#errors
-(1,4): expected-doctype-but-got-start-tag
-(1,15): unexpected-cell-in-table-body
-(1,27): unexpected-cell-end-tag
-(1,31): unexpected-start-tag
-(1,36): end-tag-too-early
-#document
-| <html>
-|   <head>
-|   <body>
-|     <h1>
-|       <table>
-|         <tbody>
-|           <tr>
-|             <td>
-|               <h3>
-|     <h3>
-
-#data
-<table><colgroup><col><colgroup><col><col><col><colgroup><col><col><thead><tr><td></table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <colgroup>
-|         <col>
-|       <colgroup>
-|         <col>
-|         <col>
-|         <col>
-|       <colgroup>
-|         <col>
-|         <col>
-|       <thead>
-|         <tr>
-|           <td>
-
-#data
-<table><col><tbody><col><tr><col><td><col></table><col>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,37): unexpected-cell-in-table-body
-(1,55): unexpected-start-tag-ignored
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <colgroup>
-|         <col>
-|       <tbody>
-|       <colgroup>
-|         <col>
-|       <tbody>
-|         <tr>
-|       <colgroup>
-|         <col>
-|       <tbody>
-|         <tr>
-|           <td>
-|       <colgroup>
-|         <col>
-
-#data
-<table><colgroup><tbody><colgroup><tr><colgroup><td><colgroup></table><colgroup>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,52): unexpected-cell-in-table-body
-(1,80): unexpected-start-tag-ignored
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <colgroup>
-|       <tbody>
-|       <colgroup>
-|       <tbody>
-|         <tr>
-|       <colgroup>
-|       <tbody>
-|         <tr>
-|           <td>
-|       <colgroup>
-
-#data
-</strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
-#errors
-(1,9): expected-doctype-but-got-end-tag
-(1,9): unexpected-end-tag-before-html
-(1,13): unexpected-end-tag-before-html
-(1,18): unexpected-end-tag-before-html
-(1,22): unexpected-end-tag-before-html
-(1,26): unexpected-end-tag-before-html
-(1,35): unexpected-end-tag-before-html
-(1,39): unexpected-end-tag-before-html
-(1,47): unexpected-end-tag-before-html
-(1,52): unexpected-end-tag-before-html
-(1,58): unexpected-end-tag-before-html
-(1,64): unexpected-end-tag-before-html
-(1,72): unexpected-end-tag-before-html
-(1,79): unexpected-end-tag-before-html
-(1,88): unexpected-end-tag-before-html
-(1,93): unexpected-end-tag-before-html
-(1,98): unexpected-end-tag-before-html
-(1,103): unexpected-end-tag-before-html
-(1,108): unexpected-end-tag-before-html
-(1,113): unexpected-end-tag-before-html
-(1,118): unexpected-end-tag-before-html
-(1,130): unexpected-end-tag-after-body
-(1,130): unexpected-end-tag-treated-as
-(1,134): unexpected-end-tag
-(1,140): unexpected-end-tag
-(1,148): unexpected-end-tag
-(1,155): unexpected-end-tag
-(1,163): unexpected-end-tag
-(1,172): unexpected-end-tag
-(1,180): unexpected-end-tag
-(1,185): unexpected-end-tag
-(1,190): unexpected-end-tag
-(1,195): unexpected-end-tag
-(1,203): unexpected-end-tag
-(1,210): unexpected-end-tag
-(1,217): unexpected-end-tag
-(1,225): unexpected-end-tag
-(1,230): unexpected-end-tag
-(1,238): unexpected-end-tag
-(1,244): unexpected-end-tag
-(1,251): unexpected-end-tag
-(1,258): unexpected-end-tag
-(1,269): unexpected-end-tag
-(1,279): unexpected-end-tag
-(1,287): unexpected-end-tag
-(1,296): unexpected-end-tag
-(1,300): unexpected-end-tag
-(1,305): unexpected-end-tag
-(1,310): unexpected-end-tag
-(1,320): unexpected-end-tag
-(1,331): unexpected-end-tag
-(1,339): unexpected-end-tag
-(1,347): unexpected-end-tag
-(1,355): unexpected-end-tag
-(1,365): end-tag-too-early
-(1,378): end-tag-too-early
-(1,387): end-tag-too-early
-(1,393): end-tag-too-early
-(1,399): end-tag-too-early
-(1,404): end-tag-too-early
-(1,415): end-tag-too-early
-(1,425): end-tag-too-early
-(1,432): end-tag-too-early
-(1,437): end-tag-too-early
-(1,442): end-tag-too-early
-(1,447): unexpected-end-tag
-(1,454): unexpected-end-tag
-(1,460): unexpected-end-tag
-(1,467): unexpected-end-tag
-(1,476): end-tag-too-early
-(1,486): end-tag-too-early
-(1,495): end-tag-too-early
-(1,513): expected-eof-but-got-end-tag
-(1,513): unexpected-end-tag
-(1,520): unexpected-end-tag
-(1,529): unexpected-end-tag
-(1,537): unexpected-end-tag
-(1,547): unexpected-end-tag
-(1,557): unexpected-end-tag
-(1,568): unexpected-end-tag
-(1,579): unexpected-end-tag
-(1,590): unexpected-end-tag
-(1,599): unexpected-end-tag
-(1,611): unexpected-end-tag
-(1,622): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <br>
-|     <p>
-
-#data
-<table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,20): unexpected-end-tag-implies-table-voodoo
-(1,20): unexpected-end-tag
-(1,24): unexpected-end-tag-implies-table-voodoo
-(1,24): unexpected-end-tag
-(1,29): unexpected-end-tag-implies-table-voodoo
-(1,29): unexpected-end-tag
-(1,33): unexpected-end-tag-implies-table-voodoo
-(1,33): unexpected-end-tag
-(1,37): unexpected-end-tag-implies-table-voodoo
-(1,37): unexpected-end-tag
-(1,46): unexpected-end-tag-implies-table-voodoo
-(1,46): unexpected-end-tag
-(1,50): unexpected-end-tag-implies-table-voodoo
-(1,50): unexpected-end-tag
-(1,58): unexpected-end-tag-implies-table-voodoo
-(1,58): unexpected-end-tag
-(1,63): unexpected-end-tag-implies-table-voodoo
-(1,63): unexpected-end-tag
-(1,69): unexpected-end-tag-implies-table-voodoo
-(1,69): end-tag-too-early
-(1,75): unexpected-end-tag-implies-table-voodoo
-(1,75): unexpected-end-tag
-(1,83): unexpected-end-tag-implies-table-voodoo
-(1,83): unexpected-end-tag
-(1,90): unexpected-end-tag-implies-table-voodoo
-(1,90): unexpected-end-tag
-(1,99): unexpected-end-tag-implies-table-voodoo
-(1,99): unexpected-end-tag
-(1,104): unexpected-end-tag-implies-table-voodoo
-(1,104): end-tag-too-early
-(1,109): unexpected-end-tag-implies-table-voodoo
-(1,109): end-tag-too-early
-(1,114): unexpected-end-tag-implies-table-voodoo
-(1,114): end-tag-too-early
-(1,119): unexpected-end-tag-implies-table-voodoo
-(1,119): end-tag-too-early
-(1,124): unexpected-end-tag-implies-table-voodoo
-(1,124): end-tag-too-early
-(1,129): unexpected-end-tag-implies-table-voodoo
-(1,129): end-tag-too-early
-(1,136): unexpected-end-tag-in-table-row
-(1,141): unexpected-end-tag-implies-table-voodoo
-(1,141): unexpected-end-tag-treated-as
-(1,145): unexpected-end-tag-implies-table-voodoo
-(1,145): unexpected-end-tag
-(1,151): unexpected-end-tag-implies-table-voodoo
-(1,151): unexpected-end-tag
-(1,159): unexpected-end-tag-implies-table-voodoo
-(1,159): unexpected-end-tag
-(1,166): unexpected-end-tag-implies-table-voodoo
-(1,166): unexpected-end-tag
-(1,174): unexpected-end-tag-implies-table-voodoo
-(1,174): unexpected-end-tag
-(1,183): unexpected-end-tag-implies-table-voodoo
-(1,183): unexpected-end-tag
-(1,196): unexpected-end-tag
-(1,201): unexpected-end-tag
-(1,206): unexpected-end-tag
-(1,214): unexpected-end-tag
-(1,221): unexpected-end-tag
-(1,228): unexpected-end-tag
-(1,236): unexpected-end-tag
-(1,241): unexpected-end-tag
-(1,249): unexpected-end-tag
-(1,255): unexpected-end-tag
-(1,262): unexpected-end-tag
-(1,269): unexpected-end-tag
-(1,280): unexpected-end-tag
-(1,290): unexpected-end-tag
-(1,298): unexpected-end-tag
-(1,307): unexpected-end-tag
-(1,311): unexpected-end-tag
-(1,316): unexpected-end-tag
-(1,321): unexpected-end-tag
-(1,331): unexpected-end-tag
-(1,342): unexpected-end-tag
-(1,350): unexpected-end-tag
-(1,358): unexpected-end-tag
-(1,366): unexpected-end-tag
-(1,376): end-tag-too-early
-(1,389): end-tag-too-early
-(1,398): end-tag-too-early
-(1,404): end-tag-too-early
-(1,410): end-tag-too-early
-(1,415): end-tag-too-early
-(1,426): end-tag-too-early
-(1,436): end-tag-too-early
-(1,443): end-tag-too-early
-(1,448): end-tag-too-early
-(1,453): end-tag-too-early
-(1,458): unexpected-end-tag
-(1,465): unexpected-end-tag
-(1,471): unexpected-end-tag
-(1,478): unexpected-end-tag
-(1,487): end-tag-too-early
-(1,497): end-tag-too-early
-(1,506): end-tag-too-early
-(1,524): expected-eof-but-got-end-tag
-(1,524): unexpected-end-tag
-(1,531): unexpected-end-tag
-(1,540): unexpected-end-tag
-(1,548): unexpected-end-tag
-(1,558): unexpected-end-tag
-(1,568): unexpected-end-tag
-(1,579): unexpected-end-tag
-(1,590): unexpected-end-tag
-(1,601): unexpected-end-tag
-(1,610): unexpected-end-tag
-(1,622): unexpected-end-tag
-(1,633): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <br>
-|     <table>
-|       <tbody>
-|         <tr>
-|     <p>
-
-#data
-<frameset>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,10): eof-in-frameset
-#document
-| <html>
-|   <head>
-|   <frameset>
diff --git a/html/testdata/webkit/tests10.dat b/html/testdata/webkit/tests10.dat
deleted file mode 100644 (file)
index f84e2d5..0000000
+++ /dev/null
@@ -1,849 +0,0 @@
-#data
-<!DOCTYPE html><svg></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-
-#data
-<!DOCTYPE html><svg></svg><![CDATA[a]]>
-#errors
-(1,28) expected-dashes-or-doctype
-#new-errors
-(1:35) cdata-in-html-content
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|     <!-- [CDATA[a]] -->
-
-#data
-<!DOCTYPE html><body><svg></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-
-#data
-<!DOCTYPE html><body><select><svg></svg></select>
-#errors
-(1,34) unexpected-start-tag-in-select
-(1,40) unexpected-end-tag-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!DOCTYPE html><body><select><option><svg></svg></option></select>
-#errors
-(1,42) unexpected-start-tag-in-select
-(1,48) unexpected-end-tag-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-
-#data
-<!DOCTYPE html><body><table><svg></svg></table>
-#errors
-(1,33) foster-parenting-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|     <table>
-
-#data
-<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
-#errors
-(1,33) foster-parenting-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg g>
-|         "foo"
-|     <table>
-
-#data
-<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
-#errors
-(1,33) foster-parenting-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg g>
-|         "foo"
-|       <svg g>
-|         "bar"
-|     <table>
-
-#data
-<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
-#errors
-(1,40) foster-parenting-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg g>
-|         "foo"
-|       <svg g>
-|         "bar"
-|     <table>
-|       <tbody>
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
-#errors
-(1,44) foster-parenting-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg g>
-|         "foo"
-|       <svg g>
-|         "bar"
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <svg svg>
-|               <svg g>
-|                 "foo"
-|               <svg g>
-|                 "bar"
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <svg svg>
-|               <svg g>
-|                 "foo"
-|               <svg g>
-|                 "bar"
-|             <p>
-|               "baz"
-
-#data
-<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <svg svg>
-|           <svg g>
-|             "foo"
-|           <svg g>
-|             "bar"
-|         <p>
-|           "baz"
-
-#data
-<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
-#errors
-(1,65) unexpected-html-element-in-foreign-content
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <svg svg>
-|           <svg g>
-|             "foo"
-|           <svg g>
-|             "bar"
-|         <p>
-|           "baz"
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
-#errors
-(1,73) unexpected-end-tag
-(1,73) expected-one-end-tag-but-got-another
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <svg svg>
-|           <svg g>
-|             "foo"
-|           <svg g>
-|             "bar"
-|           "baz"
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
-#errors
-(1,43) foster-parenting-start-tag svg
-(1,66) unexpected HTML-like start tag token in foreign content
-(1,66) foster-parenting-start-tag
-(1,67) foster-parenting-character
-(1,68) foster-parenting-character
-(1,69) foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg g>
-|         "foo"
-|       <svg g>
-|         "bar"
-|     <p>
-|       "baz"
-|     <table>
-|       <colgroup>
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
-#errors
-(1,49) unexpected-start-tag-in-select
-(1,52) unexpected-start-tag-in-select
-(1,59) unexpected-end-tag-in-select
-(1,62) unexpected-start-tag-in-select
-(1,69) unexpected-end-tag-in-select
-(1,72) unexpected-start-tag-in-select
-(1,83) unexpected-table-element-end-tag-in-select-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <select>
-|               "foobarbaz"
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
-#errors
-(1,36) unexpected-start-tag-implies-table-voodoo
-(1,41) unexpected-start-tag-in-select
-(1,44) unexpected-start-tag-in-select
-(1,51) unexpected-end-tag-in-select
-(1,54) unexpected-start-tag-in-select
-(1,61) unexpected-end-tag-in-select
-(1,64) unexpected-start-tag-in-select
-(1,75) unexpected-table-element-end-tag-in-select-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "foobarbaz"
-|     <table>
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
-#errors
-(1,40) expected-eof-but-got-start-tag
-(1,63) unexpected-html-element-in-foreign-content
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg g>
-|         "foo"
-|       <svg g>
-|         "bar"
-|     <p>
-|       "baz"
-
-#data
-<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
-#errors
-(1,33) unexpected-start-tag-after-body
-(1,56) unexpected-html-element-in-foreign-content
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg g>
-|         "foo"
-|       <svg g>
-|         "bar"
-|     <p>
-|       "baz"
-
-#data
-<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
-#errors
-(1,30) unexpected-start-tag-in-frameset
-(1,33) unexpected-start-tag-in-frameset
-(1,37) unexpected-end-tag-in-frameset
-(1,40) unexpected-start-tag-in-frameset
-(1,44) unexpected-end-tag-in-frameset
-(1,47) unexpected-start-tag-in-frameset
-(1,53) unexpected-start-tag-in-frameset
-(1,53) eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
-#errors
-(1,41) unexpected-start-tag-after-frameset
-(1,44) unexpected-start-tag-after-frameset
-(1,48) unexpected-end-tag-after-frameset
-(1,51) unexpected-start-tag-after-frameset
-(1,55) unexpected-end-tag-after-frameset
-(1,58) unexpected-start-tag-after-frameset
-(1,64) unexpected-start-tag-after-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     xlink:href="foo"
-|     <svg svg>
-|       xlink href="foo"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     xlink:href="foo"
-|     xml:lang="en"
-|     <svg svg>
-|       <svg g>
-|         xlink href="foo"
-|         xml lang="en"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     xlink:href="foo"
-|     xml:lang="en"
-|     <svg svg>
-|       <svg g>
-|         xlink href="foo"
-|         xml lang="en"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     xlink:href="foo"
-|     xml:lang="en"
-|     <svg svg>
-|       <svg g>
-|         xlink href="foo"
-|         xml lang="en"
-|       "bar"
-
-#data
-<svg></path>
-#errors
-(1,5) expected-doctype-but-got-start-tag
-(1,12) unexpected-end-tag
-(1,12) unexpected-end-tag
-(1,12) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-
-#data
-<div><svg></div>a
-#errors
-(1,5) expected-doctype-but-got-start-tag
-(1,16) unexpected-end-tag
-(1,16) end-tag-too-early
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <svg svg>
-|     "a"
-
-#data
-<div><svg><path></div>a
-#errors
-(1,5) expected-doctype-but-got-start-tag
-(1,22) unexpected-end-tag
-(1,22) end-tag-too-early
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <svg svg>
-|         <svg path>
-|     "a"
-
-#data
-<div><svg><path></svg><path>
-#errors
-(1,5) expected-doctype-but-got-start-tag
-(1,22) unexpected-end-tag
-(1,28) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <svg svg>
-|         <svg path>
-|       <path>
-
-#data
-<div><svg><path><foreignObject><math></div>a
-#errors
-(1,5) expected-doctype-but-got-start-tag
-(1,43) unexpected-end-tag
-(1,43) end-tag-too-early
-(1,44) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <svg svg>
-|         <svg path>
-|           <svg foreignObject>
-|             <math math>
-|               "a"
-
-#data
-<div><svg><path><foreignObject><p></div>a
-#errors
-(1,5) expected-doctype-but-got-start-tag
-(1,40) end-tag-too-early
-(1,41) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <svg svg>
-|         <svg path>
-|           <svg foreignObject>
-|             <p>
-|               "a"
-
-#data
-<!DOCTYPE html><svg><desc><div><svg><ul>a
-#errors
-(1,40) unexpected-html-element-in-foreign-content
-(1,41) expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg desc>
-|         <div>
-|           <svg svg>
-|           <ul>
-|             "a"
-
-#data
-<!DOCTYPE html><svg><desc><svg><ul>a
-#errors
-(1,35) unexpected-html-element-in-foreign-content
-(1,36) expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg desc>
-|         <svg svg>
-|         <ul>
-|           "a"
-
-#data
-<!DOCTYPE html><p><svg><desc><p>
-#errors
-(1,32) expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <svg svg>
-|         <svg desc>
-|           <p>
-
-#data
-<!DOCTYPE html><p><svg><title><p>
-#errors
-(1,33) expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <svg svg>
-|         <svg title>
-|           <p>
-
-#data
-<div><svg><path><foreignObject><p></foreignObject><p>
-#errors
-(1,5) expected-doctype-but-got-start-tag
-(1,50) unexpected-end-tag
-(1,53) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <svg svg>
-|         <svg path>
-|           <svg foreignObject>
-|             <p>
-|             <p>
-
-#data
-<math><mi><div><object><div><span></span></div></object></div></mi><mi>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,71) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         <div>
-|           <object>
-|             <div>
-|               <span>
-|       <math mi>
-
-#data
-<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,83) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         <svg svg>
-|           <svg foreignObject>
-|             <div>
-|               <div>
-|       <math mi>
-
-#data
-<svg><script></script><path>
-#errors
-(1,5) expected-doctype-but-got-start-tag
-(1,28) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg script>
-|       <svg path>
-
-#data
-<table><svg></svg><tr>
-#errors
-(1,7) expected-doctype-but-got-start-tag
-(1,12) unexpected-start-tag-implies-table-voodoo
-(1,22) eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<math><mi><mglyph>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,18) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         <math mglyph>
-
-#data
-<math><mi><malignmark>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,22) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         <math malignmark>
-
-#data
-<math><mo><mglyph>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,18) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mo>
-|         <math mglyph>
-
-#data
-<math><mo><malignmark>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,22) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mo>
-|         <math malignmark>
-
-#data
-<math><mn><mglyph>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,18) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mn>
-|         <math mglyph>
-
-#data
-<math><mn><malignmark>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,22) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mn>
-|         <math malignmark>
-
-#data
-<math><ms><mglyph>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,18) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math ms>
-|         <math mglyph>
-
-#data
-<math><ms><malignmark>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,22) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math ms>
-|         <math malignmark>
-
-#data
-<math><mtext><mglyph>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,21) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mtext>
-|         <math mglyph>
-
-#data
-<math><mtext><malignmark>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,25) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mtext>
-|         <math malignmark>
-
-#data
-<math><annotation-xml><svg></svg></annotation-xml><mi>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,54) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         <svg svg>
-|       <math mi>
-
-#data
-<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,144) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         <svg svg>
-|           <svg foreignObject>
-|             <div>
-|               <math math>
-|                 <math mi>
-|               <span>
-|           <svg path>
-|       <math mi>
-
-#data
-<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
-#errors
-(1,6) expected-doctype-but-got-start-tag
-(1,153) expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         <svg svg>
-|           <svg foreignObject>
-|             <math math>
-|               <math mi>
-|                 <svg svg>
-|               <math mo>
-|             <span>
-|           <svg path>
-|       <math mi>
diff --git a/html/testdata/webkit/tests11.dat b/html/testdata/webkit/tests11.dat
deleted file mode 100644 (file)
index b9901e7..0000000
+++ /dev/null
@@ -1,523 +0,0 @@
-#data
-<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       attributeName=""
-|       attributeType=""
-|       baseFrequency=""
-|       baseProfile=""
-|       calcMode=""
-|       clipPathUnits=""
-|       diffuseConstant=""
-|       edgeMode=""
-|       filterUnits=""
-|       glyphRef=""
-|       gradientTransform=""
-|       gradientUnits=""
-|       kernelMatrix=""
-|       kernelUnitLength=""
-|       keyPoints=""
-|       keySplines=""
-|       keyTimes=""
-|       lengthAdjust=""
-|       limitingConeAngle=""
-|       markerHeight=""
-|       markerUnits=""
-|       markerWidth=""
-|       maskContentUnits=""
-|       maskUnits=""
-|       numOctaves=""
-|       pathLength=""
-|       patternContentUnits=""
-|       patternTransform=""
-|       patternUnits=""
-|       pointsAtX=""
-|       pointsAtY=""
-|       pointsAtZ=""
-|       preserveAlpha=""
-|       preserveAspectRatio=""
-|       primitiveUnits=""
-|       refX=""
-|       refY=""
-|       repeatCount=""
-|       repeatDur=""
-|       requiredExtensions=""
-|       requiredFeatures=""
-|       specularConstant=""
-|       specularExponent=""
-|       spreadMethod=""
-|       startOffset=""
-|       stdDeviation=""
-|       stitchTiles=""
-|       surfaceScale=""
-|       systemLanguage=""
-|       tableValues=""
-|       targetX=""
-|       targetY=""
-|       textLength=""
-|       viewBox=""
-|       viewTarget=""
-|       xChannelSelector=""
-|       yChannelSelector=""
-|       zoomAndPan=""
-
-#data
-<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' DIFFUSECONSTANT='' EDGEMODE='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       attributeName=""
-|       attributeType=""
-|       baseFrequency=""
-|       baseProfile=""
-|       calcMode=""
-|       clipPathUnits=""
-|       diffuseConstant=""
-|       edgeMode=""
-|       filterUnits=""
-|       glyphRef=""
-|       gradientTransform=""
-|       gradientUnits=""
-|       kernelMatrix=""
-|       kernelUnitLength=""
-|       keyPoints=""
-|       keySplines=""
-|       keyTimes=""
-|       lengthAdjust=""
-|       limitingConeAngle=""
-|       markerHeight=""
-|       markerUnits=""
-|       markerWidth=""
-|       maskContentUnits=""
-|       maskUnits=""
-|       numOctaves=""
-|       pathLength=""
-|       patternContentUnits=""
-|       patternTransform=""
-|       patternUnits=""
-|       pointsAtX=""
-|       pointsAtY=""
-|       pointsAtZ=""
-|       preserveAlpha=""
-|       preserveAspectRatio=""
-|       primitiveUnits=""
-|       refX=""
-|       refY=""
-|       repeatCount=""
-|       repeatDur=""
-|       requiredExtensions=""
-|       requiredFeatures=""
-|       specularConstant=""
-|       specularExponent=""
-|       spreadMethod=""
-|       startOffset=""
-|       stdDeviation=""
-|       stitchTiles=""
-|       surfaceScale=""
-|       systemLanguage=""
-|       tableValues=""
-|       targetX=""
-|       targetY=""
-|       textLength=""
-|       viewBox=""
-|       viewTarget=""
-|       xChannelSelector=""
-|       yChannelSelector=""
-|       zoomAndPan=""
-
-#data
-<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' diffuseconstant='' edgemode='' filterunits='' filterres='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       attributeName=""
-|       attributeType=""
-|       baseFrequency=""
-|       baseProfile=""
-|       calcMode=""
-|       clipPathUnits=""
-|       diffuseConstant=""
-|       edgeMode=""
-|       filterUnits=""
-|       filterres=""
-|       glyphRef=""
-|       gradientTransform=""
-|       gradientUnits=""
-|       kernelMatrix=""
-|       kernelUnitLength=""
-|       keyPoints=""
-|       keySplines=""
-|       keyTimes=""
-|       lengthAdjust=""
-|       limitingConeAngle=""
-|       markerHeight=""
-|       markerUnits=""
-|       markerWidth=""
-|       maskContentUnits=""
-|       maskUnits=""
-|       numOctaves=""
-|       pathLength=""
-|       patternContentUnits=""
-|       patternTransform=""
-|       patternUnits=""
-|       pointsAtX=""
-|       pointsAtY=""
-|       pointsAtZ=""
-|       preserveAlpha=""
-|       preserveAspectRatio=""
-|       primitiveUnits=""
-|       refX=""
-|       refY=""
-|       repeatCount=""
-|       repeatDur=""
-|       requiredExtensions=""
-|       requiredFeatures=""
-|       specularConstant=""
-|       specularExponent=""
-|       spreadMethod=""
-|       startOffset=""
-|       stdDeviation=""
-|       stitchTiles=""
-|       surfaceScale=""
-|       systemLanguage=""
-|       tableValues=""
-|       targetX=""
-|       targetY=""
-|       textLength=""
-|       viewBox=""
-|       viewTarget=""
-|       xChannelSelector=""
-|       yChannelSelector=""
-|       zoomAndPan=""
-
-#data
-<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       attributename=""
-|       attributetype=""
-|       basefrequency=""
-|       baseprofile=""
-|       calcmode=""
-|       clippathunits=""
-|       diffuseconstant=""
-|       edgemode=""
-|       filterunits=""
-|       glyphref=""
-|       gradienttransform=""
-|       gradientunits=""
-|       kernelmatrix=""
-|       kernelunitlength=""
-|       keypoints=""
-|       keysplines=""
-|       keytimes=""
-|       lengthadjust=""
-|       limitingconeangle=""
-|       markerheight=""
-|       markerunits=""
-|       markerwidth=""
-|       maskcontentunits=""
-|       maskunits=""
-|       numoctaves=""
-|       pathlength=""
-|       patterncontentunits=""
-|       patterntransform=""
-|       patternunits=""
-|       pointsatx=""
-|       pointsaty=""
-|       pointsatz=""
-|       preservealpha=""
-|       preserveaspectratio=""
-|       primitiveunits=""
-|       refx=""
-|       refy=""
-|       repeatcount=""
-|       repeatdur=""
-|       requiredextensions=""
-|       requiredfeatures=""
-|       specularconstant=""
-|       specularexponent=""
-|       spreadmethod=""
-|       startoffset=""
-|       stddeviation=""
-|       stitchtiles=""
-|       surfacescale=""
-|       systemlanguage=""
-|       tablevalues=""
-|       targetx=""
-|       targety=""
-|       textlength=""
-|       viewbox=""
-|       viewtarget=""
-|       xchannelselector=""
-|       ychannelselector=""
-|       zoomandpan=""
-
-#data
-<!DOCTYPE html><body><svg contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       contentscripttype=""
-|       contentstyletype=""
-|       externalresourcesrequired=""
-|       filterres=""
-
-#data
-<!DOCTYPE html><body><svg CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' EXTERNALRESOURCESREQUIRED='' FILTERRES=''></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       contentscripttype=""
-|       contentstyletype=""
-|       externalresourcesrequired=""
-|       filterres=""
-
-#data
-<!DOCTYPE html><body><svg contentscripttype='' contentstyletype='' externalresourcesrequired='' filterres=''></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       contentscripttype=""
-|       contentstyletype=""
-|       externalresourcesrequired=""
-|       filterres=""
-
-#data
-<!DOCTYPE html><body><math contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       contentscripttype=""
-|       contentstyletype=""
-|       externalresourcesrequired=""
-|       filterres=""
-
-#data
-<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg altGlyph>
-|       <svg altGlyphDef>
-|       <svg altGlyphItem>
-|       <svg animateColor>
-|       <svg animateMotion>
-|       <svg animateTransform>
-|       <svg clipPath>
-|       <svg feBlend>
-|       <svg feColorMatrix>
-|       <svg feComponentTransfer>
-|       <svg feComposite>
-|       <svg feConvolveMatrix>
-|       <svg feDiffuseLighting>
-|       <svg feDisplacementMap>
-|       <svg feDistantLight>
-|       <svg feFlood>
-|       <svg feFuncA>
-|       <svg feFuncB>
-|       <svg feFuncG>
-|       <svg feFuncR>
-|       <svg feGaussianBlur>
-|       <svg feImage>
-|       <svg feMerge>
-|       <svg feMergeNode>
-|       <svg feMorphology>
-|       <svg feOffset>
-|       <svg fePointLight>
-|       <svg feSpecularLighting>
-|       <svg feSpotLight>
-|       <svg feTile>
-|       <svg feTurbulence>
-|       <svg foreignObject>
-|       <svg glyphRef>
-|       <svg linearGradient>
-|       <svg radialGradient>
-|       <svg textPath>
-
-#data
-<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg altGlyph>
-|       <svg altGlyphDef>
-|       <svg altGlyphItem>
-|       <svg animateColor>
-|       <svg animateMotion>
-|       <svg animateTransform>
-|       <svg clipPath>
-|       <svg feBlend>
-|       <svg feColorMatrix>
-|       <svg feComponentTransfer>
-|       <svg feComposite>
-|       <svg feConvolveMatrix>
-|       <svg feDiffuseLighting>
-|       <svg feDisplacementMap>
-|       <svg feDistantLight>
-|       <svg feFlood>
-|       <svg feFuncA>
-|       <svg feFuncB>
-|       <svg feFuncG>
-|       <svg feFuncR>
-|       <svg feGaussianBlur>
-|       <svg feImage>
-|       <svg feMerge>
-|       <svg feMergeNode>
-|       <svg feMorphology>
-|       <svg feOffset>
-|       <svg fePointLight>
-|       <svg feSpecularLighting>
-|       <svg feSpotLight>
-|       <svg feTile>
-|       <svg feTurbulence>
-|       <svg foreignObject>
-|       <svg glyphRef>
-|       <svg linearGradient>
-|       <svg radialGradient>
-|       <svg textPath>
-
-#data
-<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg altGlyph>
-|       <svg altGlyphDef>
-|       <svg altGlyphItem>
-|       <svg animateColor>
-|       <svg animateMotion>
-|       <svg animateTransform>
-|       <svg clipPath>
-|       <svg feBlend>
-|       <svg feColorMatrix>
-|       <svg feComponentTransfer>
-|       <svg feComposite>
-|       <svg feConvolveMatrix>
-|       <svg feDiffuseLighting>
-|       <svg feDisplacementMap>
-|       <svg feDistantLight>
-|       <svg feFlood>
-|       <svg feFuncA>
-|       <svg feFuncB>
-|       <svg feFuncG>
-|       <svg feFuncR>
-|       <svg feGaussianBlur>
-|       <svg feImage>
-|       <svg feMerge>
-|       <svg feMergeNode>
-|       <svg feMorphology>
-|       <svg feOffset>
-|       <svg fePointLight>
-|       <svg feSpecularLighting>
-|       <svg feSpotLight>
-|       <svg feTile>
-|       <svg feTurbulence>
-|       <svg foreignObject>
-|       <svg glyphRef>
-|       <svg linearGradient>
-|       <svg radialGradient>
-|       <svg textPath>
-
-#data
-<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math altglyph>
-|       <math altglyphdef>
-|       <math altglyphitem>
-|       <math animatecolor>
-|       <math animatemotion>
-|       <math animatetransform>
-|       <math clippath>
-|       <math feblend>
-|       <math fecolormatrix>
-|       <math fecomponenttransfer>
-|       <math fecomposite>
-|       <math feconvolvematrix>
-|       <math fediffuselighting>
-|       <math fedisplacementmap>
-|       <math fedistantlight>
-|       <math feflood>
-|       <math fefunca>
-|       <math fefuncb>
-|       <math fefuncg>
-|       <math fefuncr>
-|       <math fegaussianblur>
-|       <math feimage>
-|       <math femerge>
-|       <math femergenode>
-|       <math femorphology>
-|       <math feoffset>
-|       <math fepointlight>
-|       <math fespecularlighting>
-|       <math fespotlight>
-|       <math fetile>
-|       <math feturbulence>
-|       <math foreignobject>
-|       <math glyphref>
-|       <math lineargradient>
-|       <math radialgradient>
-|       <math textpath>
-
-#data
-<!DOCTYPE html><body><svg><solidColor /></svg>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg solidcolor>
diff --git a/html/testdata/webkit/tests12.dat b/html/testdata/webkit/tests12.dat
deleted file mode 100644 (file)
index 63107d2..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-#data
-<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-|       <math math>
-|         <math mtext>
-|           <i>
-|             "baz"
-|         <math annotation-xml>
-|           <svg svg>
-|             <svg desc>
-|               <b>
-|                 "eggs"
-|             <svg g>
-|               <svg foreignObject>
-|                 <p>
-|                   "spam"
-|                 <table>
-|                   <tbody>
-|                     <tr>
-|                       <td>
-|                         <img>
-|             <svg g>
-|               "quux"
-|       "bar"
-
-#data
-<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "foo"
-|     <math math>
-|       <math mtext>
-|         <i>
-|           "baz"
-|       <math annotation-xml>
-|         <svg svg>
-|           <svg desc>
-|             <b>
-|               "eggs"
-|           <svg g>
-|             <svg foreignObject>
-|               <p>
-|                 "spam"
-|               <table>
-|                 <tbody>
-|                   <tr>
-|                     <td>
-|                       <img>
-|           <svg g>
-|             "quux"
-|     "bar"
diff --git a/html/testdata/webkit/tests14.dat b/html/testdata/webkit/tests14.dat
deleted file mode 100644 (file)
index a08b764..0000000
+++ /dev/null
@@ -1,75 +0,0 @@
-#data
-<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <xyz:abc>
-
-#data
-<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <xyz:abc>
-|     <span>
-
-#data
-<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
-#errors
-(1,38): non-html-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   abc:def="gh"
-|   <head>
-|   <body>
-|     <xyz:abc>
-
-#data
-<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
-#errors
-(1,53): non-html-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   xml:lang="bar"
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><html 123=456>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   123="456"
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><html 123=456><html 789=012>
-#errors
-(1,43): non-html-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   123="456"
-|   789="012"
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><html><body 789=012>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     789="012"
diff --git a/html/testdata/webkit/tests15.dat b/html/testdata/webkit/tests15.dat
deleted file mode 100644 (file)
index 93d06a8..0000000
+++ /dev/null
@@ -1,216 +0,0 @@
-#data
-<!DOCTYPE html><p><b><i><u></p> <p>X
-#errors
-(1,31): unexpected-end-tag
-(1,36): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <b>
-|         <i>
-|           <u>
-|     <b>
-|       <i>
-|         <u>
-|           " "
-|           <p>
-|             "X"
-
-#data
-<p><b><i><u></p>
-<p>X
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,16): unexpected-end-tag
-(2,4): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <b>
-|         <i>
-|           <u>
-|     <b>
-|       <i>
-|         <u>
-|           "
-"
-|           <p>
-|             "X"
-
-#data
-<!doctype html></html> <head>
-#errors
-(1,29): expected-eof-but-got-start-tag
-(1,29): unexpected-start-tag-ignored
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     " "
-
-#data
-<!doctype html></body><meta>
-#errors
-(1,28): unexpected-start-tag-after-body
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <meta>
-
-#data
-<html></html><!-- foo -->
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-| <!--  foo  -->
-
-#data
-<!doctype html></body><title>X</title>
-#errors
-(1,29): unexpected-start-tag-after-body
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <title>
-|       "X"
-
-#data
-<!doctype html><table> X<meta></table>
-#errors
-(1,23): foster-parenting-character
-(1,24): foster-parenting-character
-(1,30): foster-parenting-start-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     " X"
-|     <meta>
-|     <table>
-
-#data
-<!doctype html><table> x</table>
-#errors
-(1,23): foster-parenting-character
-(1,24): foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     " x"
-|     <table>
-
-#data
-<!doctype html><table> x </table>
-#errors
-(1,23): foster-parenting-character
-(1,24): foster-parenting-character
-(1,25): foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     " x "
-|     <table>
-
-#data
-<!doctype html><table><tr> x</table>
-#errors
-(1,27): foster-parenting-character
-(1,28): foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     " x"
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<!doctype html><table>X<style> <tr>x </style> </table>
-#errors
-(1,23): foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "X"
-|     <table>
-|       <style>
-|         " <tr>x "
-|       " "
-
-#data
-<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
-#errors
-(1,30): foster-parenting-start-tag
-(1,31): foster-parenting-character
-(1,32): foster-parenting-character
-(1,33): foster-parenting-character
-(1,37): foster-parenting-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <a>
-|         "foo"
-|       <table>
-|         " "
-|         <tbody>
-|           <tr>
-|             <td>
-|               "bar"
-|             " "
-
-#data
-<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,7): unexpected-start-tag-ignored
-(1,15): unexpected-end-tag
-(1,23): unexpected-end-tag
-(1,33): unexpected-start-tag
-(1,99): expected-named-closing-tag-but-got-eof
-(1,99): eof-in-frameset
-#document
-| <html>
-|   <head>
-|   <frameset>
-|     <frame>
-|     <frameset>
-|       <frame>
-|     <noframes>
-|       "</frameset><noframes>"
-
-#data
-<!DOCTYPE html><object></html>
-#errors
-(1,30): expected-body-in-scope
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <object>
diff --git a/html/testdata/webkit/tests16.dat b/html/testdata/webkit/tests16.dat
deleted file mode 100644 (file)
index cea7340..0000000
+++ /dev/null
@@ -1,2604 +0,0 @@
-#data
-<!doctype html><script>
-#errors
-(1,23): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|   <body>
-
-#data
-<!doctype html><script>a
-#errors
-(1,24): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "a"
-|   <body>
-
-#data
-<!doctype html><script><
-#errors
-(1,24): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<"
-|   <body>
-
-#data
-<!doctype html><script></
-#errors
-(1,25): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</"
-|   <body>
-
-#data
-<!doctype html><script></S
-#errors
-(1,26): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</S"
-|   <body>
-
-#data
-<!doctype html><script></SC
-#errors
-(1,27): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</SC"
-|   <body>
-
-#data
-<!doctype html><script></SCR
-#errors
-(1,28): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</SCR"
-|   <body>
-
-#data
-<!doctype html><script></SCRI
-#errors
-(1,29): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</SCRI"
-|   <body>
-
-#data
-<!doctype html><script></SCRIP
-#errors
-(1,30): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</SCRIP"
-|   <body>
-
-#data
-<!doctype html><script></SCRIPT
-#errors
-(1,31): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</SCRIPT"
-|   <body>
-
-#data
-<!doctype html><script></SCRIPT 
-#errors
-(1,32): expected-attribute-name-but-got-eof
-(1,32): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:33) eof-in-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|   <body>
-
-#data
-<!doctype html><script></s
-#errors
-(1,26): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</s"
-|   <body>
-
-#data
-<!doctype html><script></sc
-#errors
-(1,27): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</sc"
-|   <body>
-
-#data
-<!doctype html><script></scr
-#errors
-(1,28): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</scr"
-|   <body>
-
-#data
-<!doctype html><script></scri
-#errors
-(1,29): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</scri"
-|   <body>
-
-#data
-<!doctype html><script></scrip
-#errors
-(1,30): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</scrip"
-|   <body>
-
-#data
-<!doctype html><script></script
-#errors
-(1,31): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "</script"
-|   <body>
-
-#data
-<!doctype html><script></script 
-#errors
-(1,32): expected-attribute-name-but-got-eof
-(1,32): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:33) eof-in-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|   <body>
-
-#data
-<!doctype html><script><!
-#errors
-(1,25): expected-script-data-but-got-eof
-(1,25): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!"
-|   <body>
-
-#data
-<!doctype html><script><!a
-#errors
-(1,26): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!a"
-|   <body>
-
-#data
-<!doctype html><script><!-
-#errors
-(1,26): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!-"
-|   <body>
-
-#data
-<!doctype html><script><!-a
-#errors
-(1,27): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!-a"
-|   <body>
-
-#data
-<!doctype html><script><!--
-#errors
-(1,27): expected-named-closing-tag-but-got-eof
-(1,27): unexpected-eof-in-text-mode
-#new-errors
-(1:28) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--"
-|   <body>
-
-#data
-<!doctype html><script><!--a
-#errors
-(1,28): expected-named-closing-tag-but-got-eof
-(1,28): unexpected-eof-in-text-mode
-#new-errors
-(1:29) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--a"
-|   <body>
-
-#data
-<!doctype html><script><!--<
-#errors
-(1,28): expected-named-closing-tag-but-got-eof
-(1,28): unexpected-eof-in-text-mode
-#new-errors
-(1:29) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<"
-|   <body>
-
-#data
-<!doctype html><script><!--<a
-#errors
-(1,29): expected-named-closing-tag-but-got-eof
-(1,29): unexpected-eof-in-text-mode
-#new-errors
-(1:30) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<a"
-|   <body>
-
-#data
-<!doctype html><script><!--</
-#errors
-(1,29): expected-named-closing-tag-but-got-eof
-(1,29): unexpected-eof-in-text-mode
-#new-errors
-(1:30) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--</"
-|   <body>
-
-#data
-<!doctype html><script><!--</script
-#errors
-(1,35): expected-named-closing-tag-but-got-eof
-(1,35): unexpected-eof-in-text-mode
-#new-errors
-(1:36) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--</script"
-|   <body>
-
-#data
-<!doctype html><script><!--</script 
-#errors
-(1,36): expected-attribute-name-but-got-eof
-(1,36): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:37) eof-in-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--"
-|   <body>
-
-#data
-<!doctype html><script><!--<s
-#errors
-(1,29): expected-named-closing-tag-but-got-eof
-(1,29): unexpected-eof-in-text-mode
-#new-errors
-(1:30) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<s"
-|   <body>
-
-#data
-<!doctype html><script><!--<script
-#errors
-(1,34): expected-named-closing-tag-but-got-eof
-(1,34): unexpected-eof-in-text-mode
-#new-errors
-(1:35) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script"
-|   <body>
-
-#data
-<!doctype html><script><!--<script 
-#errors
-(1,35): eof-in-script-in-script
-(1,35): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:36) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script "
-|   <body>
-
-#data
-<!doctype html><script><!--<script <
-#errors
-(1,36): eof-in-script-in-script
-(1,36): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:37) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script <"
-|   <body>
-
-#data
-<!doctype html><script><!--<script <a
-#errors
-(1,37): eof-in-script-in-script
-(1,37): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:38) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script <a"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </
-#errors
-(1,37): eof-in-script-in-script
-(1,37): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:38) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </s
-#errors
-(1,38): eof-in-script-in-script
-(1,38): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:39) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </s"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script
-#errors
-(1,43): eof-in-script-in-script
-(1,43): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:44) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </scripta
-#errors
-(1,44): eof-in-script-in-script
-(1,44): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:45) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </scripta"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script 
-#errors
-(1,44): expected-named-closing-tag-but-got-eof
-(1,44): unexpected-eof-in-text-mode
-#new-errors
-(1:45) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script "
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script>
-#errors
-(1,44): expected-named-closing-tag-but-got-eof
-(1,44): unexpected-eof-in-text-mode
-#new-errors
-(1:45) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script>"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script/
-#errors
-(1,44): expected-named-closing-tag-but-got-eof
-(1,44): unexpected-eof-in-text-mode
-#new-errors
-(1:45) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script/"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script <
-#errors
-(1,45): expected-named-closing-tag-but-got-eof
-(1,45): unexpected-eof-in-text-mode
-#new-errors
-(1:46) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script <"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script <a
-#errors
-(1,46): expected-named-closing-tag-but-got-eof
-(1,46): unexpected-eof-in-text-mode
-#new-errors
-(1:47) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script <a"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script </
-#errors
-(1,46): expected-named-closing-tag-but-got-eof
-(1,46): unexpected-eof-in-text-mode
-#new-errors
-(1:47) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script </"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script </script
-#errors
-(1,52): expected-named-closing-tag-but-got-eof
-(1,52): unexpected-eof-in-text-mode
-#new-errors
-(1:53) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script </script"
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script </script 
-#errors
-(1,53): expected-attribute-name-but-got-eof
-(1,53): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:54) eof-in-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script "
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script </script/
-#errors
-(1,53): unexpected-EOF-after-solidus-in-tag
-(1,53): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:54) eof-in-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script "
-|   <body>
-
-#data
-<!doctype html><script><!--<script </script </script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script "
-|   <body>
-
-#data
-<!doctype html><script><!--<script -
-#errors
-(1,36): eof-in-script-in-script
-(1,36): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:37) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -"
-|   <body>
-
-#data
-<!doctype html><script><!--<script -a
-#errors
-(1,37): eof-in-script-in-script
-(1,37): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:38) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -a"
-|   <body>
-
-#data
-<!doctype html><script><!--<script -<
-#errors
-(1,37): eof-in-script-in-script
-(1,37): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:38) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -<"
-|   <body>
-
-#data
-<!doctype html><script><!--<script --
-#errors
-(1,37): eof-in-script-in-script
-(1,37): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:38) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --"
-|   <body>
-
-#data
-<!doctype html><script><!--<script --a
-#errors
-(1,38): eof-in-script-in-script
-(1,38): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:39) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --a"
-|   <body>
-
-#data
-<!doctype html><script><!--<script --<
-#errors
-(1,38): eof-in-script-in-script
-(1,38): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:39) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --<"
-|   <body>
-
-#data
-<!doctype html><script><!--<script -->
-#errors
-(1,38): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -->"
-|   <body>
-
-#data
-<!doctype html><script><!--<script --><
-#errors
-(1,39): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --><"
-|   <body>
-
-#data
-<!doctype html><script><!--<script --></
-#errors
-(1,40): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --></"
-|   <body>
-
-#data
-<!doctype html><script><!--<script --></script
-#errors
-(1,46): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --></script"
-|   <body>
-
-#data
-<!doctype html><script><!--<script --></script 
-#errors
-(1,47): expected-attribute-name-but-got-eof
-(1,47): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:48) eof-in-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -->"
-|   <body>
-
-#data
-<!doctype html><script><!--<script --></script/
-#errors
-(1,47): unexpected-EOF-after-solidus-in-tag
-(1,47): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:48) eof-in-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -->"
-|   <body>
-
-#data
-<!doctype html><script><!--<script --></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -->"
-|   <body>
-
-#data
-<!doctype html><script><!--<script><\/script>--></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script><\/script>-->"
-|   <body>
-
-#data
-<!doctype html><script><!--<script></scr'+'ipt>--></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></scr'+'ipt>-->"
-|   <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>"
-|   <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>--><!--</script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>--><!--"
-|   <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>-- ></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>-- >"
-|   <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>- -></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>- ->"
-|   <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>- - ></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>- - >"
-|   <body>
-
-#data
-<!doctype html><script><!--<script></script><script></script>-></script>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>->"
-|   <body>
-
-#data
-<!doctype html><script><!--<script>--!></script>X
-#errors
-(1,49): expected-named-closing-tag-but-got-eof
-(1,49): unexpected-EOF-in-text-mode
-#new-errors
-(1:50) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script>--!></script>X"
-|   <body>
-
-#data
-<!doctype html><script><!--<scr'+'ipt></script>--></script>
-#errors
-(1,59): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<scr'+'ipt>"
-|   <body>
-|     "-->"
-
-#data
-<!doctype html><script><!--<script></scr'+'ipt></script>X
-#errors
-(1,57): expected-named-closing-tag-but-got-eof
-(1,57): unexpected-eof-in-text-mode
-#new-errors
-(1:58) eof-in-script-html-comment-like-text
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></scr'+'ipt></script>X"
-|   <body>
-
-#data
-<!doctype html><style><!--<style></style>--></style>
-#errors
-(1,52): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <style>
-|       "<!--<style>"
-|   <body>
-|     "-->"
-
-#data
-<!doctype html><style><!--</style>X
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <style>
-|       "<!--"
-|   <body>
-|     "X"
-
-#data
-<!doctype html><style><!--...</style>...--></style>
-#errors
-(1,51): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <style>
-|       "<!--..."
-|   <body>
-|     "...-->"
-
-#data
-<!doctype html><style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <style>
-|       "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
-|   <body>
-|     "X"
-
-#data
-<!doctype html><style><!--...<style><!--...--!></style>--></style>
-#errors
-(1,66): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <style>
-|       "<!--...<style><!--...--!>"
-|   <body>
-|     "-->"
-
-#data
-<!doctype html><style><!--...</style><!-- --><style>@import ...</style>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <style>
-|       "<!--..."
-|     <!--   -->
-|     <style>
-|       "@import ..."
-|   <body>
-
-#data
-<!doctype html><style>...<style><!--...</style><!-- --></style>
-#errors
-(1,63): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <style>
-|       "...<style><!--..."
-|     <!--   -->
-|   <body>
-
-#data
-<!doctype html><style>...<!--[if IE]><style>...</style>X
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <style>
-|       "...<!--[if IE]><style>..."
-|   <body>
-|     "X"
-
-#data
-<!doctype html><title><!--<title></title>--></title>
-#errors
-(1,52): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <title>
-|       "<!--<title>"
-|   <body>
-|     "-->"
-
-#data
-<!doctype html><title>&lt;/title></title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <title>
-|       "</title>"
-|   <body>
-
-#data
-<!doctype html><title>foo/title><link></head><body>X
-#errors
-(1,52): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <title>
-|       "foo/title><link></head><body>X"
-|   <body>
-
-#data
-<!doctype html><noscript><!--<noscript></noscript>--></noscript>
-#errors
-(1,64): unexpected-end-tag
-#script-on
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <noscript>
-|       "<!--<noscript>"
-|   <body>
-|     "-->"
-
-#data
-<!doctype html><noscript><!--<noscript></noscript>--></noscript>
-#errors
-#script-off
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- <noscript></noscript> -->
-|   <body>
-
-#data
-<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>
-#errors
-#script-on
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <noscript>
-|       "<!--"
-|   <body>
-|     "X"
-|     <noscript>
-|       "-->"
-
-#data
-<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>
-#errors
-#script-off
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- </noscript>X<noscript> -->
-|   <body>
-
-#data
-<!doctype html><noscript><iframe></noscript>X
-#errors
-#script-on
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <noscript>
-|       "<iframe>"
-|   <body>
-|     "X"
-
-#data
-<!doctype html><noscript><iframe></noscript>X
-#errors
- * (1,34) unexpected token in head noscript
- * (1,46) unexpected EOF
-#script-off
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <noscript>
-|   <body>
-|     <iframe>
-|       "</noscript>X"
-
-#data
-<!doctype html><noframes><!--<noframes></noframes>--></noframes>
-#errors
-(1,64): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <noframes>
-|       "<!--<noframes>"
-|   <body>
-|     "-->"
-
-#data
-<!doctype html><noframes><body><script><!--...</script></body></noframes></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <noframes>
-|       "<body><script><!--...</script></body>"
-|   <body>
-
-#data
-<!doctype html><textarea><!--<textarea></textarea>--></textarea>
-#errors
-(1,64): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "<!--<textarea>"
-|     "-->"
-
-#data
-<!doctype html><textarea>&lt;/textarea></textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "</textarea>"
-
-#data
-<!doctype html><textarea>&lt;</textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "<"
-
-#data
-<!doctype html><textarea>a&lt;b</textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "a<b"
-
-#data
-<!doctype html><iframe><!--<iframe></iframe>--></iframe>
-#errors
-(1,56): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <iframe>
-|       "<!--<iframe>"
-|     "-->"
-
-#data
-<!doctype html><iframe>...<!--X->...<!--/X->...</iframe>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <iframe>
-|       "...<!--X->...<!--/X->..."
-
-#data
-<!doctype html><xmp><!--<xmp></xmp>--></xmp>
-#errors
-(1,44): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <xmp>
-|       "<!--<xmp>"
-|     "-->"
-
-#data
-<!doctype html><noembed><!--<noembed></noembed>--></noembed>
-#errors
-(1,60): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <noembed>
-|       "<!--<noembed>"
-|     "-->"
-
-#data
-<script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,8): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|   <body>
-
-#data
-<script>a
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,9): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "a"
-|   <body>
-
-#data
-<script><
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,9): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<"
-|   <body>
-
-#data
-<script></
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,10): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</"
-|   <body>
-
-#data
-<script></S
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,11): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</S"
-|   <body>
-
-#data
-<script></SC
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,12): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</SC"
-|   <body>
-
-#data
-<script></SCR
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,13): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</SCR"
-|   <body>
-
-#data
-<script></SCRI
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,14): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</SCRI"
-|   <body>
-
-#data
-<script></SCRIP
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,15): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</SCRIP"
-|   <body>
-
-#data
-<script></SCRIPT
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,16): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</SCRIPT"
-|   <body>
-
-#data
-<script></SCRIPT 
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,17): expected-attribute-name-but-got-eof
-(1,17): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:18) eof-in-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|   <body>
-
-#data
-<script></s
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,11): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</s"
-|   <body>
-
-#data
-<script></sc
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,12): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</sc"
-|   <body>
-
-#data
-<script></scr
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,13): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</scr"
-|   <body>
-
-#data
-<script></scri
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,14): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</scri"
-|   <body>
-
-#data
-<script></scrip
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,15): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</scrip"
-|   <body>
-
-#data
-<script></script
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,16): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</script"
-|   <body>
-
-#data
-<script></script 
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,17): expected-attribute-name-but-got-eof
-(1,17): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:18) eof-in-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|   <body>
-
-#data
-<script><!
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,10): expected-script-data-but-got-eof
-(1,10): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!"
-|   <body>
-
-#data
-<script><!a
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,11): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!a"
-|   <body>
-
-#data
-<script><!-
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,11): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!-"
-|   <body>
-
-#data
-<script><!-a
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,12): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!-a"
-|   <body>
-
-#data
-<script><!--
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,12): expected-named-closing-tag-but-got-eof
-(1,12): unexpected-eof-in-text-mode
-#new-errors
-(1:13) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--"
-|   <body>
-
-#data
-<script><!--a
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,13): expected-named-closing-tag-but-got-eof
-(1,13): unexpected-eof-in-text-mode
-#new-errors
-(1:14) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--a"
-|   <body>
-
-#data
-<script><!--<
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,13): expected-named-closing-tag-but-got-eof
-(1,13): unexpected-eof-in-text-mode
-#new-errors
-(1:14) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<"
-|   <body>
-
-#data
-<script><!--<a
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,14): expected-named-closing-tag-but-got-eof
-(1,14): unexpected-eof-in-text-mode
-#new-errors
-(1:15) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<a"
-|   <body>
-
-#data
-<script><!--</
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,14): expected-named-closing-tag-but-got-eof
-(1,14): unexpected-eof-in-text-mode
-#new-errors
-(1:15) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--</"
-|   <body>
-
-#data
-<script><!--</script
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,20): expected-named-closing-tag-but-got-eof
-(1,20): unexpected-eof-in-text-mode
-#new-errors
-(1:21) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--</script"
-|   <body>
-
-#data
-<script><!--</script 
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,21): expected-attribute-name-but-got-eof
-(1,21): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:22) eof-in-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--"
-|   <body>
-
-#data
-<script><!--<s
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,14): expected-named-closing-tag-but-got-eof
-(1,14): unexpected-eof-in-text-mode
-#new-errors
-(1:15) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<s"
-|   <body>
-
-#data
-<script><!--<script
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,19): expected-named-closing-tag-but-got-eof
-(1,19): unexpected-eof-in-text-mode
-#new-errors
-(1:20) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script"
-|   <body>
-
-#data
-<script><!--<script 
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,20): eof-in-script-in-script
-(1,20): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:21) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script "
-|   <body>
-
-#data
-<script><!--<script <
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,21): eof-in-script-in-script
-(1,21): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:22) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script <"
-|   <body>
-
-#data
-<script><!--<script <a
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,22): eof-in-script-in-script
-(1,22): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:23) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script <a"
-|   <body>
-
-#data
-<script><!--<script </
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,22): eof-in-script-in-script
-(1,22): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:23) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </"
-|   <body>
-
-#data
-<script><!--<script </s
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,23): eof-in-script-in-script
-(1,23): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:24) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </s"
-|   <body>
-
-#data
-<script><!--<script </script
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,28): eof-in-script-in-script
-(1,28): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:29) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script"
-|   <body>
-
-#data
-<script><!--<script </scripta
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,29): eof-in-script-in-script
-(1,29): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:30) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </scripta"
-|   <body>
-
-#data
-<script><!--<script </script 
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,29): expected-named-closing-tag-but-got-eof
-(1,29): unexpected-eof-in-text-mode
-#new-errors
-(1:30) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script "
-|   <body>
-
-#data
-<script><!--<script </script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,29): expected-named-closing-tag-but-got-eof
-(1,29): unexpected-eof-in-text-mode
-#new-errors
-(1:30) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script>"
-|   <body>
-
-#data
-<script><!--<script </script/
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,29): expected-named-closing-tag-but-got-eof
-(1,29): unexpected-eof-in-text-mode
-#new-errors
-(1:30) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script/"
-|   <body>
-
-#data
-<script><!--<script </script <
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,30): expected-named-closing-tag-but-got-eof
-(1,30): unexpected-eof-in-text-mode
-#new-errors
-(1:31) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script <"
-|   <body>
-
-#data
-<script><!--<script </script <a
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,31): expected-named-closing-tag-but-got-eof
-(1,31): unexpected-eof-in-text-mode
-#new-errors
-(1:32) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script <a"
-|   <body>
-
-#data
-<script><!--<script </script </
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,31): expected-named-closing-tag-but-got-eof
-(1,31): unexpected-eof-in-text-mode
-#new-errors
-(1:32) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script </"
-|   <body>
-
-#data
-<script><!--<script </script </script
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,37): expected-named-closing-tag-but-got-eof
-(1,37): unexpected-eof-in-text-mode
-#new-errors
-(1:38) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script </script"
-|   <body>
-
-#data
-<script><!--<script </script </script 
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,38): expected-attribute-name-but-got-eof
-(1,38): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:39) eof-in-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script "
-|   <body>
-
-#data
-<script><!--<script </script </script/
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,38): unexpected-EOF-after-solidus-in-tag
-(1,38): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:39) eof-in-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script "
-|   <body>
-
-#data
-<script><!--<script </script </script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script </script "
-|   <body>
-
-#data
-<script><!--<script -
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,21): eof-in-script-in-script
-(1,21): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:22) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -"
-|   <body>
-
-#data
-<script><!--<script -a
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,22): eof-in-script-in-script
-(1,22): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:23) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -a"
-|   <body>
-
-#data
-<script><!--<script --
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,22): eof-in-script-in-script
-(1,22): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:23) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --"
-|   <body>
-
-#data
-<script><!--<script --a
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,23): eof-in-script-in-script
-(1,23): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:24) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --a"
-|   <body>
-
-#data
-<script><!--<script -->
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,23): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -->"
-|   <body>
-
-#data
-<script><!--<script --><
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,24): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --><"
-|   <body>
-
-#data
-<script><!--<script --></
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,25): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --></"
-|   <body>
-
-#data
-<script><!--<script --></script
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,31): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script --></script"
-|   <body>
-
-#data
-<script><!--<script --></script 
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,32): expected-attribute-name-but-got-eof
-(1,32): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:33) eof-in-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -->"
-|   <body>
-
-#data
-<script><!--<script --></script/
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,32): unexpected-EOF-after-solidus-in-tag
-(1,32): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:33) eof-in-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -->"
-|   <body>
-
-#data
-<script><!--<script --></script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script -->"
-|   <body>
-
-#data
-<script><!--<script><\/script>--></script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script><\/script>-->"
-|   <body>
-
-#data
-<script><!--<script></scr'+'ipt>--></script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></scr'+'ipt>-->"
-|   <body>
-
-#data
-<script><!--<script></script><script></script></script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>"
-|   <body>
-
-#data
-<script><!--<script></script><script></script>--><!--</script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>--><!--"
-|   <body>
-
-#data
-<script><!--<script></script><script></script>-- ></script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>-- >"
-|   <body>
-
-#data
-<script><!--<script></script><script></script>- -></script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>- ->"
-|   <body>
-
-#data
-<script><!--<script></script><script></script>- - ></script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>- - >"
-|   <body>
-
-#data
-<script><!--<script></script><script></script>-></script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></script><script></script>->"
-|   <body>
-
-#data
-<script><!--<script>--!></script>X
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,34): expected-named-closing-tag-but-got-eof
-(1,34): unexpected-eof-in-text-mode
-#new-errors
-(1:35) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script>--!></script>X"
-|   <body>
-
-#data
-<script><!--<scr'+'ipt></script>--></script>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,44): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<scr'+'ipt>"
-|   <body>
-|     "-->"
-
-#data
-<script><!--<script></scr'+'ipt></script>X
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,42): expected-named-closing-tag-but-got-eof
-(1,42): unexpected-eof-in-text-mode
-#new-errors
-(1:43) eof-in-script-html-comment-like-text
-#document
-| <html>
-|   <head>
-|     <script>
-|       "<!--<script></scr'+'ipt></script>X"
-|   <body>
-
-#data
-<style><!--<style></style>--></style>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,37): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "<!--<style>"
-|   <body>
-|     "-->"
-
-#data
-<style><!--</style>X
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "<!--"
-|   <body>
-|     "X"
-
-#data
-<style><!--...</style>...--></style>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,36): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "<!--..."
-|   <body>
-|     "...-->"
-
-#data
-<style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
-|   <body>
-|     "X"
-
-#data
-<style><!--...<style><!--...--!></style>--></style>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,51): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "<!--...<style><!--...--!>"
-|   <body>
-|     "-->"
-
-#data
-<style><!--...</style><!-- --><style>@import ...</style>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "<!--..."
-|     <!--   -->
-|     <style>
-|       "@import ..."
-|   <body>
-
-#data
-<style>...<style><!--...</style><!-- --></style>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,48): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "...<style><!--..."
-|     <!--   -->
-|   <body>
-
-#data
-<style>...<!--[if IE]><style>...</style>X
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       "...<!--[if IE]><style>..."
-|   <body>
-|     "X"
-
-#data
-<title><!--<title></title>--></title>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,37): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <title>
-|       "<!--<title>"
-|   <body>
-|     "-->"
-
-#data
-<title>&lt;/title></title>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <title>
-|       "</title>"
-|   <body>
-
-#data
-<title>foo/title><link></head><body>X
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,37): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <title>
-|       "foo/title><link></head><body>X"
-|   <body>
-
-#data
-<noscript><!--<noscript></noscript>--></noscript>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,49): unexpected-end-tag
-#script-on
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       "<!--<noscript>"
-|   <body>
-|     "-->"
-
-#data
-<noscript><!--<noscript></noscript>--></noscript>
-#errors
- * (1,11) missing DOCTYPE
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- <noscript></noscript> -->
-|   <body>
-
-#data
-<noscript><!--</noscript>X<noscript>--></noscript>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#script-on
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       "<!--"
-|   <body>
-|     "X"
-|     <noscript>
-|       "-->"
-
-#data
-<noscript><!--</noscript>X<noscript>--></noscript>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- </noscript>X<noscript> -->
-|   <body>
-
-#data
-<noscript><iframe></noscript>X
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#script-on
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       "<iframe>"
-|   <body>
-|     "X"
-
-#data
-<noscript><iframe></noscript>X
-#errors
- * (1,11) missing DOCTYPE
- * (1,19) unexpected token in head noscript
- * (1,31) unexpected EOF
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|   <body>
-|     <iframe>
-|       "</noscript>X"
-
-#data
-<noframes><!--<noframes></noframes>--></noframes>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,49): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <noframes>
-|       "<!--<noframes>"
-|   <body>
-|     "-->"
-
-#data
-<noframes><body><script><!--...</script></body></noframes></html>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <noframes>
-|       "<body><script><!--...</script></body>"
-|   <body>
-
-#data
-<textarea><!--<textarea></textarea>--></textarea>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,49): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "<!--<textarea>"
-|     "-->"
-
-#data
-<textarea>&lt;/textarea></textarea>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "</textarea>"
-
-#data
-<iframe><!--<iframe></iframe>--></iframe>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,41): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <iframe>
-|       "<!--<iframe>"
-|     "-->"
-
-#data
-<iframe>...<!--X->...<!--/X->...</iframe>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <iframe>
-|       "...<!--X->...<!--/X->..."
-
-#data
-<xmp><!--<xmp></xmp>--></xmp>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,29): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <xmp>
-|       "<!--<xmp>"
-|     "-->"
-
-#data
-<noembed><!--<noembed></noembed>--></noembed>
-#errors
-(1,9): expected-doctype-but-got-start-tag
-(1,45): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <noembed>
-|       "<!--<noembed>"
-|     "-->"
-
-#data
-<!doctype html><table>
-
-#errors
-(2,0): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       "
-"
-
-#data
-<!doctype html><table><td><span><font></span><span>
-#errors
-(1,26): unexpected-cell-in-table-body
-(1,45): unexpected-end-tag
-(1,51): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <span>
-|               <font>
-|             <font>
-|               <span>
-
-#data
-<!doctype html><form><table></form><form></table></form>
-#errors
-(1,35): unexpected-end-tag-implies-table-voodoo
-(1,35): unexpected-end-tag
-(1,41): unexpected-form-in-table
-(1,56): unexpected-end-tag
-(1,56): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <form>
-|       <table>
-|         <form>
diff --git a/html/testdata/webkit/tests17.dat b/html/testdata/webkit/tests17.dat
deleted file mode 100644 (file)
index e49bcf0..0000000
+++ /dev/null
@@ -1,179 +0,0 @@
-#data
-<!doctype html><table><tbody><select><tr>
-#errors
-(1,37): unexpected-start-tag-implies-table-voodoo
-(1,41): unexpected-table-element-start-tag-in-select-in-table
-(1,41): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<!doctype html><table><tr><select><td>
-#errors
-(1,34): unexpected-start-tag-implies-table-voodoo
-(1,38): unexpected-table-element-start-tag-in-select-in-table
-(1,38): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-
-#data
-<!doctype html><table><tr><td><select><td>
-#errors
-(1,42): unexpected-table-element-start-tag-in-select-in-table
-(1,42): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <select>
-|           <td>
-
-#data
-<!doctype html><table><tr><th><select><td>
-#errors
-(1,42): unexpected-table-element-start-tag-in-select-in-table
-(1,42): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <th>
-|             <select>
-|           <td>
-
-#data
-<!doctype html><table><caption><select><tr>
-#errors
-(1,43): unexpected-table-element-start-tag-in-select-in-table
-(1,43): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <select>
-|       <tbody>
-|         <tr>
-
-#data
-<!doctype html><select><tr>
-#errors
-(1,27): unexpected-start-tag-in-select
-(1,27): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!doctype html><select><td>
-#errors
-(1,27): unexpected-start-tag-in-select
-(1,27): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!doctype html><select><th>
-#errors
-(1,27): unexpected-start-tag-in-select
-(1,27): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!doctype html><select><tbody>
-#errors
-(1,30): unexpected-start-tag-in-select
-(1,30): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!doctype html><select><thead>
-#errors
-(1,30): unexpected-start-tag-in-select
-(1,30): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!doctype html><select><tfoot>
-#errors
-(1,30): unexpected-start-tag-in-select
-(1,30): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!doctype html><select><caption>
-#errors
-(1,32): unexpected-start-tag-in-select
-(1,32): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!doctype html><table><tr></table>a
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|     "a"
diff --git a/html/testdata/webkit/tests18.dat b/html/testdata/webkit/tests18.dat
deleted file mode 100644 (file)
index 05363b3..0000000
+++ /dev/null
@@ -1,534 +0,0 @@
-#data
-<plaintext></plaintext>
-#errors
-11: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-23: End of file seen and there were open elements.
-11: Unclosed element “plaintext”.
-#document
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!doctype html><plaintext></plaintext>
-#errors
-(1,38): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!doctype html><html><plaintext></plaintext>
-#errors
-44: End of file seen and there were open elements.
-32: Unclosed element “plaintext”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!doctype html><head><plaintext></plaintext>
-#errors
-44: End of file seen and there were open elements.
-32: Unclosed element “plaintext”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!doctype html><html><noscript><plaintext></plaintext>
-#errors
-42: Bad start tag in “plaintext” in “head”.
-54: End of file seen and there were open elements.
-42: Unclosed element “plaintext”.
-#script-off
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <noscript>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!doctype html></head><plaintext></plaintext>
-#errors
-45: End of file seen and there were open elements.
-33: Unclosed element “plaintext”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!doctype html><body><plaintext></plaintext>
-#errors
-44: End of file seen and there were open elements.
-32: Unclosed element “plaintext”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!doctype html><table><plaintext></plaintext>
-#errors
-(1,33): foster-parenting-start-tag
-(1,45): foster-parenting-character
-(1,45): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-|     <table>
-
-#data
-<!doctype html><table><tbody><plaintext></plaintext>
-#errors
-(1,40): foster-parenting-start-tag
-(1,41): foster-parenting-character
-(1,52): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-|     <table>
-|       <tbody>
-
-#data
-<!doctype html><table><tbody><tr><plaintext></plaintext>
-#errors
-(1,44): foster-parenting-start-tag
-(1,56): foster-parenting-character
-(1,56): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<!doctype html><table><td><plaintext></plaintext>
-#errors
-(1,26): unexpected-cell-in-table-body
-(1,49): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <plaintext>
-|               "</plaintext>"
-
-#data
-<!doctype html><table><caption><plaintext></plaintext>
-#errors
-(1,54): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <plaintext>
-|           "</plaintext>"
-
-#data
-<!doctype html><table><colgroup><plaintext></plaintext>
-#errors
-43: Start tag “plaintext” seen in “table”.
-55: Misplaced non-space characters inside a table.
-55: End of file seen and there were open elements.
-43: Unclosed element “plaintext”.
-22: Unclosed element “table”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-|     <table>
-|       <colgroup>
-
-#data
-<!doctype html><select><plaintext></plaintext>X
-#errors
-34: Stray start tag “plaintext”.
-46: Stray end tag “plaintext”.
-47: End of file seen and there were open elements.
-23: Unclosed element “select”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "X"
-
-#data
-<!doctype html><table><select><plaintext>a<caption>b
-#errors
-30: Start tag “select” seen in “table”.
-41: Stray start tag “plaintext”.
-51: “caption” start tag with “select” open.
-52: End of file seen and there were open elements.
-51: Unclosed element “caption”.
-22: Unclosed element “table”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "a"
-|     <table>
-|       <caption>
-|         "b"
-
-#data
-<!doctype html><template><plaintext>a</template>b
-#errors
-49: End of file seen and there were open elements.
-36: Unclosed element “plaintext”.
-25: Unclosed element “template”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <template>
-|       content
-|         <plaintext>
-|           "a</template>b"
-|   <body>
-
-#data
-<!doctype html><body></body><plaintext></plaintext>
-#errors
-39: Stray start tag “plaintext”.
-51: End of file seen and there were open elements.
-39: Unclosed element “plaintext”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!doctype html><frameset><plaintext></plaintext>
-#errors
-36: Stray start tag “plaintext”.
-48: Stray end tag “plaintext”.
-48: End of file seen and there were open elements.
-25: Unclosed element “frameset”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><frameset></frameset><plaintext></plaintext>
-#errors
-47: Stray start tag “plaintext”.
-59: Stray end tag “plaintext”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><body></body></html><plaintext></plaintext>
-#errors
-46: Stray start tag “plaintext”.
-58: End of file seen and there were open elements.
-46: Unclosed element “plaintext”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!doctype html><frameset></frameset></html><plaintext></plaintext>
-#errors
-54: Stray start tag “plaintext”.
-66: Stray end tag “plaintext”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><svg><plaintext>a</plaintext>b
-#errors
-45: End of file seen and there were open elements.
-20: Unclosed element “svg”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg plaintext>
-|         "a"
-|       "b"
-
-#data
-<!doctype html><svg><title><plaintext>a</plaintext>b
-#errors
-52: End of file seen and there were open elements.
-38: Unclosed element “plaintext”.
-27: Unclosed element “title”.
-20: Unclosed element “svg”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg title>
-|         <plaintext>
-|           "a</plaintext>b"
-
-#data
-<!doctype html><table><tr><style></script></style>abc
-#errors
-(1,51): foster-parenting-character
-(1,52): foster-parenting-character
-(1,53): foster-parenting-character
-(1,53): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "abc"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <style>
-|             "</script>"
-
-#data
-<!doctype html><table><tr><script></style></script>abc
-#errors
-(1,52): foster-parenting-character
-(1,53): foster-parenting-character
-(1,54): foster-parenting-character
-(1,54): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "abc"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <script>
-|             "</style>"
-
-#data
-<!doctype html><table><caption><style></script></style>abc
-#errors
-(1,58): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <style>
-|           "</script>"
-|         "abc"
-
-#data
-<!doctype html><table><td><style></script></style>abc
-#errors
-(1,26): unexpected-cell-in-table-body
-(1,53): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <style>
-|               "</script>"
-|             "abc"
-
-#data
-<!doctype html><select><script></style></script>abc
-#errors
-(1,51): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <script>
-|         "</style>"
-|       "abc"
-
-#data
-<!doctype html><table><select><script></style></script>abc
-#errors
-(1,30): unexpected-start-tag-implies-table-voodoo
-(1,58): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <script>
-|         "</style>"
-|       "abc"
-|     <table>
-
-#data
-<!doctype html><table><tr><select><script></style></script>abc
-#errors
-(1,34): unexpected-start-tag-implies-table-voodoo
-(1,62): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <script>
-|         "</style>"
-|       "abc"
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<!doctype html><frameset></frameset><noframes>abc
-#errors
-(1,49): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|   <noframes>
-|     "abc"
-
-#data
-<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|   <noframes>
-|     "abc"
-|   <!-- abc -->
-
-#data
-<!doctype html><frameset></frameset></html><noframes>abc
-#errors
-(1,56): expected-named-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|   <noframes>
-|     "abc"
-
-#data
-<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|   <noframes>
-|     "abc"
-| <!-- abc -->
-
-#data
-<!doctype html><table><tr></tbody><tfoot>
-#errors
-(1,41): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|       <tfoot>
-
-#data
-<!doctype html><table><td><svg></svg>abc<td>
-#errors
-(1,26): unexpected-cell-in-table-body
-(1,44): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <svg svg>
-|             "abc"
-|           <td>
diff --git a/html/testdata/webkit/tests19.dat b/html/testdata/webkit/tests19.dat
deleted file mode 100644 (file)
index a189777..0000000
+++ /dev/null
@@ -1,1454 +0,0 @@
-#data
-<!doctype html><math><mn DefinitionUrl="foo">
-#errors
-(1,45): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mn>
-|         definitionURL="foo"
-
-#data
-<!doctype html><html></p><!--foo-->
-#errors
-(1,25): end-tag-after-implied-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   <!-- foo -->
-|   <head>
-|   <body>
-
-#data
-<!doctype html><head></head></p><!--foo-->
-#errors
-(1,32): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <!-- foo -->
-|   <body>
-
-#data
-<!doctype html><body><p><pre>
-#errors
-(1,29): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <pre>
-
-#data
-<!doctype html><body><p><listing>
-#errors
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <listing>
-
-#data
-<!doctype html><p><plaintext>
-#errors
-(1,29): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <plaintext>
-
-#data
-<!doctype html><p><h1>
-#errors
-(1,22): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <h1>
-
-#data
-<!doctype html><isindex type="hidden">
-#errors
-(1,38): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <isindex>
-|       type="hidden"
-
-#data
-<!doctype html><ruby><p><rp>
-#errors
-(1,28): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       <p>
-|       <rp>
-
-#data
-<!doctype html><ruby><div><span><rp>
-#errors
-(1,36): XXX-undefined-error
-(1,36): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       <div>
-|         <span>
-|           <rp>
-
-#data
-<!doctype html><ruby><div><p><rp>
-#errors
-(1,33): XXX-undefined-error
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       <div>
-|         <p>
-|         <rp>
-
-#data
-<!doctype html><ruby><p><rt>
-#errors
-(1,28): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       <p>
-|       <rt>
-
-#data
-<!doctype html><ruby><div><span><rt>
-#errors
-(1,36): XXX-undefined-error
-(1,36): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       <div>
-|         <span>
-|           <rt>
-
-#data
-<!doctype html><ruby><div><p><rt>
-#errors
-(1,33): XXX-undefined-error
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       <div>
-|         <p>
-|         <rt>
-
-#data
-<html><ruby>a<rb>b<rt></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rb>
-|         "b"
-|       <rt>
-
-#data
-<html><ruby>a<rp>b<rt></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rp>
-|         "b"
-|       <rt>
-
-#data
-<html><ruby>a<rt>b<rt></ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rt>
-|         "b"
-|       <rt>
-
-#data
-<html><ruby>a<rtc>b<rt>c<rb>d</ruby></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       "a"
-|       <rtc>
-|         "b"
-|         <rt>
-|           "c"
-|       <rb>
-|         "d"
-
-#data
-<!doctype html><math/><foo>
-#errors
-(1,27): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|     <foo>
-
-#data
-<!doctype html><svg/><foo>
-#errors
-(1,26): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|     <foo>
-
-#data
-<!doctype html><div></body><!--foo-->
-#errors
-(1,27): expected-one-end-tag-but-got-another
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|   <!-- foo -->
-
-#data
-<!doctype html><h1><div><h3><span></h1>foo
-#errors
-(1,39): end-tag-too-early
-(1,42): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <h1>
-|       <div>
-|         <h3>
-|           <span>
-|         "foo"
-
-#data
-<!doctype html><p></h3>foo
-#errors
-(1,23): end-tag-too-early
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "foo"
-
-#data
-<!doctype html><h3><li>abc</h2>foo
-#errors
-(1,31): end-tag-too-early
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <h3>
-|       <li>
-|         "abc"
-|     "foo"
-
-#data
-<!doctype html><table>abc<!--foo-->
-#errors
-(1,23): foster-parenting-character
-(1,24): foster-parenting-character
-(1,25): foster-parenting-character
-(1,35): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "abc"
-|     <table>
-|       <!-- foo -->
-
-#data
-<!doctype html><table>  <!--foo-->
-#errors
-(1,34): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       "  "
-|       <!-- foo -->
-
-#data
-<!doctype html><table> b <!--foo-->
-#errors
-(1,23): foster-parenting-character
-(1,24): foster-parenting-character
-(1,25): foster-parenting-character
-(1,35): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     " b "
-|     <table>
-|       <!-- foo -->
-
-#data
-<!doctype html><select><option><option>
-#errors
-(1,39): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|       <option>
-
-#data
-<!doctype html><select><option></optgroup>
-#errors
-(1,42): unexpected-end-tag-in-select
-(1,42): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-
-#data
-<!doctype html><select><option></optgroup>
-#errors
-(1,42): unexpected-end-tag-in-select
-(1,42): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-
-#data
-<!doctype html><dd><optgroup><dd>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <dd>
-|       <optgroup>
-|     <dd>
-
-#data
-<!doctype html><p><math><mi><p><h1>
-#errors
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <math math>
-|         <math mi>
-|           <p>
-|           <h1>
-
-#data
-<!doctype html><p><math><mo><p><h1>
-#errors
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <math math>
-|         <math mo>
-|           <p>
-|           <h1>
-
-#data
-<!doctype html><p><math><mn><p><h1>
-#errors
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <math math>
-|         <math mn>
-|           <p>
-|           <h1>
-
-#data
-<!doctype html><p><math><ms><p><h1>
-#errors
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <math math>
-|         <math ms>
-|           <p>
-|           <h1>
-
-#data
-<!doctype html><p><math><mtext><p><h1>
-#errors
-(1,38): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <math math>
-|         <math mtext>
-|           <p>
-|           <h1>
-
-#data
-<!doctype html><frameset></noframes>
-#errors
-(1,36): unexpected-end-tag-in-frameset
-(1,36): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><html c=d><body></html><html a=b>
-#errors
-(1,48): non-html-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   a="b"
-|   c="d"
-|   <head>
-|   <body>
-
-#data
-<!doctype html><html c=d><frameset></frameset></html><html a=b>
-#errors
-(1,63): non-html-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   a="b"
-|   c="d"
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><html><frameset></frameset></html><!--foo-->
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-| <!-- foo -->
-
-#data
-<!doctype html><html><frameset></frameset></html>  
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|   "  "
-
-#data
-<!doctype html><html><frameset></frameset></html>abc
-#errors
-(1,50): expected-eof-but-got-char
-(1,51): expected-eof-but-got-char
-(1,52): expected-eof-but-got-char
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><html><frameset></frameset></html><p>
-#errors
-(1,52): expected-eof-but-got-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><html><frameset></frameset></html></p>
-#errors
-(1,53): expected-eof-but-got-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<html><frameset></frameset></html><!doctype html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,49): unexpected-doctype
-#document
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><body><frameset>
-#errors
-(1,31): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!doctype html><p><frameset><frame>
-#errors
-(1,28): unexpected-start-tag
-(1,35): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|     <frame>
-
-#data
-<!doctype html><p>a<frameset>
-#errors
-(1,29): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "a"
-
-#data
-<!doctype html><p> <frameset><frame>
-#errors
-(1,29): unexpected-start-tag
-(1,36): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|     <frame>
-
-#data
-<!doctype html><pre><frameset>
-#errors
-(1,30): unexpected-start-tag
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-
-#data
-<!doctype html><listing><frameset>
-#errors
-(1,34): unexpected-start-tag
-(1,34): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <listing>
-
-#data
-<!doctype html><li><frameset>
-#errors
-(1,29): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <li>
-
-#data
-<!doctype html><dd><frameset>
-#errors
-(1,29): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <dd>
-
-#data
-<!doctype html><dt><frameset>
-#errors
-(1,29): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <dt>
-
-#data
-<!doctype html><button><frameset>
-#errors
-(1,33): unexpected-start-tag
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <button>
-
-#data
-<!doctype html><applet><frameset>
-#errors
-(1,33): unexpected-start-tag
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <applet>
-
-#data
-<!doctype html><marquee><frameset>
-#errors
-(1,34): unexpected-start-tag
-(1,34): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <marquee>
-
-#data
-<!doctype html><object><frameset>
-#errors
-(1,33): unexpected-start-tag
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <object>
-
-#data
-<!doctype html><table><frameset>
-#errors
-(1,32): unexpected-start-tag-implies-table-voodoo
-(1,32): unexpected-start-tag
-(1,32): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-
-#data
-<!doctype html><area><frameset>
-#errors
-(1,31): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <area>
-
-#data
-<!doctype html><basefont><frameset>
-#errors
-(1,35): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <basefont>
-|   <frameset>
-
-#data
-<!doctype html><bgsound><frameset>
-#errors
-(1,34): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <bgsound>
-|   <frameset>
-
-#data
-<!doctype html><br><frameset>
-#errors
-(1,29): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <br>
-
-#data
-<!doctype html><embed><frameset>
-#errors
-(1,32): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <embed>
-
-#data
-<!doctype html><img><frameset>
-#errors
-(1,30): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <img>
-
-#data
-<!doctype html><input><frameset>
-#errors
-(1,32): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <input>
-
-#data
-<!doctype html><keygen><frameset>
-#errors
-(1,33): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <keygen>
-
-#data
-<!doctype html><wbr><frameset>
-#errors
-(1,30): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <wbr>
-
-#data
-<!doctype html><hr><frameset>
-#errors
-(1,29): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <hr>
-
-#data
-<!doctype html><textarea></textarea><frameset>
-#errors
-(1,46): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-
-#data
-<!doctype html><xmp></xmp><frameset>
-#errors
-(1,36): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <xmp>
-
-#data
-<!doctype html><iframe></iframe><frameset>
-#errors
-(1,42): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <iframe>
-
-#data
-<!doctype html><select></select><frameset>
-#errors
-(1,42): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!doctype html><svg></svg><frameset><frame>
-#errors
-(1,36): unexpected-start-tag
-(1,43): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|     <frame>
-
-#data
-<!doctype html><math></math><frameset><frame>
-#errors
-(1,38): unexpected-start-tag
-(1,45): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|     <frame>
-
-#data
-<!doctype html><svg><foreignObject><div> <frameset><frame>
-#errors
-(1,51): unexpected-start-tag
-(1,58): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|     <frame>
-
-#data
-<!doctype html><svg>a</svg><frameset><frame>
-#errors
-(1,37): unexpected-start-tag
-(1,44): unexpected-start-tag-ignored
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "a"
-
-#data
-<!doctype html><svg> </svg><frameset><frame>
-#errors
-(1,37): unexpected-start-tag
-(1,44): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|     <frame>
-
-#data
-<html>aaa<frameset></frameset>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,19): unexpected-start-tag
-(1,30): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     "aaa"
-
-#data
-<html> a <frameset></frameset>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,19): unexpected-start-tag
-(1,30): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     "a "
-
-#data
-<!doctype html><div><frameset>
-#errors
-(1,30): unexpected-start-tag
-(1,30): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><div><body><frameset>
-#errors
-(1,26): unexpected-start-tag
-(1,36): unexpected-start-tag
-(1,36): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <div>
-
-#data
-<!doctype html><p><math></p>a
-#errors
-(1,28): unexpected-end-tag
-(1,28): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <math math>
-|     "a"
-
-#data
-<!doctype html><p><math><mn><span></p>a
-#errors
-(1,38): unexpected-end-tag
-(1,39): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <math math>
-|         <math mn>
-|           <span>
-|             <p>
-|             "a"
-
-#data
-<!doctype html><math></html>
-#errors
-(1,28): unexpected-end-tag
-(1,28): expected-one-end-tag-but-got-another
-(1,28): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-
-#data
-<!doctype html><meta charset="ascii">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <meta>
-|       charset="ascii"
-|   <body>
-
-#data
-<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <meta>
-|       content="text/html;charset=ascii"
-|       http-equiv="content-type"
-|   <body>
-
-#data
-<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa--><meta charset="utf8">
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <!-- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -->
-|     <meta>
-|       charset="utf8"
-|   <body>
-
-#data
-<!doctype html><html a=b><head></head><html c=d>
-#errors
-(1,48): non-html-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   a="b"
-|   c="d"
-|   <head>
-|   <body>
-
-#data
-<!doctype html><image/>
-#errors
-(1,23): image-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <img>
-
-#data
-<!doctype html>a<i>b<table>c<b>d</i>e</b>f
-#errors
-(1,28): foster-parenting-character
-(1,31): foster-parenting-start-tag
-(1,32): foster-parenting-character
-(1,36): foster-parenting-end-tag
-(1,36): adoption-agency-1.3
-(1,37): foster-parenting-character
-(1,41): foster-parenting-end-tag
-(1,42): foster-parenting-character
-(1,42): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "a"
-|     <i>
-|       "bc"
-|       <b>
-|         "de"
-|       "f"
-|       <table>
-
-#data
-<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
-#errors
-(1,25): foster-parenting-start-tag
-(1,26): foster-parenting-character
-(1,29): foster-parenting-start-tag
-(1,30): foster-parenting-character
-(1,35): foster-parenting-start-tag
-(1,36): foster-parenting-character
-(1,39): foster-parenting-start-tag
-(1,40): foster-parenting-character
-(1,44): foster-parenting-end-tag
-(1,44): adoption-agency-1.3
-(1,44): adoption-agency-1.3
-(1,45): foster-parenting-character
-(1,49): foster-parenting-end-tag
-(1,49): adoption-agency-1.3
-(1,49): adoption-agency-1.3
-(1,50): foster-parenting-character
-(1,50): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <i>
-|       "a"
-|       <b>
-|         "b"
-|     <b>
-|     <div>
-|       <b>
-|         <i>
-|           "c"
-|           <a>
-|             "d"
-|         <a>
-|           "e"
-|       <a>
-|         "f"
-|     <table>
-
-#data
-<!doctype html><i>a<b>b<div>c<a>d</i>e</b>f
-#errors
-(1,37): adoption-agency-1.3
-(1,37): adoption-agency-1.3
-(1,42): adoption-agency-1.3
-(1,42): adoption-agency-1.3
-(1,43): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <i>
-|       "a"
-|       <b>
-|         "b"
-|     <b>
-|     <div>
-|       <b>
-|         <i>
-|           "c"
-|           <a>
-|             "d"
-|         <a>
-|           "e"
-|       <a>
-|         "f"
-
-#data
-<!doctype html><table><i>a<b>b<div>c</i>
-#errors
-(1,25): foster-parenting-start-tag
-(1,26): foster-parenting-character
-(1,29): foster-parenting-start-tag
-(1,30): foster-parenting-character
-(1,35): foster-parenting-start-tag
-(1,36): foster-parenting-character
-(1,40): foster-parenting-end-tag
-(1,40): adoption-agency-1.3
-(1,40): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <i>
-|       "a"
-|       <b>
-|         "b"
-|     <b>
-|       <div>
-|         <i>
-|           "c"
-|     <table>
-
-#data
-<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
-#errors
-(1,25): foster-parenting-start-tag
-(1,26): foster-parenting-character
-(1,29): foster-parenting-start-tag
-(1,30): foster-parenting-character
-(1,35): foster-parenting-start-tag
-(1,36): foster-parenting-character
-(1,39): foster-parenting-start-tag
-(1,40): foster-parenting-character
-(1,44): foster-parenting-end-tag
-(1,44): adoption-agency-1.3
-(1,44): adoption-agency-1.3
-(1,45): foster-parenting-character
-(1,49): foster-parenting-end-tag
-(1,44): adoption-agency-1.3
-(1,44): adoption-agency-1.3
-(1,50): foster-parenting-character
-(1,50): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <i>
-|       "a"
-|       <b>
-|         "b"
-|     <b>
-|     <div>
-|       <b>
-|         <i>
-|           "c"
-|           <a>
-|             "d"
-|         <a>
-|           "e"
-|       <a>
-|         "f"
-|     <table>
-
-#data
-<!doctype html><table><i>a<div>b<tr>c<b>d</i>e
-#errors
-(1,25): foster-parenting-start-tag
-(1,26): foster-parenting-character
-(1,31): foster-parenting-start-tag
-(1,32): foster-parenting-character
-(1,37): foster-parenting-character
-(1,40): foster-parenting-start-tag
-(1,41): foster-parenting-character
-(1,45): foster-parenting-end-tag
-(1,45): adoption-agency-1.3
-(1,46): foster-parenting-character
-(1,46): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <i>
-|       "a"
-|       <div>
-|         "b"
-|     <i>
-|       "c"
-|       <b>
-|         "d"
-|     <b>
-|       "e"
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<!doctype html><table><td><table><i>a<div>b<b>c</i>d
-#errors
-(1,26): unexpected-cell-in-table-body
-(1,36): foster-parenting-start-tag
-(1,37): foster-parenting-character
-(1,42): foster-parenting-start-tag
-(1,43): foster-parenting-character
-(1,46): foster-parenting-start-tag
-(1,47): foster-parenting-character
-(1,51): foster-parenting-end-tag
-(1,51): adoption-agency-1.3
-(1,51): adoption-agency-1.3
-(1,52): foster-parenting-character
-(1,52): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <i>
-|               "a"
-|             <div>
-|               <i>
-|                 "b"
-|                 <b>
-|                   "c"
-|               <b>
-|                 "d"
-|             <table>
-
-#data
-<!doctype html><body><bgsound>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <bgsound>
-
-#data
-<!doctype html><body><basefont>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <basefont>
-
-#data
-<!doctype html><a><b></a><basefont>
-#errors
-(1,25): adoption-agency-1.3
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <b>
-|     <basefont>
-
-#data
-<!doctype html><a><b></a><bgsound>
-#errors
-(1,25): adoption-agency-1.3
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <b>
-|     <bgsound>
-
-#data
-<!doctype html><figcaption><article></figcaption>a
-#errors
-(1,49): end-tag-too-early
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <figcaption>
-|       <article>
-|     "a"
-
-#data
-<!doctype html><summary><article></summary>a
-#errors
-(1,43): end-tag-too-early
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <summary>
-|       <article>
-|     "a"
-
-#data
-<!doctype html><p><a><plaintext>b
-#errors
-(1,32): unexpected-end-tag
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <a>
-|     <plaintext>
-|       <a>
-|         "b"
-
-#data
-<!DOCTYPE html><div>a<a></div>b<p>c</p>d
-#errors
-(1,30): end-tag-too-early
-(1,40): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "a"
-|       <a>
-|     <a>
-|       "b"
-|       <p>
-|         "c"
-|       "d"
diff --git a/html/testdata/webkit/tests2.dat b/html/testdata/webkit/tests2.dat
deleted file mode 100644 (file)
index b44fec4..0000000
+++ /dev/null
@@ -1,821 +0,0 @@
-#data
-<!DOCTYPE html>Test
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "Test"
-
-#data
-<textarea>test</div>test
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,24): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "test</div>test"
-
-#data
-<table><td>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-(1,11): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-
-#data
-<table><td>test</tbody></table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "test"
-
-#data
-<frame>test
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,7): unexpected-start-tag-ignored
-#document
-| <html>
-|   <head>
-|   <body>
-|     "test"
-
-#data
-<!DOCTYPE html><frameset>test
-#errors
-(1,29): unexpected-char-in-frameset
-(1,29): unexpected-char-in-frameset
-(1,29): unexpected-char-in-frameset
-(1,29): unexpected-char-in-frameset
-(1,29): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!DOCTYPE html><frameset> te st
-#errors
-(1,29): unexpected-char-in-frameset
-(1,29): unexpected-char-in-frameset
-(1,29): unexpected-char-in-frameset
-(1,29): unexpected-char-in-frameset
-(1,29): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|     "  "
-
-#data
-<!DOCTYPE html><frameset></frameset> te st
-#errors
-(1,29): unexpected-char-after-frameset
-(1,29): unexpected-char-after-frameset
-(1,29): unexpected-char-after-frameset
-(1,29): unexpected-char-after-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-|   "  "
-
-#data
-<!DOCTYPE html><frameset><!DOCTYPE html>
-#errors
-(1,40): unexpected-doctype
-(1,40): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!DOCTYPE html><font><p><b>test</font>
-#errors
-(1,38): adoption-agency-1.3
-(1,38): adoption-agency-1.3
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <font>
-|     <p>
-|       <font>
-|         <b>
-|           "test"
-
-#data
-<!DOCTYPE html><dt><div><dd>
-#errors
-(1,28): end-tag-too-early
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <dt>
-|       <div>
-|     <dd>
-
-#data
-<script></x
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,11): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <script>
-|       "</x"
-|   <body>
-
-#data
-<table><plaintext><td>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,18): unexpected-start-tag-implies-table-voodoo
-(1,22): foster-parenting-character-in-table
-(1,22): foster-parenting-character-in-table
-(1,22): foster-parenting-character-in-table
-(1,22): foster-parenting-character-in-table
-(1,22): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "<td>"
-|     <table>
-
-#data
-<plaintext></plaintext>
-#errors
-(1,11): expected-doctype-but-got-start-tag
-(1,23): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "</plaintext>"
-
-#data
-<!DOCTYPE html><table><tr>TEST
-#errors
-(1,30): foster-parenting-character-in-table
-(1,30): foster-parenting-character-in-table
-(1,30): foster-parenting-character-in-table
-(1,30): foster-parenting-character-in-table
-(1,30): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "TEST"
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
-#errors
-(1,37): unexpected-start-tag
-(1,53): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     t1="1"
-|     t2="2"
-|     t3="3"
-|     t4="4"
-
-#data
-</b test
-#errors
-(1,8): eof-in-attribute-name
-(1,8): expected-doctype-but-got-eof
-#new-errors
-(1:9) eof-in-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html></b test<b &=&amp>X
-#errors
-(1,24): invalid-character-in-attribute-name
-(1,32): named-entity-without-semicolon
-(1,33): attributes-in-end-tag
-(1,33): unexpected-end-tag-before-html
-#new-errors
-(1:24) unexpected-character-in-attribute-name
-(1:33) missing-semicolon-after-character-reference
-(1:33) end-tag-with-attributes
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "X"
-
-#data
-<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
-#errors
-(1,9): need-space-after-doctype
-(1,54): expected-named-closing-tag-but-got-eof
-#new-errors
-(1:10) missing-whitespace-before-doctype-name
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       type="text/x-foobar;baz"
-|       "X</SCRipt"
-|   <body>
-
-#data
-&
-#errors
-(1,1): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "&"
-
-#data
-&#
-#errors
-(1,2): expected-numeric-entity
-(1,2): expected-doctype-but-got-chars
-#new-errors
-(1:3) absence-of-digits-in-numeric-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "&#"
-
-#data
-&#X
-#errors
-(1,3): expected-numeric-entity
-(1,3): expected-doctype-but-got-chars
-#new-errors
-(1:4) absence-of-digits-in-numeric-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "&#X"
-
-#data
-&#x
-#errors
-(1,3): expected-numeric-entity
-(1,3): expected-doctype-but-got-chars
-#new-errors
-(1:4) absence-of-digits-in-numeric-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "&#x"
-
-#data
-&#45
-#errors
-(1,4): numeric-entity-without-semicolon
-(1,4): expected-doctype-but-got-chars
-#new-errors
-(1:5) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "-"
-
-#data
-&x-test
-#errors
-(1,2): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "&x-test"
-
-#data
-<!doctypehtml><p><li>
-#errors
-(1,9): need-space-after-doctype
-#new-errors
-(1:10) missing-whitespace-before-doctype-name
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <li>
-
-#data
-<!doctypehtml><p><dt>
-#errors
-(1,9): need-space-after-doctype
-#new-errors
-(1:10) missing-whitespace-before-doctype-name
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <dt>
-
-#data
-<!doctypehtml><p><dd>
-#errors
-(1,9): need-space-after-doctype
-#new-errors
-(1:10) missing-whitespace-before-doctype-name
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <dd>
-
-#data
-<!doctypehtml><p><form>
-#errors
-(1,9): need-space-after-doctype
-(1,23): expected-closing-tag-but-got-eof
-#new-errors
-(1:10) missing-whitespace-before-doctype-name
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <form>
-
-#data
-<!DOCTYPE html><p></P>X
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     "X"
-
-#data
-&AMP
-#errors
-(1,4): named-entity-without-semicolon
-(1,4): expected-doctype-but-got-chars
-#new-errors
-(1:5) missing-semicolon-after-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "&"
-
-#data
-&AMp;
-#errors
-(1,3): expected-named-entity
-(1,3): expected-doctype-but-got-chars
-#new-errors
-(1:5) unknown-named-character-reference
-#document
-| <html>
-|   <head>
-|   <body>
-|     "&AMp;"
-
-#data
-<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
-#errors
-(1,110): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
-
-#data
-<!DOCTYPE html>X</body>X
-#errors
-(1,24): unexpected-char-after-body
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "XX"
-
-#data
-<!DOCTYPE html><!-- X
-#errors
-(1,21): eof-in-comment
-#new-errors
-(1:22) eof-in-comment
-#document
-| <!DOCTYPE html>
-| <!--  X -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><table><caption>test TEST</caption><td>test
-#errors
-(1,54): unexpected-cell-in-table-body
-(1,58): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         "test TEST"
-|       <tbody>
-|         <tr>
-|           <td>
-|             "test"
-
-#data
-<!DOCTYPE html><select><option><optgroup>
-#errors
-(1,41): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|       <optgroup>
-
-#data
-<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
-#errors
-(1,68): unexpected-select-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <optgroup>
-|         <option>
-|       <option>
-|     <option>
-
-#data
-<!DOCTYPE html><select><optgroup><option><optgroup>
-#errors
-(1,51): eof-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <optgroup>
-|         <option>
-|       <optgroup>
-
-#data
-<!DOCTYPE html><datalist><option>foo</datalist>bar
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <datalist>
-|       <option>
-|         "foo"
-|     "bar"
-
-#data
-<!DOCTYPE html><font><input><input></font>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <font>
-|       <input>
-|       <input>
-
-#data
-<!DOCTYPE html><!-- XXX - XXX -->
-#errors
-#document
-| <!DOCTYPE html>
-| <!--  XXX - XXX  -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><!-- XXX - XXX
-#errors
-(1,29): eof-in-comment
-#new-errors
-(1:30) eof-in-comment
-#document
-| <!DOCTYPE html>
-| <!--  XXX - XXX -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><!-- XXX - XXX - XXX -->
-#errors
-#document
-| <!DOCTYPE html>
-| <!--  XXX - XXX - XXX  -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-test
-test
-#errors
-(2,4): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "test
-test"
-
-#data
-<!DOCTYPE html><body><title>test</body></title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <title>
-|       "test</body>"
-
-#data
-<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
-x { content:"</style" } </style>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <title>
-|       "X"
-|     <meta>
-|       name="z"
-|     <link>
-|       rel="foo"
-|     <style>
-|       "
-x { content:"</style" } "
-
-#data
-<!DOCTYPE html><select><optgroup></optgroup></select>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <optgroup>
-
-#data
-#errors
-(2,1): expected-doctype-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html>  <html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><script>
-</script>  <title>x</title>  </head>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <script>
-|       "
-"
-|     "  "
-|     <title>
-|       "x"
-|     "  "
-|   <body>
-
-#data
-<!DOCTYPE html><html><body><html id=x>
-#errors
-(1,38): non-html-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   id="x"
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html>X</body><html id="x">
-#errors
-(1,36): non-html-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   id="x"
-|   <head>
-|   <body>
-|     "X"
-
-#data
-<!DOCTYPE html><head><html id=x>
-#errors
-(1,32): non-html-root
-#document
-| <!DOCTYPE html>
-| <html>
-|   id="x"
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html>X</html>X
-#errors
-(1,24): expected-eof-but-got-char
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "XX"
-
-#data
-<!DOCTYPE html>X</html> 
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "X "
-
-#data
-<!DOCTYPE html>X</html><p>X
-#errors
-(1,26): expected-eof-but-got-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "X"
-|     <p>
-|       "X"
-
-#data
-<!DOCTYPE html>X<p/x/y/z>
-#errors
-(1,19): unexpected-character-after-solidus-in-tag
-(1,21): unexpected-character-after-solidus-in-tag
-(1,23): unexpected-character-after-solidus-in-tag
-#new-errors
-(1:20) unexpected-solidus-in-tag
-(1:22) unexpected-solidus-in-tag
-(1:24) unexpected-solidus-in-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "X"
-|     <p>
-|       x=""
-|       y=""
-|       z=""
-
-#data
-<!DOCTYPE html><!--x--
-#errors
-(1,22): eof-in-comment-double-dash
-#new-errors
-(1:23) eof-in-comment
-#document
-| <!DOCTYPE html>
-| <!-- x -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE html><table><tr><td></p></table>
-#errors
-(1,34): unexpected-end-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <p>
-
-#data
-<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
-#errors
-(1,20): expected-space-or-right-bracket-in-doctype
-(1,25): unknown-doctype
-(1,35): unexpected-char-in-comment
-#new-errors
-(1:21) invalid-character-sequence-after-doctype-name
-(1:35) nested-comment
-#document
-| <!DOCTYPE <!doctype>
-| <html>
-|   <head>
-|   <body>
-|     ">"
-|     <!-- <!--x -->
-|     "-->"
-
-#data
-<!doctype html><div><form></form><div></div></div>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <form>
-|       <div>
diff --git a/html/testdata/webkit/tests20.dat b/html/testdata/webkit/tests20.dat
deleted file mode 100644 (file)
index afdae74..0000000
+++ /dev/null
@@ -1,582 +0,0 @@
-#data
-<!doctype html><p><button><button>
-#errors
-(1,34): unexpected-start-tag-implies-end-tag
-(1,34): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|       <button>
-
-#data
-<!doctype html><p><button><address>
-#errors
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <address>
-
-#data
-<!doctype html><p><button><blockquote>
-#errors
-(1,38): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <blockquote>
-
-#data
-<!doctype html><p><button><menu>
-#errors
-(1,32): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <menu>
-
-#data
-<!doctype html><p><button><p>
-#errors
-(1,29): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <p>
-
-#data
-<!doctype html><p><button><ul>
-#errors
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <ul>
-
-#data
-<!doctype html><p><button><h1>
-#errors
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <h1>
-
-#data
-<!doctype html><p><button><h6>
-#errors
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <h6>
-
-#data
-<!doctype html><p><button><listing>
-#errors
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <listing>
-
-#data
-<!doctype html><p><button><pre>
-#errors
-(1,31): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <pre>
-
-#data
-<!doctype html><p><button><form>
-#errors
-(1,32): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <form>
-
-#data
-<!doctype html><p><button><li>
-#errors
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <li>
-
-#data
-<!doctype html><p><button><dd>
-#errors
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <dd>
-
-#data
-<!doctype html><p><button><dt>
-#errors
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <dt>
-
-#data
-<!doctype html><p><button><plaintext>
-#errors
-(1,37): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <plaintext>
-
-#data
-<!doctype html><p><button><table>
-#errors
-(1,33): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <table>
-
-#data
-<!doctype html><p><button><hr>
-#errors
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <hr>
-
-#data
-<!doctype html><p><button><xmp>
-#errors
-(1,31): expected-named-closing-tag-but-got-eof
-(1,31): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <xmp>
-
-#data
-<!doctype html><p><button></p>
-#errors
-(1,30): unexpected-end-tag
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <button>
-|         <p>
-
-#data
-<!doctype html><address><button></address>a
-#errors
-(1,42): end-tag-too-early
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <address>
-|       <button>
-|     "a"
-
-#data
-<!doctype html><address><button></address>a
-#errors
-(1,42): end-tag-too-early
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <address>
-|       <button>
-|     "a"
-
-#data
-<p><table></p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,14): unexpected-end-tag-implies-table-voodoo
-(1,14): unexpected-end-tag
-(1,14): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <p>
-|       <table>
-
-#data
-<!doctype html><svg>
-#errors
-(1,20): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-
-#data
-<!doctype html><p><figcaption>
-#errors
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <figcaption>
-
-#data
-<!doctype html><p><summary>
-#errors
-(1,27): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <summary>
-
-#data
-<!doctype html><form><table><form>
-#errors
-(1,34): unexpected-form-in-table
-(1,34): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <form>
-|       <table>
-
-#data
-<!doctype html><table><form><form>
-#errors
-(1,28): unexpected-form-in-table
-(1,34): unexpected-form-in-table
-(1,34): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <form>
-
-#data
-<!doctype html><table><form></table><form>
-#errors
-(1,28): unexpected-form-in-table
-(1,42): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <form>
-
-#data
-<!doctype html><svg><foreignObject><p>
-#errors
-(1,38): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg foreignObject>
-|         <p>
-
-#data
-<!doctype html><svg><title>abc
-#errors
-(1,30): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg title>
-|         "abc"
-
-#data
-<option><span><option>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,22): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <option>
-|       <span>
-|         <option>
-
-#data
-<option><option>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <option>
-|     <option>
-
-#data
-<math><annotation-xml><div>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,27): unexpected-html-element-in-foreign-content
-(1,27): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|     <div>
-
-#data
-<math><annotation-xml encoding="application/svg+xml"><div>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,58): unexpected-html-element-in-foreign-content
-(1,58): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         encoding="application/svg+xml"
-|     <div>
-
-#data
-<math><annotation-xml encoding="application/xhtml+xml"><div>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,60): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         encoding="application/xhtml+xml"
-|         <div>
-
-#data
-<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,60): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         encoding="aPPlication/xhtmL+xMl"
-|         <div>
-
-#data
-<math><annotation-xml encoding="text/html"><div>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,48): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         encoding="text/html"
-|         <div>
-
-#data
-<math><annotation-xml encoding="Text/htmL"><div>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,48): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         encoding="Text/htmL"
-|         <div>
-
-#data
-<math><annotation-xml encoding=" text/html "><div>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,50): unexpected-html-element-in-foreign-content
-(1,50): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         encoding=" text/html "
-|     <div>
-
-#data
-<math><annotation-xml> </annotation-xml>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,40): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         " "
-
-#data
-<math><annotation-xml>c</annotation-xml>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,40): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         "c"
-
-#data
-<math><annotation-xml><!--foo-->
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,32): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         <!-- foo -->
-
-#data
-<math><annotation-xml></svg>x
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,28): unexpected-end-tag
-(1,29): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         "x"
-
-#data
-<math><annotation-xml><svg>x
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,28): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         <svg svg>
-|           "x"
diff --git a/html/testdata/webkit/tests21.dat b/html/testdata/webkit/tests21.dat
deleted file mode 100644 (file)
index 1e2af7c..0000000
+++ /dev/null
@@ -1,325 +0,0 @@
-#data
-<svg><![CDATA[foo]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,20): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo"
-
-#data
-<math><![CDATA[foo]]>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,21): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       "foo"
-
-#data
-<div><![CDATA[foo]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,7): expected-dashes-or-doctype
-(1,20): expected-closing-tag-but-got-eof
-#new-errors
-(1:14) cdata-in-html-content
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <!-- [CDATA[foo]] -->
-
-#data
-<svg><![CDATA[foo
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,17): expected-closing-tag-but-got-eof
-#new-errors
-(1:18) eof-in-cdata
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo"
-
-#data
-<svg><![CDATA[foo
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,17): expected-closing-tag-but-got-eof
-#new-errors
-(1:18) eof-in-cdata
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo"
-
-#data
-<svg><![CDATA[
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,14): expected-closing-tag-but-got-eof
-#new-errors
-(1:15) eof-in-cdata
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-
-#data
-<svg><![CDATA[]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,17): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-
-#data
-<svg><![CDATA[]] >]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,21): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]] >"
-
-#data
-<svg><![CDATA[]] >]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,21): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]] >"
-
-#data
-<svg><![CDATA[]]
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,16): expected-closing-tag-but-got-eof
-#new-errors
-(1:17) eof-in-cdata
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]]"
-
-#data
-<svg><![CDATA[]
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,15): expected-closing-tag-but-got-eof
-#new-errors
-(1:16) eof-in-cdata
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]"
-
-#data
-<svg><![CDATA[]>a
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,17): expected-closing-tag-but-got-eof
-#new-errors
-(1:18) eof-in-cdata
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "]>a"
-
-#data
-<!DOCTYPE html><svg><![CDATA[foo]]]>
-#errors
-(1,36): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo]"
-
-#data
-<!DOCTYPE html><svg><![CDATA[foo]]]]>
-#errors
-(1,37): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo]]"
-
-#data
-<!DOCTYPE html><svg><![CDATA[foo]]]]]>
-#errors
-(1,38): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "foo]]]"
-
-#data
-<svg><foreignObject><div><![CDATA[foo]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,27): expected-dashes-or-doctype
-(1,40): expected-closing-tag-but-got-eof
-#new-errors
-(1:34) cdata-in-html-content
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg foreignObject>
-|         <div>
-|           <!-- [CDATA[foo]] -->
-
-#data
-<svg><![CDATA[<svg>]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,22): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>"
-
-#data
-<svg><![CDATA[</svg>a]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,24): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "</svg>a"
-
-#data
-<svg><![CDATA[<svg>a
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,20): expected-closing-tag-but-got-eof
-#new-errors
-(1:21) eof-in-cdata
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>a"
-
-#data
-<svg><![CDATA[</svg>a
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,21): expected-closing-tag-but-got-eof
-#new-errors
-(1:22) eof-in-cdata
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "</svg>a"
-
-#data
-<svg><![CDATA[<svg>]]><path>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,28): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>"
-|       <svg path>
-
-#data
-<svg><![CDATA[<svg>]]></path>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,29): unexpected-end-tag
-(1,29): unexpected-end-tag
-(1,29): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>"
-
-#data
-<svg><![CDATA[<svg>]]><!--path-->
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>"
-|       <!-- path -->
-
-#data
-<svg><![CDATA[<svg>]]>path
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,26): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<svg>path"
-
-#data
-<svg><![CDATA[<!--svg-->]]>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,27): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       "<!--svg-->"
diff --git a/html/testdata/webkit/tests22.dat b/html/testdata/webkit/tests22.dat
deleted file mode 100644 (file)
index 31e6d9e..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-#data
-<a><b><big><em><strong><div>X</a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,33): adoption-agency-1.3
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <b>
-|         <big>
-|           <em>
-|             <strong>
-|     <big>
-|       <em>
-|         <strong>
-|           <div>
-|             <a>
-|               "X"
-
-#data
-<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,91): adoption-agency-1.3
-(1,91): adoption-agency-1.3
-(1,91): adoption-agency-1.3
-(1,91): adoption-agency-1.3
-(1,91): adoption-agency-1.3
-(1,91): adoption-agency-1.3
-(1,91): adoption-agency-1.3
-(1,91): adoption-agency-1.3
-(1,91): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <b>
-|     <b>
-|       <div>
-|         id="1"
-|         <a>
-|         <div>
-|           id="2"
-|           <a>
-|           <div>
-|             id="3"
-|             <a>
-|             <div>
-|               id="4"
-|               <a>
-|               <div>
-|                 id="5"
-|                 <a>
-|                 <div>
-|                   id="6"
-|                   <a>
-|                   <div>
-|                     id="7"
-|                     <a>
-|                     <div>
-|                       id="8"
-|                       <a>
-|                         "A"
-
-#data
-<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,101): adoption-agency-1.3
-(1,101): adoption-agency-1.3
-(1,101): adoption-agency-1.3
-(1,101): adoption-agency-1.3
-(1,101): adoption-agency-1.3
-(1,101): adoption-agency-1.3
-(1,101): adoption-agency-1.3
-(1,101): adoption-agency-1.3
-(1,101): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <b>
-|     <b>
-|       <div>
-|         id="1"
-|         <a>
-|         <div>
-|           id="2"
-|           <a>
-|           <div>
-|             id="3"
-|             <a>
-|             <div>
-|               id="4"
-|               <a>
-|               <div>
-|                 id="5"
-|                 <a>
-|                 <div>
-|                   id="6"
-|                   <a>
-|                   <div>
-|                     id="7"
-|                     <a>
-|                     <div>
-|                       id="8"
-|                       <a>
-|                         <div>
-|                           id="9"
-|                           "A"
-
-#data
-<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,112): adoption-agency-1.3
-(1,112): adoption-agency-1.3
-(1,112): adoption-agency-1.3
-(1,112): adoption-agency-1.3
-(1,112): adoption-agency-1.3
-(1,112): adoption-agency-1.3
-(1,112): adoption-agency-1.3
-(1,112): adoption-agency-1.3
-(1,112): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       <b>
-|     <b>
-|       <div>
-|         id="1"
-|         <a>
-|         <div>
-|           id="2"
-|           <a>
-|           <div>
-|             id="3"
-|             <a>
-|             <div>
-|               id="4"
-|               <a>
-|               <div>
-|                 id="5"
-|                 <a>
-|                 <div>
-|                   id="6"
-|                   <a>
-|                   <div>
-|                     id="7"
-|                     <a>
-|                     <div>
-|                       id="8"
-|                       <a>
-|                         <div>
-|                           id="9"
-|                           <div>
-|                             id="10"
-|                             "A"
-
-#data
-<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,46): adoption-agency-1.3
-(1,50): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <cite>
-|       <b>
-|         <cite>
-|           <i>
-|             <cite>
-|               <i>
-|                 <cite>
-|                   <i>
-|       <i>
-|         <i>
-|           <div>
-|             <b>
-|               "X"
-|             "TEST"
diff --git a/html/testdata/webkit/tests23.dat b/html/testdata/webkit/tests23.dat
deleted file mode 100644 (file)
index 49e4a4a..0000000
+++ /dev/null
@@ -1,168 +0,0 @@
-#data
-<p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,116): unexpected-end-tag
-(1,117): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <font>
-|         size="4"
-|         <font>
-|           color="red"
-|           <font>
-|             size="4"
-|             <font>
-|               size="4"
-|               <font>
-|                 size="4"
-|                 <font>
-|                   size="4"
-|                   <font>
-|                     size="4"
-|                     <font>
-|                       color="red"
-|     <p>
-|       <font>
-|         color="red"
-|         <font>
-|           size="4"
-|           <font>
-|             size="4"
-|             <font>
-|               size="4"
-|               <font>
-|                 color="red"
-|                 "X"
-
-#data
-<p><font size=4><font size=4><font size=4><font size=4><p>X
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,58): unexpected-end-tag
-(1,59): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <font>
-|         size="4"
-|         <font>
-|           size="4"
-|           <font>
-|             size="4"
-|             <font>
-|               size="4"
-|     <p>
-|       <font>
-|         size="4"
-|         <font>
-|           size="4"
-|           <font>
-|             size="4"
-|             "X"
-
-#data
-<p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,73): unexpected-end-tag
-(1,74): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <font>
-|         size="4"
-|         <font>
-|           size="4"
-|           <font>
-|             size="4"
-|             <font>
-|               size="5"
-|               <font>
-|                 size="4"
-|     <p>
-|       <font>
-|         size="4"
-|         <font>
-|           size="4"
-|           <font>
-|             size="5"
-|             <font>
-|               size="4"
-|               "X"
-
-#data
-<p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,68): unexpected-end-tag
-(1,69): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <font>
-|         id="a"
-|         size="4"
-|         <font>
-|           id="b"
-|           size="4"
-|           <font>
-|             size="4"
-|             <font>
-|               size="4"
-|     <p>
-|       <font>
-|         id="a"
-|         size="4"
-|         <font>
-|           id="b"
-|           size="4"
-|           <font>
-|             size="4"
-|             <font>
-|               size="4"
-|               "X"
-
-#data
-<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,64): end-tag-too-early
-(1,67): unexpected-end-tag
-(1,68): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <b>
-|         id="a"
-|         <b>
-|           id="a"
-|           <b>
-|             id="a"
-|             <b>
-|               <object>
-|                 <b>
-|                   id="a"
-|                   <b>
-|                     id="a"
-|                     "X"
-|     <p>
-|       <b>
-|         id="a"
-|         <b>
-|           id="a"
-|           <b>
-|             id="a"
-|             <b>
-|               "Y"
diff --git a/html/testdata/webkit/tests24.dat b/html/testdata/webkit/tests24.dat
deleted file mode 100644 (file)
index f6dc7eb..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-#data
-<!DOCTYPE html>&NotEqualTilde;
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "≂̸"
-
-#data
-<!DOCTYPE html>&NotEqualTilde;A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "≂̸A"
-
-#data
-<!DOCTYPE html>&ThickSpace;
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "  "
-
-#data
-<!DOCTYPE html>&ThickSpace;A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "  A"
-
-#data
-<!DOCTYPE html>&NotSubset;
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "⊂⃒"
-
-#data
-<!DOCTYPE html>&NotSubset;A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "⊂⃒A"
-
-#data
-<!DOCTYPE html>&Gopf;
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "𝔾"
-
-#data
-<!DOCTYPE html>&Gopf;A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "𝔾A"
diff --git a/html/testdata/webkit/tests25.dat b/html/testdata/webkit/tests25.dat
deleted file mode 100644 (file)
index 5bb369b..0000000
+++ /dev/null
@@ -1,288 +0,0 @@
-#data
-<!DOCTYPE html><body><foo>A
-#errors
-(1,27): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <foo>
-|       "A"
-
-#data
-<!DOCTYPE html><body><area>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <area>
-|     "A"
-
-#data
-<!DOCTYPE html><body><base>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <base>
-|     "A"
-
-#data
-<!DOCTYPE html><body><basefont>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <basefont>
-|     "A"
-
-#data
-<!DOCTYPE html><body><bgsound>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <bgsound>
-|     "A"
-
-#data
-<!DOCTYPE html><body><br>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <br>
-|     "A"
-
-#data
-<!DOCTYPE html><body><col>A
-#errors
-(1,26): unexpected-start-tag-ignored
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "A"
-
-#data
-<!DOCTYPE html><body><command>A
-#errors
-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <command>
-|       "A"
-
-#data
-<!DOCTYPE html><body><embed>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <embed>
-|     "A"
-
-#data
-<!DOCTYPE html><body><frame>A
-#errors
-(1,28): unexpected-start-tag-ignored
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "A"
-
-#data
-<!DOCTYPE html><body><hr>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <hr>
-|     "A"
-
-#data
-<!DOCTYPE html><body><img>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <img>
-|     "A"
-
-#data
-<!DOCTYPE html><body><input>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <input>
-|     "A"
-
-#data
-<!DOCTYPE html><body><keygen>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <keygen>
-|     "A"
-
-#data
-<!DOCTYPE html><keygen>A</keygen>B
-#errors
-33: Stray end tag “keygen”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <keygen>
-|     "AB"
-
-#data
-</keygen>A
-#errors
-9: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
-9: Stray end tag “keygen”.
-#document
-| <html>
-|   <head>
-|   <body>
-|     "A"
-
-#data
-<!DOCTYPE html></keygen>A
-#errors
-24: Stray end tag “keygen”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "A"
-
-#data
-<!DOCTYPE html><head></keygen>A
-#errors
-30: Stray end tag “keygen”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "A"
-
-#data
-<!DOCTYPE html><head></head></keygen>A
-#errors
-30: Stray end tag “keygen”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "A"
-
-#data
-<!DOCTYPE html><body></keygen>A
-#errors
-30: Stray end tag “keygen”.
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "A"
-
-#data
-<!DOCTYPE html><body><link>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <link>
-|     "A"
-
-#data
-<!DOCTYPE html><body><meta>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <meta>
-|     "A"
-
-#data
-<!DOCTYPE html><body><param>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <param>
-|     "A"
-
-#data
-<!DOCTYPE html><body><source>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <source>
-|     "A"
-
-#data
-<!DOCTYPE html><body><track>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <track>
-|     "A"
-
-#data
-<!DOCTYPE html><body><wbr>A
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <wbr>
-|     "A"
diff --git a/html/testdata/webkit/tests26.dat b/html/testdata/webkit/tests26.dat
deleted file mode 100644 (file)
index de453b9..0000000
+++ /dev/null
@@ -1,393 +0,0 @@
-#data
-<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a>
-#errors
-(1,47): unexpected-start-tag-implies-end-tag
-(1,51): adoption-agency-1.3
-(1,74): unexpected-start-tag-implies-end-tag
-(1,74): adoption-agency-1.3
-(1,81): unexpected-start-tag-implies-end-tag
-(1,85): adoption-agency-1.3
-(1,108): unexpected-start-tag-implies-end-tag
-(1,108): adoption-agency-1.3
-(1,115): unexpected-start-tag-implies-end-tag
-(1,119): adoption-agency-1.3
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       href="#1"
-|       <nobr>
-|         "1"
-|       <nobr>
-|     <nobr>
-|       <br>
-|       <a>
-|         href="#2"
-|     <a>
-|       href="#2"
-|       <nobr>
-|         "2"
-|       <nobr>
-|     <nobr>
-|       <br>
-|       <a>
-|         href="#3"
-|     <a>
-|       href="#3"
-|       <nobr>
-|         "3"
-|       <nobr>
-
-#data
-<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
-#errors
-(1,37): unexpected-start-tag-implies-end-tag
-(1,41): adoption-agency-1.3
-(1,50): unexpected-start-tag-implies-end-tag
-(1,50): adoption-agency-1.3
-(1,57): unexpected-start-tag-implies-end-tag
-(1,61): adoption-agency-1.3
-(1,62): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <nobr>
-|         "1"
-|       <nobr>
-|     <nobr>
-|       <i>
-|     <i>
-|       <nobr>
-|         "2"
-|       <nobr>
-|     <nobr>
-|       "3"
-
-#data
-<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
-#errors
-(1,44): foster-parenting-start-tag
-(1,48): foster-parenting-end-tag
-(1,48): adoption-agency-1.3
-(1,51): foster-parenting-start-tag
-(1,57): foster-parenting-start-tag
-(1,57): nobr-already-in-scope
-(1,57): adoption-agency-1.2
-(1,58): foster-parenting-character
-(1,64): foster-parenting-start-tag
-(1,64): nobr-already-in-scope
-(1,68): foster-parenting-end-tag
-(1,68): adoption-agency-1.2
-(1,69): foster-parenting-character
-(1,69): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <nobr>
-|         "1"
-|         <nobr>
-|           <i>
-|         <i>
-|           <nobr>
-|             "2"
-|           <nobr>
-|         <nobr>
-|           "3"
-|         <table>
-
-#data
-<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
-#errors
-(1,56): unexpected-end-tag
-(1,65): unexpected-start-tag-implies-end-tag
-(1,65): adoption-agency-1.3
-(1,72): unexpected-start-tag-implies-end-tag
-(1,76): adoption-agency-1.3
-(1,77): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <nobr>
-|         "1"
-|         <table>
-|           <tbody>
-|             <tr>
-|               <td>
-|                 <nobr>
-|                   <i>
-|                 <i>
-|                   <nobr>
-|                     "2"
-|                   <nobr>
-|                 <nobr>
-|                   "3"
-
-#data
-<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
-#errors
-(1,42): unexpected-start-tag-implies-end-tag
-(1,42): adoption-agency-1.3
-(1,46): adoption-agency-1.3
-(1,46): adoption-agency-1.3
-(1,55): unexpected-start-tag-implies-end-tag
-(1,55): adoption-agency-1.3
-(1,62): unexpected-start-tag-implies-end-tag
-(1,66): adoption-agency-1.3
-(1,67): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <nobr>
-|         "1"
-|     <div>
-|       <b>
-|         <nobr>
-|         <nobr>
-|       <nobr>
-|         <i>
-|       <i>
-|         <nobr>
-|           "2"
-|         <nobr>
-|       <nobr>
-|         "3"
-
-#data
-<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
-#errors
-(1,37): unexpected-start-tag-implies-end-tag
-(1,41): adoption-agency-1.3
-(1,55): unexpected-start-tag-implies-end-tag
-(1,55): adoption-agency-1.3
-(1,62): unexpected-start-tag-implies-end-tag
-(1,66): adoption-agency-1.3
-(1,67): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <nobr>
-|         "1"
-|       <nobr>
-|     <div>
-|       <nobr>
-|         <i>
-|       <i>
-|         <nobr>
-|           "2"
-|         <nobr>
-|       <nobr>
-|         "3"
-
-#data
-<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
-#errors
-(1,37): unexpected-start-tag-implies-end-tag
-(1,46): adoption-agency-1.3
-(1,55): unexpected-start-tag-implies-end-tag
-(1,55): adoption-agency-1.3
-(1,55): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <nobr>
-|         "1"
-|       <nobr>
-|         <ins>
-|     <nobr>
-|       <i>
-|     <i>
-|       <nobr>
-
-#data
-<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
-#errors
-(1,42): unexpected-start-tag-implies-end-tag
-(1,42): adoption-agency-1.3
-(1,46): adoption-agency-1.3
-(1,50): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <nobr>
-|         "1"
-|         <ins>
-|       <nobr>
-|     <nobr>
-|       <i>
-|         "2"
-
-#data
-<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
-#errors
-(1,35): adoption-agency-1.3
-(1,44): unexpected-start-tag-implies-end-tag
-(1,44): adoption-agency-1.3
-(1,49): adoption-agency-1.3
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "1"
-|       <nobr>
-|     <nobr>
-|       <i>
-|     <i>
-|       <nobr>
-|         "2"
-
-#data
-<p><code x</code></p>
-
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,11): invalid-character-in-attribute-name
-(1,12): unexpected-character-after-solidus-in-tag
-(1,21): unexpected-end-tag
-(2,0): expected-closing-tag-but-got-eof
-#new-errors
-(1:11) unexpected-character-in-attribute-name
-(1:13) unexpected-solidus-in-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <code>
-|         code=""
-|         x<=""
-|     <code>
-|       code=""
-|       x<=""
-|       "
-"
-
-#data
-<!DOCTYPE html><svg><foreignObject><p><i></p>a
-#errors
-(1,45): unexpected-end-tag
-(1,46): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg foreignObject>
-|         <p>
-|           <i>
-|         <i>
-|           "a"
-
-#data
-<!DOCTYPE html><table><tr><td><svg><foreignObject><p><i></p>a
-#errors
-(1,60): unexpected-end-tag
-(1,61): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <svg svg>
-|               <svg foreignObject>
-|                 <p>
-|                   <i>
-|                 <i>
-|                   "a"
-
-#data
-<!DOCTYPE html><math><mtext><p><i></p>a
-#errors
-(1,38): unexpected-end-tag
-(1,39): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mtext>
-|         <p>
-|           <i>
-|         <i>
-|           "a"
-
-#data
-<!DOCTYPE html><table><tr><td><math><mtext><p><i></p>a
-#errors
-(1,53): unexpected-end-tag
-(1,54): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <math math>
-|               <math mtext>
-|                 <p>
-|                   <i>
-|                 <i>
-|                   "a"
-
-#data
-<!DOCTYPE html><body><div><!/div>a
-#errors
-(1,28): expected-dashes-or-doctype
-(1,34): expected-closing-tag-but-got-eof
-#new-errors
-(1:29) incorrectly-opened-comment
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <!-- /div -->
-|       "a"
-
-#data
-<button><p><button>
-#errors
-Line 1 Col 8 Unexpected start tag (button). Expected DOCTYPE.
-Line 1 Col 19 Unexpected start tag (button) implies end tag (button).
-Line 1 Col 19 Expected closing tag. Unexpected end of file.
-#document
-| <html>
-|   <head>
-|   <body>
-|     <button>
-|       <p>
-|     <button>
diff --git a/html/testdata/webkit/tests3.dat b/html/testdata/webkit/tests3.dat
deleted file mode 100644 (file)
index c7583d9..0000000
+++ /dev/null
@@ -1,305 +0,0 @@
-#data
-<head></head><style></style>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,20): unexpected-start-tag-out-of-my-head
-#document
-| <html>
-|   <head>
-|     <style>
-|   <body>
-
-#data
-<head></head><script></script>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,21): unexpected-start-tag-out-of-my-head
-#document
-| <html>
-|   <head>
-|     <script>
-|   <body>
-
-#data
-<head></head><!-- --><style></style><!-- --><script></script>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,28): unexpected-start-tag-out-of-my-head
-(1,52): unexpected-start-tag-out-of-my-head
-#document
-| <html>
-|   <head>
-|     <style>
-|     <script>
-|   <!--   -->
-|   <!--   -->
-|   <body>
-
-#data
-<head></head><!-- -->x<style></style><!-- --><script></script>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <!--   -->
-|   <body>
-|     "x"
-|     <style>
-|     <!--   -->
-|     <script>
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>
-</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>
-foo</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-|       "foo"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>
-
-foo</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-|       "
-foo"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>
-foo
-</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-|       "foo
-"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
-</span></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-|       "x"
-|     <span>
-|       "
-"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>x
-y</pre></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-|       "x
-y"
-
-#data
-<!DOCTYPE html><html><head></head><body><pre>x<div>
-y</pre></body></html>
-#errors
-(2,7): end-tag-too-early
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-|       "x"
-|       <div>
-|         "
-y"
-
-#data
-<!DOCTYPE html><pre>&#x0a;&#x0a;A</pre>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <pre>
-|       "
-A"
-
-#data
-<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
-#errors
-(1,33): two-heads-are-not-better-than-one
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <meta>
-|   <body>
-
-#data
-<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
-#errors
-(1,33): two-heads-are-not-better-than-one
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<textarea>foo<span>bar</span><i>baz
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,35): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "foo<span>bar</span><i>baz"
-
-#data
-<title>foo<span>bar</em><i>baz
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,30): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|     <title>
-|       "foo<span>bar</em><i>baz"
-|   <body>
-
-#data
-<!DOCTYPE html><textarea>
-</textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-
-#data
-<!DOCTYPE html><textarea>
-foo</textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "foo"
-
-#data
-<!DOCTYPE html><textarea>
-
-foo</textarea>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       "
-foo"
-
-#data
-<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
-#errors
-(1,60): end-tag-too-early
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <ul>
-|       <li>
-|         <div>
-|           <p>
-|       <li>
-
-#data
-<!doctype html><nobr><nobr><nobr>
-#errors
-(1,27): unexpected-start-tag-implies-end-tag
-(1,33): unexpected-start-tag-implies-end-tag
-(1,33): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <nobr>
-|     <nobr>
-|     <nobr>
-
-#data
-<!doctype html><nobr><nobr></nobr><nobr>
-#errors
-(1,27): unexpected-start-tag-implies-end-tag
-(1,40): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <nobr>
-|     <nobr>
-|     <nobr>
-
-#data
-<!doctype html><html><body><p><table></table></body></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <table>
-
-#data
-<p><table></table>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <table>
diff --git a/html/testdata/webkit/tests4.dat b/html/testdata/webkit/tests4.dat
deleted file mode 100644 (file)
index 0a6174c..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-#data
-direct div content
-#errors
-#document-fragment
-div
-#document
-| "direct div content"
-
-#data
-direct textarea content
-#errors
-#document-fragment
-textarea
-#document
-| "direct textarea content"
-
-#data
-textarea content with <em>pseudo</em> <foo>markup
-#errors
-#document-fragment
-textarea
-#document
-| "textarea content with <em>pseudo</em> <foo>markup"
-
-#data
-this is &#x0043;DATA inside a <style> element
-#errors
-#document-fragment
-style
-#document
-| "this is &#x0043;DATA inside a <style> element"
-
-#data
-</plaintext>
-#errors
-#document-fragment
-plaintext
-#document
-| "</plaintext>"
-
-#data
-setting html's innerHTML
-#errors
-#document-fragment
-html
-#document
-| <head>
-| <body>
-|   "setting html's innerHTML"
-
-#data
-<title>setting head's innerHTML</title>
-#errors
-#document-fragment
-head
-#document
-| <title>
-|   "setting head's innerHTML"
diff --git a/html/testdata/webkit/tests5.dat b/html/testdata/webkit/tests5.dat
deleted file mode 100644 (file)
index 1ef8cae..0000000
+++ /dev/null
@@ -1,210 +0,0 @@
-#data
-<style> <!-- </style>x
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       " <!-- "
-|   <body>
-|     "x"
-
-#data
-<style> <!-- </style> --> </style>x
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,34): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       " <!-- "
-|     " "
-|   <body>
-|     "--> x"
-
-#data
-<style> <!--> </style>x
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       " <!--> "
-|   <body>
-|     "x"
-
-#data
-<style> <!---> </style>x
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       " <!---> "
-|   <body>
-|     "x"
-
-#data
-<iframe> <!---> </iframe>x
-#errors
-(1,8): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <iframe>
-|       " <!---> "
-|     "x"
-
-#data
-<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,36): unexpected-end-tag
-(1,50): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <iframe>
-|       " <!--- "
-|     "->x --> x"
-
-#data
-<script> <!-- </script> --> </script>x
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,37): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <script>
-|       " <!-- "
-|     " "
-|   <body>
-|     "--> x"
-
-#data
-<title> <!-- </title> --> </title>x
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,34): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|     <title>
-|       " <!-- "
-|     " "
-|   <body>
-|     "--> x"
-
-#data
-<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,42): unexpected-end-tag
-(1,58): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <textarea>
-|       " <!--- "
-|     "->x --> x"
-
-#data
-<style> <!</-- </style>x
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <style>
-|       " <!</-- "
-|   <body>
-|     "x"
-
-#data
-<p><xmp></xmp>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|     <xmp>
-
-#data
-<xmp> <!-- > --> </xmp>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <xmp>
-|       " <!-- > --> "
-
-#data
-<title>&amp;</title>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <title>
-|       "&"
-|   <body>
-
-#data
-<title><!--&amp;--></title>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <title>
-|       "<!--&-->"
-|   <body>
-
-#data
-<title><!--</title>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|     <title>
-|       "<!--"
-|   <body>
-
-#data
-<noscript><!--</noscript>--></noscript>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,39): unexpected-end-tag
-#script-on
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       "<!--"
-|   <body>
-|     "-->"
-
-#data
-<noscript><!--</noscript>--></noscript>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#script-off
-#document
-| <html>
-|   <head>
-|     <noscript>
-|       <!-- </noscript> -->
-|   <body>
diff --git a/html/testdata/webkit/tests6.dat b/html/testdata/webkit/tests6.dat
deleted file mode 100644 (file)
index f399123..0000000
+++ /dev/null
@@ -1,663 +0,0 @@
-#data
-<!doctype html></head> <head>
-#errors
-(1,29): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   " "
-|   <body>
-
-#data
-<!doctype html><form><div></form><div>
-#errors
-(1,33): end-tag-too-early-ignored
-(1,38): expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <form>
-|       <div>
-|         <div>
-
-#data
-<!doctype html><title>&amp;</title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <title>
-|       "&"
-|   <body>
-
-#data
-<!doctype html><title><!--&amp;--></title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <title>
-|       "<!--&-->"
-|   <body>
-
-#data
-<!doctype>
-#errors
-(1,9): need-space-after-doctype
-(1,10): expected-doctype-name-but-got-right-bracket
-(1,10): unknown-doctype
-#new-errors
-(1:10) missing-doctype-name
-#document
-| <!DOCTYPE >
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!---x
-#errors
-(1,6): eof-in-comment
-(1,6): expected-doctype-but-got-eof
-#new-errors
-(1:7) eof-in-comment
-#document
-| <!-- -x -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<body>
-<div>
-#errors
-(1,6): unexpected-start-tag
-(2,5): expected-closing-tag-but-got-eof
-#document-fragment
-div
-#document
-| "
-"
-| <div>
-
-#data
-<frameset></frameset>
-foo
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(2,1): unexpected-char-after-frameset
-(2,2): unexpected-char-after-frameset
-(2,3): unexpected-char-after-frameset
-#document
-| <html>
-|   <head>
-|   <frameset>
-|   "
-"
-
-#data
-<frameset></frameset>
-<noframes>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(2,10): expected-named-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <frameset>
-|   "
-"
-|   <noframes>
-
-#data
-<frameset></frameset>
-<div>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(2,5): unexpected-start-tag-after-frameset
-#document
-| <html>
-|   <head>
-|   <frameset>
-|   "
-"
-
-#data
-<frameset></frameset>
-</html>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <frameset>
-|   "
-"
-
-#data
-<frameset></frameset>
-</div>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(2,6): unexpected-end-tag-after-frameset
-#document
-| <html>
-|   <head>
-|   <frameset>
-|   "
-"
-
-#data
-<form><form>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,12): unexpected-start-tag
-(1,12): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <form>
-
-#data
-<button><button>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,16): unexpected-start-tag-implies-end-tag
-(1,16): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <button>
-|     <button>
-
-#data
-<table><tr><td></th>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,20): unexpected-end-tag
-(1,20): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-
-#data
-<table><caption><td>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,20): unexpected-cell-in-table-body
-(1,20): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|       <tbody>
-|         <tr>
-|           <td>
-
-#data
-<table><caption><div>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,21): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <div>
-
-#data
-</caption><div>
-#errors
-(1,10): XXX-undefined-error
-(1,15): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <div>
-
-#data
-<table><caption><div></caption>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,31): expected-one-end-tag-but-got-another
-(1,31): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <div>
-
-#data
-<table><caption></table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-
-#data
-</table><div>
-#errors
-(1,8): unexpected-end-tag
-(1,13): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <div>
-
-#data
-<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,23): unexpected-end-tag
-(1,29): unexpected-end-tag
-(1,40): unexpected-end-tag
-(1,47): unexpected-end-tag
-(1,55): unexpected-end-tag
-(1,60): unexpected-end-tag
-(1,68): unexpected-end-tag
-(1,73): unexpected-end-tag
-(1,81): unexpected-end-tag
-(1,86): unexpected-end-tag
-(1,86): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-
-#data
-<table><caption><div></div>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,27): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <div>
-
-#data
-<table><tr><td></body></caption></col></colgroup></html>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,22): unexpected-end-tag
-(1,32): unexpected-end-tag
-(1,38): unexpected-end-tag
-(1,49): unexpected-end-tag
-(1,56): unexpected-end-tag
-(1,56): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-
-#data
-</table></tbody></tfoot></thead></tr><div>
-#errors
-(1,8): unexpected-end-tag
-(1,16): unexpected-end-tag
-(1,24): unexpected-end-tag
-(1,32): unexpected-end-tag
-(1,37): unexpected-end-tag
-(1,42): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <div>
-
-#data
-<table><colgroup>foo
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,18): foster-parenting-character-in-table
-(1,19): foster-parenting-character-in-table
-(1,20): foster-parenting-character-in-table
-(1,20): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     "foo"
-|     <table>
-|       <colgroup>
-
-#data
-foo<col>
-#errors
-(1,1): unexpected-character-in-colgroup
-(1,2): unexpected-character-in-colgroup
-(1,3): unexpected-character-in-colgroup
-#document-fragment
-colgroup
-#document
-| <col>
-
-#data
-<table><colgroup></col>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,23): no-end-tag
-(1,23): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <colgroup>
-
-#data
-<frameset><div>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,15): unexpected-start-tag-in-frameset
-(1,15): eof-in-frameset
-#document
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-</frameset><frame>
-#errors
-(1,11): unexpected-frameset-in-frameset-innerhtml
-#document-fragment
-frameset
-#document
-| <frame>
-
-#data
-<frameset></div>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-(1,16): unexpected-end-tag-in-frameset
-(1,16): eof-in-frameset
-#document
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-</body><div>
-#errors
-(1,7): unexpected-close-tag
-(1,12): expected-closing-tag-but-got-eof
-#document-fragment
-body
-#document
-| <div>
-
-#data
-<table><tr><div>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,16): unexpected-start-tag-implies-table-voodoo
-(1,16): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-</tr><td>
-#errors
-(1,5): unexpected-end-tag
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-</tbody></tfoot></thead><td>
-#errors
-(1,8): unexpected-end-tag
-(1,16): unexpected-end-tag
-(1,24): unexpected-end-tag
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<table><tr><div><td>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,16): foster-parenting-start-tag
-(1,20): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-
-#data
-<caption><col><colgroup><tbody><tfoot><thead><tr>
-#errors
-(1,9): unexpected-start-tag
-(1,14): unexpected-start-tag
-(1,24): unexpected-start-tag
-(1,31): unexpected-start-tag
-(1,38): unexpected-start-tag
-(1,45): unexpected-start-tag
-#document-fragment
-tbody
-#document
-| <tr>
-
-#data
-<table><tbody></thead>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,22): unexpected-end-tag-in-table-body
-(1,22): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-
-#data
-</table><tr>
-#errors
-(1,8): unexpected-end-tag
-#document-fragment
-tbody
-#document
-| <tr>
-
-#data
-<table><tbody></body></caption></col></colgroup></html></td></th></tr>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,21): unexpected-end-tag-in-table-body
-(1,31): unexpected-end-tag-in-table-body
-(1,37): unexpected-end-tag-in-table-body
-(1,48): unexpected-end-tag-in-table-body
-(1,55): unexpected-end-tag-in-table-body
-(1,60): unexpected-end-tag-in-table-body
-(1,65): unexpected-end-tag-in-table-body
-(1,70): unexpected-end-tag-in-table-body
-(1,70): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-
-#data
-<table><tbody></div>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,20): unexpected-end-tag-implies-table-voodoo
-(1,20): end-tag-too-early
-(1,20): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-
-#data
-<table><table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,14): unexpected-start-tag-implies-end-tag
-(1,14): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|     <table>
-
-#data
-<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,14): unexpected-end-tag
-(1,24): unexpected-end-tag
-(1,30): unexpected-end-tag
-(1,41): unexpected-end-tag
-(1,48): unexpected-end-tag
-(1,56): unexpected-end-tag
-(1,61): unexpected-end-tag
-(1,69): unexpected-end-tag
-(1,74): unexpected-end-tag
-(1,82): unexpected-end-tag
-(1,87): unexpected-end-tag
-(1,87): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-
-#data
-</table><tr>
-#errors
-(1,8): unexpected-end-tag
-#document-fragment
-table
-#document
-| <tbody>
-|   <tr>
-
-#data
-<body></body></html>
-#errors
-(1,20): unexpected-end-tag-after-body-innerhtml
-#document-fragment
-html
-#document
-| <head>
-| <body>
-
-#data
-<html><frameset></frameset></html> 
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <frameset>
-|   " "
-
-#data
-<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
-#errors
-#document
-| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
-| <html>
-|   <head>
-|   <body>
-
-#data
-<param><frameset></frameset>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,17): unexpected-start-tag
-#document
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<source><frameset></frameset>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,18): unexpected-start-tag
-#document
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<track><frameset></frameset>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,17): unexpected-start-tag
-#document
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-</html><frameset></frameset>
-#errors
-(1,7): expected-doctype-but-got-end-tag
-(1,17): expected-eof-but-got-start-tag
-(1,17): unexpected-start-tag
-#document
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-</body><frameset></frameset>
-#errors
-(1,7): expected-doctype-but-got-end-tag
-(1,17): unexpected-start-tag-after-body
-(1,17): unexpected-start-tag
-#document
-| <html>
-|   <head>
-|   <frameset>
diff --git a/html/testdata/webkit/tests7.dat b/html/testdata/webkit/tests7.dat
deleted file mode 100644 (file)
index 395dc72..0000000
+++ /dev/null
@@ -1,418 +0,0 @@
-#data
-<!doctype html><body><title>X</title>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <title>
-|       "X"
-
-#data
-<!doctype html><table><title>X</title></table>
-#errors
-(1,29): unexpected-start-tag-implies-table-voodoo
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <title>
-|       "X"
-|     <table>
-
-#data
-<!doctype html><head></head><title>X</title>
-#errors
-(1,35): unexpected-start-tag-out-of-my-head
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <title>
-|       "X"
-|   <body>
-
-#data
-<!doctype html></head><title>X</title>
-#errors
-(1,29): unexpected-start-tag-out-of-my-head
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|     <title>
-|       "X"
-|   <body>
-
-#data
-<!doctype html><table><meta></table>
-#errors
-(1,28): unexpected-start-tag-implies-table-voodoo
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <meta>
-|     <table>
-
-#data
-<!doctype html><table>X<tr><td><table> <meta></table></table>
-#errors
-unexpected text in table
-(1,45): unexpected-start-tag-implies-table-voodoo
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "X"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <meta>
-|             <table>
-|               " "
-
-#data
-<!doctype html><html> <head>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!doctype html> <head>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!doctype html><table><style> <tr>x </style> </table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <style>
-|         " <tr>x "
-|       " "
-
-#data
-<!doctype html><table><TBODY><script> <tr>x </script> </table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <script>
-|           " <tr>x "
-|         " "
-
-#data
-<!doctype html><p><applet><p>X</p></applet>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <applet>
-|         <p>
-|           "X"
-
-#data
-<!doctype html><p><object type="application/x-non-existant-plugin"><p>X</p></object>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <object>
-|         type="application/x-non-existant-plugin"
-|         <p>
-|           "X"
-
-#data
-<!doctype html><listing>
-X</listing>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <listing>
-|       "X"
-
-#data
-<!doctype html><select><input>X
-#errors
-(1,30): unexpected-input-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|     <input>
-|     "X"
-
-#data
-<!doctype html><select><select>X
-#errors
-(1,31): unexpected-select-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|     "X"
-
-#data
-<!doctype html><table><input type=hidDEN></table>
-#errors
-(1,41): unexpected-hidden-input-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <input>
-|         type="hidDEN"
-
-#data
-<!doctype html><table>X<input type=hidDEN></table>
-#errors
-(1,23): foster-parenting-character
-(1,42): unexpected-hidden-input-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     "X"
-|     <table>
-|       <input>
-|         type="hidDEN"
-
-#data
-<!doctype html><table>  <input type=hidDEN></table>
-#errors
-(1,43): unexpected-hidden-input-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       "  "
-|       <input>
-|         type="hidDEN"
-
-#data
-<!doctype html><table>  <input type='hidDEN'></table>
-#errors
-(1,45): unexpected-hidden-input-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       "  "
-|       <input>
-|         type="hidDEN"
-
-#data
-<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
-#errors
-(1,44): unexpected-start-tag-implies-table-voodoo
-(1,63): unexpected-hidden-input-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <input>
-|       type=" hidden"
-|     <table>
-|       <input>
-|         type="hidDEN"
-
-#data
-<!doctype html><table><select>X<tr>
-#errors
-(1,30): unexpected-start-tag-implies-table-voodoo
-(1,35): unexpected-table-element-start-tag-in-select-in-table
-(1,35): eof-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "X"
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<!doctype html><select>X</select>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "X"
-
-#data
-<!DOCTYPE hTmL><html></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<!DOCTYPE HTML><html></html>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-
-#data
-<body>X</body></body>
-#errors
-(1,21): unexpected-end-tag-after-body
-#document-fragment
-html
-#document
-| <head>
-| <body>
-|   "X"
-
-#data
-<div><p>a</x> b
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,13): unexpected-end-tag
-(1,15): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <p>
-|         "a b"
-
-#data
-<table><tr><td><code></code> </table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <code>
-|             " "
-
-#data
-<table><b><tr><td>aaa</td></tr>bbb</table>ccc
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,10): foster-parenting-start-tag
-(1,32): foster-parenting-character
-(1,33): foster-parenting-character
-(1,34): foster-parenting-character
-(1,45): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|     <b>
-|       "bbb"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "aaa"
-|     <b>
-|       "ccc"
-
-#data
-A<table><tr> B</tr> B</table>
-#errors
-(1,1): expected-doctype-but-got-chars
-(1,13): foster-parenting-character
-(1,14): foster-parenting-character
-(1,20): foster-parenting-character
-(1,21): foster-parenting-character
-#document
-| <html>
-|   <head>
-|   <body>
-|     "A B B"
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-A<table><tr> B</tr> </em>C</table>
-#errors
-(1,1): expected-doctype-but-got-chars
-(1,13): foster-parenting-character
-(1,14): foster-parenting-character
-(1,20): foster-parenting-character
-(1,25): unexpected-end-tag
-(1,25): unexpected-end-tag-in-special-element
-(1,26): foster-parenting-character
-#document
-| <html>
-|   <head>
-|   <body>
-|     "A BC"
-|     <table>
-|       <tbody>
-|         <tr>
-|         " "
-
-#data
-<select><keygen>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,16): unexpected-input-in-select
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|     <keygen>
diff --git a/html/testdata/webkit/tests8.dat b/html/testdata/webkit/tests8.dat
deleted file mode 100644 (file)
index ba2e63d..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-#data
-<div>
-<div></div>
-</span>x
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(3,7): unexpected-end-tag
-(3,8): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "
-"
-|       <div>
-|       "
-x"
-
-#data
-<div>x<div></div>
-</span>x
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(2,7): unexpected-end-tag
-(2,8): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "x"
-|       <div>
-|       "
-x"
-
-#data
-<div>x<div></div>x</span>x
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,25): unexpected-end-tag
-(1,26): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "x"
-|       <div>
-|       "xx"
-
-#data
-<div>x<div></div>y</span>z
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,25): unexpected-end-tag
-(1,26): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "x"
-|       <div>
-|       "yz"
-
-#data
-<table><div>x<div></div>x</span>x
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,12): foster-parenting-start-tag
-(1,13): foster-parenting-character
-(1,18): foster-parenting-start-tag
-(1,24): foster-parenting-end-tag
-(1,25): foster-parenting-start-tag
-(1,32): foster-parenting-end-tag
-(1,32): unexpected-end-tag
-(1,33): foster-parenting-character
-(1,33): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "x"
-|       <div>
-|       "xx"
-|     <table>
-
-#data
-<table><li><li></table>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <li>
-|     <li>
-|     <table>
-
-#data
-x<table>x
-#errors
-(1,1): expected-doctype-but-got-chars
-(1,9): foster-parenting-character
-(1,9): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     "xx"
-|     <table>
-
-#data
-x<table><table>x
-#errors
-(1,1): expected-doctype-but-got-chars
-(1,15): unexpected-start-tag-implies-end-tag
-(1,16): foster-parenting-character
-(1,16): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     "x"
-|     <table>
-|     "x"
-|     <table>
-
-#data
-<b>a<div></div><div></b>y
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,24): adoption-agency-1.3
-(1,25): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       "a"
-|       <div>
-|     <div>
-|       <b>
-|       "y"
-
-#data
-<a><div><p></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,15): adoption-agency-1.3
-(1,15): adoption-agency-1.3
-(1,15): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|     <div>
-|       <a>
-|       <p>
-|         <a>
diff --git a/html/testdata/webkit/tests9.dat b/html/testdata/webkit/tests9.dat
deleted file mode 100644 (file)
index f8d04b2..0000000
+++ /dev/null
@@ -1,472 +0,0 @@
-#data
-<!DOCTYPE html><math></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-
-#data
-<!DOCTYPE html><body><math></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-
-#data
-<!DOCTYPE html><math><mi>
-#errors
-(1,25) expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-
-#data
-<!DOCTYPE html><math><annotation-xml><svg><u>
-#errors
-(1,45) unexpected-html-element-in-foreign-content
-(1,45) expected-closing-tag-but-got-eof
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math annotation-xml>
-|         <svg svg>
-|     <u>
-
-#data
-<!DOCTYPE html><body><select><math></math></select>
-#errors
-(1,35) unexpected-start-tag-in-select
-(1,42) unexpected-end-tag-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-
-#data
-<!DOCTYPE html><body><select><option><math></math></option></select>
-#errors
-(1,43) unexpected-start-tag-in-select
-(1,50) unexpected-end-tag-in-select
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-
-#data
-<!DOCTYPE html><body><table><math></math></table>
-#errors
-(1,34) unexpected-start-tag-implies-table-voodoo
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|     <table>
-
-#data
-<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
-#errors
-(1,34) foster-parenting-start-token
-(1,39) foster-parenting-character
-(1,40) foster-parenting-character
-(1,41) foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         "foo"
-|     <table>
-
-#data
-<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
-#errors
-(1,34) foster-parenting-start-tag
-(1,39) foster-parenting-character
-(1,40) foster-parenting-character
-(1,41) foster-parenting-character
-(1,51) foster-parenting-character
-(1,52) foster-parenting-character
-(1,53) foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         "foo"
-|       <math mi>
-|         "bar"
-|     <table>
-
-#data
-<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
-#errors
-(1,41) foster-parenting-start-tag
-(1,46) foster-parenting-character
-(1,47) foster-parenting-character
-(1,48) foster-parenting-character
-(1,58) foster-parenting-character
-(1,59) foster-parenting-character
-(1,60) foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         "foo"
-|       <math mi>
-|         "bar"
-|     <table>
-|       <tbody>
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
-#errors
-(1,45) foster-parenting-start-tag
-(1,50) foster-parenting-character
-(1,51) foster-parenting-character
-(1,52) foster-parenting-character
-(1,62) foster-parenting-character
-(1,63) foster-parenting-character
-(1,64) foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         "foo"
-|       <math mi>
-|         "bar"
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <math math>
-|               <math mi>
-|                 "foo"
-|               <math mi>
-|                 "bar"
-
-#data
-<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <math math>
-|               <math mi>
-|                 "foo"
-|               <math mi>
-|                 "bar"
-|             <p>
-|               "baz"
-
-#data
-<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <math math>
-|           <math mi>
-|             "foo"
-|           <math mi>
-|             "bar"
-|         <p>
-|           "baz"
-
-#data
-<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
-#errors
-(1,70) unexpected-html-element-in-foreign-content
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <math math>
-|           <math mi>
-|             "foo"
-|           <math mi>
-|             "bar"
-|         <p>
-|           "baz"
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table><p>quux
-#errors
-(1,78) unexpected-end-tag
-(1,78) expected-one-end-tag-but-got-another
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <caption>
-|         <math math>
-|           <math mi>
-|             "foo"
-|           <math mi>
-|             "bar"
-|           "baz"
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
-#errors
-(1,44) foster-parenting-start-tag
-(1,49) foster-parenting-character
-(1,50) foster-parenting-character
-(1,51) foster-parenting-character
-(1,61) foster-parenting-character
-(1,62) foster-parenting-character
-(1,63) foster-parenting-character
-(1,71) unexpected-html-element-in-foreign-content
-(1,71) foster-parenting-start-tag
-(1,63) foster-parenting-character
-(1,63) foster-parenting-character
-(1,63) foster-parenting-character
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         "foo"
-|       <math mi>
-|         "bar"
-|     <p>
-|       "baz"
-|     <table>
-|       <colgroup>
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
-#errors
-(1,50) unexpected-start-tag-in-select
-(1,54) unexpected-start-tag-in-select
-(1,62) unexpected-end-tag-in-select
-(1,66) unexpected-start-tag-in-select
-(1,74) unexpected-end-tag-in-select
-(1,77) unexpected-start-tag-in-select
-(1,88) unexpected-table-element-end-tag-in-select-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <select>
-|               "foobarbaz"
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
-#errors
-(1,36) unexpected-start-tag-implies-table-voodoo
-(1,42) unexpected-start-tag-in-select
-(1,46) unexpected-start-tag-in-select
-(1,54) unexpected-end-tag-in-select
-(1,58) unexpected-start-tag-in-select
-(1,66) unexpected-end-tag-in-select
-(1,69) unexpected-start-tag-in-select
-(1,80) unexpected-table-element-end-tag-in-select-in-table
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       "foobarbaz"
-|     <table>
-|     <p>
-|       "quux"
-
-#data
-<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi><p>baz
-#errors
-(1,41) expected-eof-but-got-start-tag
-(1,68) unexpected-html-element-in-foreign-content
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         "foo"
-|       <math mi>
-|         "bar"
-|     <p>
-|       "baz"
-
-#data
-<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi><p>baz
-#errors
-(1,34) unexpected-start-tag-after-body
-(1,61) unexpected-html-element-in-foreign-content
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mi>
-|         "foo"
-|       <math mi>
-|         "bar"
-|     <p>
-|       "baz"
-
-#data
-<!DOCTYPE html><frameset><math><mi></mi><mi></mi><p><span>
-#errors
-(1,31) unexpected-start-tag-in-frameset
-(1,35) unexpected-start-tag-in-frameset
-(1,40) unexpected-end-tag-in-frameset
-(1,44) unexpected-start-tag-in-frameset
-(1,49) unexpected-end-tag-in-frameset
-(1,52) unexpected-start-tag-in-frameset
-(1,58) unexpected-start-tag-in-frameset
-(1,58) eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><p><span>
-#errors
-(1,42) unexpected-start-tag-after-frameset
-(1,46) unexpected-start-tag-after-frameset
-(1,51) unexpected-end-tag-after-frameset
-(1,55) unexpected-start-tag-after-frameset
-(1,60) unexpected-end-tag-after-frameset
-(1,63) unexpected-start-tag-after-frameset
-(1,69) unexpected-start-tag-after-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     xlink:href="foo"
-|     <math math>
-|       xlink href="foo"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     xlink:href="foo"
-|     xml:lang="en"
-|     <math math>
-|       <math mi>
-|         xlink href="foo"
-|         xml lang="en"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     xlink:href="foo"
-|     xml:lang="en"
-|     <math math>
-|       <math mi>
-|         xlink href="foo"
-|         xml lang="en"
-
-#data
-<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
-#errors
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     xlink:href="foo"
-|     xml:lang="en"
-|     <math math>
-|       <math mi>
-|         xlink href="foo"
-|         xml lang="en"
-|       "bar"
diff --git a/html/testdata/webkit/tests_innerHTML_1.dat b/html/testdata/webkit/tests_innerHTML_1.dat
deleted file mode 100644 (file)
index 54f4368..0000000
+++ /dev/null
@@ -1,887 +0,0 @@
-#data
-<body><span>
-#errors
-(1,6): unexpected-start-tag
-(1,12): expected-closing-tag-but-got-eof
-#document-fragment
-body
-#document
-| <span>
-
-#data
-<span><body>
-#errors
-(1,12): unexpected-start-tag
-(1,12): expected-closing-tag-but-got-eof
-#document-fragment
-body
-#document
-| <span>
-
-#data
-<span><body>
-#errors
-(1,12): unexpected-start-tag
-(1,12): expected-closing-tag-but-got-eof
-#document-fragment
-div
-#document
-| <span>
-
-#data
-<body><span>
-#errors
-(1,12): expected-closing-tag-but-got-eof
-#document-fragment
-html
-#document
-| <head>
-| <body>
-|   <span>
-
-#data
-<frameset><span>
-#errors
-(1,10): unexpected-start-tag
-(1,16): expected-closing-tag-but-got-eof
-#document-fragment
-body
-#document
-| <span>
-
-#data
-<span><frameset>
-#errors
-(1,16): unexpected-start-tag
-(1,16): expected-closing-tag-but-got-eof
-#document-fragment
-body
-#document
-| <span>
-
-#data
-<span><frameset>
-#errors
-(1,16): unexpected-start-tag
-(1,16): expected-closing-tag-but-got-eof
-#document-fragment
-div
-#document
-| <span>
-
-#data
-<frameset><span>
-#errors
-(1,16): unexpected-start-tag-in-frameset
-(1,16): eof-in-frameset
-#document-fragment
-html
-#document
-| <head>
-| <frameset>
-
-#data
-<table><tr>
-#errors
-(1,7): unexpected-start-tag
-#document-fragment
-table
-#document
-| <tbody>
-|   <tr>
-
-#data
-</table><tr>
-#errors
-(1,8): unexpected-end-tag
-#document-fragment
-table
-#document
-| <tbody>
-|   <tr>
-
-#data
-<a>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,3): eof-in-table
-#document-fragment
-table
-#document
-| <a>
-
-#data
-<a>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,3): eof-in-table
-#document-fragment
-table
-#document
-| <a>
-
-#data
-<a><caption>a
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,13): expected-closing-tag-but-got-eof
-#document-fragment
-table
-#document
-| <a>
-| <caption>
-|   "a"
-
-#data
-<a><colgroup><col>
-#errors
-(1,3): foster-parenting-start-token
-(1,18): expected-closing-tag-but-got-eof
-#document-fragment
-table
-#document
-| <a>
-| <colgroup>
-|   <col>
-
-#data
-<a><tbody><tr>
-#errors
-(1,3): foster-parenting-start-tag
-#document-fragment
-table
-#document
-| <a>
-| <tbody>
-|   <tr>
-
-#data
-<a><tfoot><tr>
-#errors
-(1,3): foster-parenting-start-tag
-#document-fragment
-table
-#document
-| <a>
-| <tfoot>
-|   <tr>
-
-#data
-<a><thead><tr>
-#errors
-(1,3): foster-parenting-start-tag
-#document-fragment
-table
-#document
-| <a>
-| <thead>
-|   <tr>
-
-#data
-<a><tr>
-#errors
-(1,3): foster-parenting-start-tag
-#document-fragment
-table
-#document
-| <a>
-| <tbody>
-|   <tr>
-
-#data
-<a><th>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,7): unexpected-cell-in-table-body
-#document-fragment
-table
-#document
-| <a>
-| <tbody>
-|   <tr>
-|     <th>
-
-#data
-<a><td>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,7): unexpected-cell-in-table-body
-#document-fragment
-table
-#document
-| <a>
-| <tbody>
-|   <tr>
-|     <td>
-
-#data
-<table></table><tbody>
-#errors
-(1,22): unexpected-start-tag
-#document-fragment
-caption
-#document
-| <table>
-
-#data
-</table><span>
-#errors
-(1,8): unexpected-end-tag
-(1,14): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-
-#data
-<span></table>
-#errors
-(1,14): unexpected-end-tag
-(1,14): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-
-#data
-</caption><span>
-#errors
-(1,10): XXX-undefined-error
-(1,16): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-
-#data
-<span></caption><span>
-#errors
-(1,16): XXX-undefined-error
-(1,22): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><caption><span>
-#errors
-(1,15): unexpected-start-tag
-(1,21): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><col><span>
-#errors
-(1,11): unexpected-start-tag
-(1,17): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><colgroup><span>
-#errors
-(1,16): unexpected-start-tag
-(1,22): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><html><span>
-#errors
-(1,12): non-html-root
-(1,18): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><tbody><span>
-#errors
-(1,13): unexpected-start-tag
-(1,19): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><td><span>
-#errors
-(1,10): unexpected-start-tag
-(1,16): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><tfoot><span>
-#errors
-(1,13): unexpected-start-tag
-(1,19): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><thead><span>
-#errors
-(1,13): unexpected-start-tag
-(1,19): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><th><span>
-#errors
-(1,10): unexpected-start-tag
-(1,16): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span><tr><span>
-#errors
-(1,10): unexpected-start-tag
-(1,16): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-<span></table><span>
-#errors
-(1,14): unexpected-end-tag
-(1,20): expected-closing-tag-but-got-eof
-#document-fragment
-caption
-#document
-| <span>
-|   <span>
-
-#data
-</colgroup><col>
-#errors
-(1,11): XXX-undefined-error
-#document-fragment
-colgroup
-#document
-| <col>
-
-#data
-<a><col>
-#errors
-(1,3): XXX-undefined-error
-#document-fragment
-colgroup
-#document
-| <col>
-
-#data
-<caption><a>
-#errors
-(1,9): XXX-undefined-error
-(1,12): unexpected-start-tag-implies-table-voodoo
-(1,12): eof-in-table
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<col><a>
-#errors
-(1,5): XXX-undefined-error
-(1,8): unexpected-start-tag-implies-table-voodoo
-(1,8): eof-in-table
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<colgroup><a>
-#errors
-(1,10): XXX-undefined-error
-(1,13): unexpected-start-tag-implies-table-voodoo
-(1,13): eof-in-table
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<tbody><a>
-#errors
-(1,7): XXX-undefined-error
-(1,10): unexpected-start-tag-implies-table-voodoo
-(1,10): eof-in-table
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<tfoot><a>
-#errors
-(1,7): XXX-undefined-error
-(1,10): unexpected-start-tag-implies-table-voodoo
-(1,10): eof-in-table
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<thead><a>
-#errors
-(1,7): XXX-undefined-error
-(1,10): unexpected-start-tag-implies-table-voodoo
-(1,10): eof-in-table
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-</table><a>
-#errors
-(1,8): XXX-undefined-error
-(1,11): unexpected-start-tag-implies-table-voodoo
-(1,11): eof-in-table
-#document-fragment
-tbody
-#document
-| <a>
-
-#data
-<a><tr>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-
-#data
-<a><td>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,7): unexpected-cell-in-table-body
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-|   <td>
-
-#data
-<a><td>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,7): unexpected-cell-in-table-body
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-|   <td>
-
-#data
-<a><td>
-#errors
-(1,3): unexpected-start-tag-implies-table-voodoo
-(1,7): unexpected-cell-in-table-body
-#document-fragment
-tbody
-#document
-| <a>
-| <tr>
-|   <td>
-
-#data
-<td><table><tbody><a><tr>
-#errors
-(1,4): unexpected-cell-in-table-body
-(1,21): unexpected-start-tag-implies-table-voodoo
-(1,25): eof-in-table
-#document-fragment
-tbody
-#document
-| <tr>
-|   <td>
-|     <a>
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-</tr><td>
-#errors
-(1,5): XXX-undefined-error
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<td><table><a><tr></tr><tr>
-#errors
-(1,14): unexpected-start-tag-implies-table-voodoo
-(1,27): eof-in-table
-#document-fragment
-tr
-#document
-| <td>
-|   <a>
-|   <table>
-|     <tbody>
-|       <tr>
-|       <tr>
-
-#data
-<caption><td>
-#errors
-(1,9): XXX-undefined-error
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<col><td>
-#errors
-(1,5): XXX-undefined-error
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<colgroup><td>
-#errors
-(1,10): XXX-undefined-error
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<tbody><td>
-#errors
-(1,7): XXX-undefined-error
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<tfoot><td>
-#errors
-(1,7): XXX-undefined-error
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<thead><td>
-#errors
-(1,7): XXX-undefined-error
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<tr><td>
-#errors
-(1,4): XXX-undefined-error
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-</table><td>
-#errors
-(1,8): XXX-undefined-error
-#document-fragment
-tr
-#document
-| <td>
-
-#data
-<td><table></table><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-|   <table>
-| <td>
-
-#data
-<td><table></table><td>
-#errors
-#document-fragment
-tr
-#document
-| <td>
-|   <table>
-| <td>
-
-#data
-<caption><a>
-#errors
-(1,9): XXX-undefined-error
-(1,12): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<col><a>
-#errors
-(1,5): XXX-undefined-error
-(1,8): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<colgroup><a>
-#errors
-(1,10): XXX-undefined-error
-(1,13): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<tbody><a>
-#errors
-(1,7): XXX-undefined-error
-(1,10): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<tfoot><a>
-#errors
-(1,7): XXX-undefined-error
-(1,10): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<th><a>
-#errors
-(1,4): XXX-undefined-error
-(1,7): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<thead><a>
-#errors
-(1,7): XXX-undefined-error
-(1,10): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<tr><a>
-#errors
-(1,4): XXX-undefined-error
-(1,7): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</table><a>
-#errors
-(1,8): XXX-undefined-error
-(1,11): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</tbody><a>
-#errors
-(1,8): XXX-undefined-error
-(1,11): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</td><a>
-#errors
-(1,5): unexpected-end-tag
-(1,8): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</tfoot><a>
-#errors
-(1,8): XXX-undefined-error
-(1,11): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</thead><a>
-#errors
-(1,8): XXX-undefined-error
-(1,11): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</th><a>
-#errors
-(1,5): unexpected-end-tag
-(1,8): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-</tr><a>
-#errors
-(1,5): XXX-undefined-error
-(1,8): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <a>
-
-#data
-<table><td><td>
-#errors
-(1,11): unexpected-cell-in-table-body
-(1,15): expected-closing-tag-but-got-eof
-#document-fragment
-td
-#document
-| <table>
-|   <tbody>
-|     <tr>
-|       <td>
-|       <td>
-
-#data
-</select><option>
-#errors
-(1,9): XXX-undefined-error
-#document-fragment
-select
-#document
-| <option>
-
-#data
-<input><option>
-#errors
-(1,7): unexpected-input-in-select
-#document-fragment
-select
-#document
-| <option>
-
-#data
-<keygen><option>
-#errors
-(1,8): unexpected-input-in-select
-#document-fragment
-select
-#document
-| <option>
-
-#data
-<textarea><option>
-#errors
-(1,10): unexpected-input-in-select
-#document-fragment
-select
-#document
-| <option>
-
-#data
-</html><!--abc-->
-#errors
-(1,7): unexpected-end-tag-after-body-innerhtml
-#document-fragment
-html
-#document
-| <head>
-| <body>
-| <!-- abc -->
-
-#data
-</frameset><frame>
-#errors
-(1,11): unexpected-frameset-in-frameset-innerhtml
-#document-fragment
-frameset
-#document
-| <frame>
-
-#data
-#errors
-#document-fragment
-html
-#document
-| <head>
-| <body>
diff --git a/html/testdata/webkit/tricky01.dat b/html/testdata/webkit/tricky01.dat
deleted file mode 100644 (file)
index 753502a..0000000
+++ /dev/null
@@ -1,336 +0,0 @@
-#data
-<b><p>Bold </b> Not bold</p>
-Also not bold.
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,15): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|     <p>
-|       <b>
-|         "Bold "
-|       " Not bold"
-|     "
-Also not bold."
-
-#data
-<html>
-<font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain
-<p>I should not be red. <font color=red>Red. <i>Italic and red.</p>
-<p>Italic and red. </i> Red.</font> I should not be red.</p>
-<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(2,58): adoption-agency-1.3
-(3,67): unexpected-end-tag
-(4,23): adoption-agency-1.3
-(4,35): adoption-agency-1.3
-(5,30): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <font>
-|       color="red"
-|       <i>
-|         "Italic and Red"
-|     <i>
-|       <p>
-|         <font>
-|           color="red"
-|           "Italic and Red "
-|         " Just italic."
-|       " Italic only."
-|     " Plain
-"
-|     <p>
-|       "I should not be red. "
-|       <font>
-|         color="red"
-|         "Red. "
-|         <i>
-|           "Italic and red."
-|     <font>
-|       color="red"
-|       <i>
-|         "
-"
-|     <p>
-|       <font>
-|         color="red"
-|         <i>
-|           "Italic and red. "
-|         " Red."
-|       " I should not be red."
-|     "
-"
-|     <b>
-|       "Bold "
-|       <i>
-|         "Bold and italic"
-|     <i>
-|       " Only Italic "
-|     " Plain"
-
-#data
-<html><body>
-<p><font size="7">First paragraph.</p>
-<p>Second paragraph.</p></font>
-<b><p><i>Bold and Italic</b> Italic</p>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(2,38): unexpected-end-tag
-(4,28): adoption-agency-1.3
-(4,28): adoption-agency-1.3
-(4,39): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     "
-"
-|     <p>
-|       <font>
-|         size="7"
-|         "First paragraph."
-|     <font>
-|       size="7"
-|       "
-"
-|       <p>
-|         "Second paragraph."
-|     "
-"
-|     <b>
-|     <p>
-|       <b>
-|         <i>
-|           "Bold and Italic"
-|       <i>
-|         " Italic"
-
-#data
-<html>
-<dl>
-<dt><b>Boo
-<dd>Goo?
-</dl>
-</html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(4,4): end-tag-too-early
-(5,5): end-tag-too-early
-(6,7): expected-one-end-tag-but-got-another
-#document
-| <html>
-|   <head>
-|   <body>
-|     <dl>
-|       "
-"
-|       <dt>
-|         <b>
-|           "Boo
-"
-|       <dd>
-|         <b>
-|           "Goo?
-"
-|     <b>
-|       "
-"
-
-#data
-<html><body>
-<label><a><div>Hello<div>World</div></a></label>  
-</body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(2,40): adoption-agency-1.3
-(2,48): unexpected-end-tag
-(3,7): expected-one-end-tag-but-got-another
-#document
-| <html>
-|   <head>
-|   <body>
-|     "
-"
-|     <label>
-|       <a>
-|       <div>
-|         <a>
-|           "Hello"
-|           <div>
-|             "World"
-|         "  
-"
-
-#data
-<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,15): foster-parenting-start-tag
-(1,16): foster-parenting-character
-(1,22): foster-parenting-start-tag
-(1,23): foster-parenting-character
-(1,32): foster-parenting-end-tag
-(1,32): end-tag-too-early
-(1,33): foster-parenting-character
-(1,38): foster-parenting-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <center>
-|       " "
-|       <font>
-|         "a"
-|     <font>
-|       <img>
-|       " "
-|     <table>
-|       " "
-|       <tbody>
-|         <tr>
-|           <td>
-|             " "
-|           " "
-|         " "
-
-#data
-<table><tr><p><a><p>You should see this text.
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,14): unexpected-start-tag-implies-table-voodoo
-(1,17): unexpected-start-tag-implies-table-voodoo
-(1,20): unexpected-start-tag-implies-table-voodoo
-(1,20): closing-non-current-p-element
-(1,21): foster-parenting-character
-(1,22): foster-parenting-character
-(1,23): foster-parenting-character
-(1,24): foster-parenting-character
-(1,25): foster-parenting-character
-(1,26): foster-parenting-character
-(1,27): foster-parenting-character
-(1,28): foster-parenting-character
-(1,29): foster-parenting-character
-(1,30): foster-parenting-character
-(1,31): foster-parenting-character
-(1,32): foster-parenting-character
-(1,33): foster-parenting-character
-(1,34): foster-parenting-character
-(1,35): foster-parenting-character
-(1,36): foster-parenting-character
-(1,37): foster-parenting-character
-(1,38): foster-parenting-character
-(1,39): foster-parenting-character
-(1,40): foster-parenting-character
-(1,41): foster-parenting-character
-(1,42): foster-parenting-character
-(1,43): foster-parenting-character
-(1,44): foster-parenting-character
-(1,45): foster-parenting-character
-(1,45): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       <a>
-|     <p>
-|       <a>
-|         "You should see this text."
-|     <table>
-|       <tbody>
-|         <tr>
-
-#data
-<TABLE>
-<TR>
-<CENTER><CENTER><TD></TD></TR><TR>
-<FONT>
-<TABLE><tr></tr></TABLE>
-</P>
-<a></font><font></a>
-This page contains an insanely badly-nested tag sequence.
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(3,8): unexpected-start-tag-implies-table-voodoo
-(3,16): unexpected-start-tag-implies-table-voodoo
-(4,6): unexpected-start-tag-implies-table-voodoo
-(4,6): unexpected character token in table (the newline)
-(5,7): unexpected-start-tag-implies-end-tag
-(6,4): unexpected p end tag
-(7,10): adoption-agency-1.3
-(7,20): adoption-agency-1.3
-(8,57): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <center>
-|       <center>
-|     <font>
-|       "
-"
-|     <table>
-|       "
-"
-|       <tbody>
-|         <tr>
-|           "
-"
-|           <td>
-|         <tr>
-|           "
-"
-|     <table>
-|       <tbody>
-|         <tr>
-|     <font>
-|       "
-"
-|       <p>
-|       "
-"
-|       <a>
-|     <a>
-|       <font>
-|     <font>
-|       "
-This page contains an insanely badly-nested tag sequence."
-
-#data
-<html>
-<body>
-<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
-nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
-</body>
-</html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(3,56): adoption-agency-1.3
-(4,58): adoption-agency-1.3
-(5,7): expected-one-end-tag-but-got-another
-#document
-| <html>
-|   <head>
-|   <body>
-|     "
-"
-|     <b>
-|       <nobr>
-|     <div>
-|       <b>
-|         <nobr>
-|           "This text is in a div inside a nobr"
-|         "More text that should not be in the nobr, i.e., the
-nobr should have closed the div inside it implicitly. "
-|       <pre>
-|         "A pre tag outside everything else."
-|       "
-
-"
diff --git a/html/testdata/webkit/webkit01.dat b/html/testdata/webkit/webkit01.dat
deleted file mode 100644 (file)
index 2127cfe..0000000
+++ /dev/null
@@ -1,755 +0,0 @@
-#data
-Test
-#errors
-(1,4): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "Test"
-
-#data
-<div></div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-
-#data
-<div>Test</div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "Test"
-
-#data
-<di
-#errors
-(1,3): eof-in-tag-name
-(1,3): expected-doctype-but-got-eof
-#new-errors
-(1:4) eof-in-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<div>Hello</div>
-<script>
-console.log("PASS");
-</script>
-<div>Bye</div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "Hello"
-|     "
-"
-|     <script>
-|       "
-console.log("PASS");
-"
-|     "
-"
-|     <div>
-|       "Bye"
-
-#data
-<div foo="bar">Hello</div>
-#errors
-(1,15): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       foo="bar"
-|       "Hello"
-
-#data
-<div>Hello</div>
-<script>
-console.log("FOO<span>BAR</span>BAZ");
-</script>
-<div>Bye</div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       "Hello"
-|     "
-"
-|     <script>
-|       "
-console.log("FOO<span>BAR</span>BAZ");
-"
-|     "
-"
-|     <div>
-|       "Bye"
-
-#data
-<foo bar="baz"></foo><potato quack="duck"></potato>
-#errors
-(1,15): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <foo>
-|       bar="baz"
-|     <potato>
-|       quack="duck"
-
-#data
-<foo bar="baz"><potato quack="duck"></potato></foo>
-#errors
-(1,15): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <foo>
-|       bar="baz"
-|       <potato>
-|         quack="duck"
-
-#data
-<foo></foo bar="baz"><potato></potato quack="duck">
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,21): attributes-in-end-tag
-(1,51): attributes-in-end-tag
-#new-errors
-(1:21) end-tag-with-attributes
-(1:51) end-tag-with-attributes
-#document
-| <html>
-|   <head>
-|   <body>
-|     <foo>
-|     <potato>
-
-#data
-</ tttt>
-#errors
-(1,2): expected-closing-tag-but-got-char
-(1,8): expected-doctype-but-got-eof
-#new-errors
-(1:3) invalid-first-character-of-tag-name
-#document
-| <!--  tttt -->
-| <html>
-|   <head>
-|   <body>
-
-#data
-<div FOO ><img><img></div>
-#errors
-(1,10): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       foo=""
-|       <img>
-|       <img>
-
-#data
-<p>Test</p<p>Test2</p>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,13): unexpected-end-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       "TestTest2"
-
-#data
-<rdar://problem/6869687>
-#errors
-(1,7): unexpected-character-after-solidus-in-tag
-(1,8): unexpected-character-after-solidus-in-tag
-(1,16): unexpected-character-after-solidus-in-tag
-(1,24): expected-doctype-but-got-start-tag
-(1,24): expected-closing-tag-but-got-eof
-#new-errors
-(1:8) unexpected-solidus-in-tag
-(1:9) unexpected-solidus-in-tag
-(1:17) unexpected-solidus-in-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <rdar:>
-|       6869687=""
-|       problem=""
-
-#data
-<A>test< /A>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,8): expected-tag-name
-(1,12): expected-closing-tag-but-got-eof
-#new-errors
-(1:9) invalid-first-character-of-tag-name
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|       "test< /A>"
-
-#data
-&lt;
-#errors
-(1,4): expected-doctype-but-got-chars
-#document
-| <html>
-|   <head>
-|   <body>
-|     "<"
-
-#data
-<body foo='bar'><body foo='baz' yo='mama'>
-#errors
-(1,16): expected-doctype-but-got-start-tag
-(1,42): unexpected-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     foo="bar"
-|     yo="mama"
-
-#data
-<body></br foo="bar"></body>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,21): attributes-in-end-tag
-(1,21): unexpected-end-tag-treated-as
-#new-errors
-(1:21) end-tag-with-attributes
-#document
-| <html>
-|   <head>
-|   <body>
-|     <br>
-
-#data
-<bdy><br foo="bar"></body>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,26): expected-one-end-tag-but-got-another
-#document
-| <html>
-|   <head>
-|   <body>
-|     <bdy>
-|       <br>
-|         foo="bar"
-
-#data
-<body></body></br foo="bar">
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,28): attributes-in-end-tag
-(1,28): unexpected-end-tag-after-body
-(1,28): unexpected-end-tag-treated-as
-#new-errors
-(1:28) end-tag-with-attributes
-#document
-| <html>
-|   <head>
-|   <body>
-|     <br>
-
-#data
-<bdy></body><br foo="bar">
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,12): expected-one-end-tag-but-got-another
-(1,26): unexpected-start-tag-after-body
-(1,26): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <bdy>
-|       <br>
-|         foo="bar"
-
-#data
-<html><body></body></html><!-- Hi there -->
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-| <!--  Hi there  -->
-
-#data
-<html><body></body></html><!-- Comment A --><!-- Comment B --><!-- Comment C --><!-- Comment D --><!-- Comment E -->
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-| <!--  Comment A  -->
-| <!--  Comment B  -->
-| <!--  Comment C  -->
-| <!--  Comment D  -->
-| <!--  Comment E  -->
-
-#data
-<html><body></body></html>x<!-- Hi there -->
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,27): expected-eof-but-got-char
-#document
-| <html>
-|   <head>
-|   <body>
-|     "x"
-|     <!--  Hi there  -->
-
-#data
-<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,27): expected-eof-but-got-char
-#document
-| <html>
-|   <head>
-|   <body>
-|     "x"
-|     <!--  Hi there  -->
-| <!--  Again  -->
-
-#data
-<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,27): expected-eof-but-got-char
-#document
-| <html>
-|   <head>
-|   <body>
-|     "x"
-|     <!--  Hi there  -->
-| <!--  Again  -->
-
-#data
-<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,27): XXX-undefined-error
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       <div>
-|         <rp>
-|           "xx"
-
-#data
-<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,27): XXX-undefined-error
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ruby>
-|       <div>
-|         <rt>
-|           "xx"
-
-#data
-<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <frameset>
-|     <!-- 1 -->
-|     <noframes>
-|       "A"
-|     <!-- 2 -->
-|   <!-- 3 -->
-|   <noframes>
-|     "B"
-|   <!-- 4 -->
-|   <noframes>
-|     "C"
-| <!-- 5 -->
-| <!-- 6 -->
-
-#data
-<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
-#errors
-(1,8): expected-doctype-but-got-start-tag
-(1,25): unexpected-select-in-select
-(1,59): unexpected-select-in-select
-(1,93): unexpected-select-in-select
-(1,127): unexpected-select-in-select
-#document
-| <html>
-|   <head>
-|   <body>
-|     <select>
-|       <option>
-|         "A"
-|     <option>
-|       "B"
-|       <select>
-|         <option>
-|           "C"
-|     <option>
-|       "D"
-|       <select>
-|         <option>
-|           "E"
-|     <option>
-|       "F"
-|       <select>
-|         <option>
-|           "G"
-
-#data
-<dd><dd><dt><dt><dd><li><li>
-#errors
-(1,4): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <dd>
-|     <dd>
-|     <dt>
-|     <dt>
-|     <dd>
-|       <li>
-|       <li>
-
-#data
-<div><b></div><div><nobr>a<nobr>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,14): end-tag-too-early
-(1,32): unexpected-start-tag-implies-end-tag
-(1,32): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <b>
-|     <div>
-|       <b>
-|         <nobr>
-|           "a"
-|         <nobr>
-
-#data
-<head></head>
-<body></body>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   "
-"
-|   <body>
-
-#data
-<head></head> <style></style>ddd
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,21): unexpected-start-tag-out-of-my-head
-#document
-| <html>
-|   <head>
-|     <style>
-|   " "
-|   <body>
-|     "ddd"
-
-#data
-<kbd><table></kbd><col><select><tr>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,18): unexpected-end-tag-implies-table-voodoo
-(1,18): unexpected-end-tag
-(1,31): unexpected-start-tag-implies-table-voodoo
-(1,35): unexpected-table-element-start-tag-in-select-in-table
-(1,35): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     <kbd>
-|       <select>
-|       <table>
-|         <colgroup>
-|           <col>
-|         <tbody>
-|           <tr>
-
-#data
-<kbd><table></kbd><col><select><tr></table><div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,18): unexpected-end-tag-implies-table-voodoo
-(1,18): unexpected-end-tag
-(1,31): unexpected-start-tag-implies-table-voodoo
-(1,35): unexpected-table-element-start-tag-in-select-in-table
-(1,48): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <kbd>
-|       <select>
-|       <table>
-|         <colgroup>
-|           <col>
-|         <tbody>
-|           <tr>
-|       <div>
-
-#data
-<a><li><style></style><title></title></a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,41): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|     <li>
-|       <a>
-|         <style>
-|         <title>
-
-#data
-<font></p><p><meta><title></title></font>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,10): unexpected-end-tag
-(1,41): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <font>
-|       <p>
-|     <p>
-|       <font>
-|         <meta>
-|         <title>
-
-#data
-<a><center><title></title><a>
-#errors
-(1,3): expected-doctype-but-got-start-tag
-(1,29): unexpected-start-tag-implies-end-tag
-(1,29): adoption-agency-1.3
-(1,29): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <a>
-|     <center>
-|       <a>
-|         <title>
-|       <a>
-
-#data
-<svg><title><div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,17): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg title>
-|         <div>
-
-#data
-<svg><title><rect><div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,23): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg title>
-|         <rect>
-|           <div>
-
-#data
-<svg><title><svg><div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,22): unexpected-html-element-in-foreign-content
-(1,22): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg title>
-|         <svg svg>
-|         <div>
-
-#data
-<img <="" FAIL>
-#errors
-(1,6): invalid-character-in-attribute-name
-(1,15): expected-doctype-but-got-start-tag
-#new-errors
-(1:6) unexpected-character-in-attribute-name
-#document
-| <html>
-|   <head>
-|   <body>
-|     <img>
-|       <=""
-|       fail=""
-
-#data
-<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
-#errors
-(1,4): expected-doctype-but-got-start-tag
-(1,23): non-void-element-with-trailing-solidus
-(1,29): end-tag-too-early
-#new-errors
-(1:9-1:24) non-void-html-element-start-tag-with-trailing-solidus
-#document
-| <html>
-|   <head>
-|   <body>
-|     <ul>
-|       <li>
-|         <div>
-|           id="foo"
-|           "A"
-|       <li>
-|         "B"
-|         <div>
-|           "C"
-
-#data
-<svg><em><desc></em>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,9): unexpected-html-element-in-foreign-content
-(1,20): adoption-agency-1.3
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|     <em>
-|       <desc>
-
-#data
-<table><tr><td><svg><desc><td></desc><circle>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             <svg svg>
-|               <svg desc>
-|           <td>
-|             <circle>
-
-#data
-<svg><tfoot></mi><td>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-(1,17): unexpected-end-tag
-(1,17): unexpected-end-tag
-(1,21): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg tfoot>
-|         <svg td>
-
-#data
-<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <math math>
-|       <math mrow>
-|         <math mrow>
-|           <math mn>
-|             "1"
-|         <math mi>
-|           "a"
-
-#data
-<!doctype html><input type="hidden"><frameset>
-#errors
-(1,46): unexpected-start-tag
-(1,46): eof-in-frameset
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <frameset>
-
-#data
-<!doctype html><input type="button"><frameset>
-#errors
-(1,46): unexpected-start-tag
-#document
-| <!DOCTYPE html>
-| <html>
-|   <head>
-|   <body>
-|     <input>
-|       type="button"
diff --git a/html/testdata/webkit/webkit02.dat b/html/testdata/webkit/webkit02.dat
deleted file mode 100644 (file)
index 791991d..0000000
+++ /dev/null
@@ -1,303 +0,0 @@
-#data
-<foo bar=qux/>
-#errors
-(1,14): expected-doctype-but-got-start-tag
-(1,14): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <foo>
-|       bar="qux/"
-
-#data
-<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
-#errors
-(1,15): expected-doctype-but-got-start-tag
-#script-on
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       id="status"
-|       <noscript>
-|         "<strong>A</strong>"
-|       <span>
-|         "B"
-
-#data
-<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
-#errors
-(1,15): expected-doctype-but-got-start-tag
-#script-off
-#document
-| <html>
-|   <head>
-|   <body>
-|     <p>
-|       id="status"
-|       <noscript>
-|         <strong>
-|           "A"
-|       <span>
-|         "B"
-
-#data
-<div><sarcasm><div></div></sarcasm></div>
-#errors
-(1,5): expected-doctype-but-got-start-tag
-#document
-| <html>
-|   <head>
-|   <body>
-|     <div>
-|       <sarcasm>
-|         <div>
-
-#data
-<html><body><img src="" border="0" alt="><div>A</div></body></html>
-#errors
-(1,6): expected-doctype-but-got-start-tag
-(1,67): eof-in-attribute-value-double-quote
-#new-errors
-(1:68) eof-in-tag
-#document
-| <html>
-|   <head>
-|   <body>
-
-#data
-<table><td></tbody>A
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-(1,20): foster-parenting-character
-(1,20): eof-in-table
-#document
-| <html>
-|   <head>
-|   <body>
-|     "A"
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-
-#data
-<table><td></thead>A
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-(1,19): XXX-undefined-error
-(1,20): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "A"
-
-#data
-<table><td></tfoot>A
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,11): unexpected-cell-in-table-body
-(1,19): XXX-undefined-error
-(1,20): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <tbody>
-|         <tr>
-|           <td>
-|             "A"
-
-#data
-<table><thead><td></tbody>A
-#errors
-(1,7): expected-doctype-but-got-start-tag
-(1,18): unexpected-cell-in-table-body
-(1,26): XXX-undefined-error
-(1,27): expected-closing-tag-but-got-eof
-#document
-| <html>
-|   <head>
-|   <body>
-|     <table>
-|       <thead>
-|         <tr>
-|           <td>
-|             "A"
-
-#data
-<legend>test</legend>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <legend>
-|       "test"
-
-#data
-<table><input>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <input>
-|     <table>
-
-#data
-<b><em><foo><foo><aside></b>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <em>
-|         <foo>
-|           <foo>
-|     <em>
-|       <aside>
-|         <b>
-
-#data
-<b><em><foo><foo><aside></b></em>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <em>
-|         <foo>
-|           <foo>
-|     <em>
-|     <aside>
-|       <em>
-|         <b>
-
-#data
-<b><em><foo><foo><foo><aside></b>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <em>
-|         <foo>
-|           <foo>
-|             <foo>
-|     <aside>
-|       <b>
-
-#data
-<b><em><foo><foo><foo><aside></b></em>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <b>
-|       <em>
-|         <foo>
-|           <foo>
-|             <foo>
-|     <aside>
-|       <b>
-
-#data
-<b><em><foo><foo><foo><foo><foo><foo><foo><foo><foo><foo><aside></b></em>
-#errors
-#document-fragment
-div
-#document
-| <b>
-|   <em>
-|     <foo>
-|       <foo>
-|         <foo>
-|           <foo>
-|             <foo>
-|               <foo>
-|                 <foo>
-|                   <foo>
-|                     <foo>
-|                       <foo>
-| <aside>
-|   <b>
-
-#data
-<b><em><foo><foob><foob><foob><foob><fooc><fooc><fooc><fooc><food><aside></b></em>
-#errors
-#document-fragment
-div
-#document
-| <b>
-|   <em>
-|     <foo>
-|       <foob>
-|         <foob>
-|           <foob>
-|             <foob>
-|               <fooc>
-|                 <fooc>
-|                   <fooc>
-|                     <fooc>
-|                       <food>
-| <aside>
-|   <b>
-
-#data
-<option><XH<optgroup></optgroup>
-#errors
-#document-fragment
-select
-#document
-| <option>
-
-#data
-<svg><foreignObject><div>foo</div><plaintext></foreignObject></svg><div>bar</div>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg foreignObject>
-|         <div>
-|           "foo"
-|         <plaintext>
-|           "</foreignObject></svg><div>bar</div>"
-
-#data
-<svg><foreignObject></foreignObject><title></svg>foo
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <svg svg>
-|       <svg foreignObject>
-|       <svg title>
-|     "foo"
-
-#data
-</foreignObject><plaintext><div>foo</div>
-#errors
-#document
-| <html>
-|   <head>
-|   <body>
-|     <plaintext>
-|       "<div>foo</div>"
diff --git a/html/token.go b/html/token.go
deleted file mode 100644 (file)
index b5a7caa..0000000
+++ /dev/null
@@ -1,1272 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "bytes"
-       "errors"
-       "io"
-       "strconv"
-       "strings"
-
-       "git.earlybird.gay/today-engine/html/atom"
-)
-
-// A TokenType is the type of a Token.
-type TokenType uint32
-
-const (
-       // ErrorToken means that an error occurred during tokenization.
-       ErrorToken TokenType = iota
-       // TextToken means a text node.
-       TextToken
-       // A StartTagToken looks like <a>.
-       StartTagToken
-       // An EndTagToken looks like </a>.
-       EndTagToken
-       // A SelfClosingTagToken tag looks like <br/>.
-       SelfClosingTagToken
-       // A CommentToken looks like <!--x-->.
-       CommentToken
-       // A DoctypeToken looks like <!DOCTYPE x>
-       DoctypeToken
-)
-
-// ErrBufferExceeded means that the buffering limit was exceeded.
-var ErrBufferExceeded = errors.New("max buffer exceeded")
-
-// String returns a string representation of the TokenType.
-func (t TokenType) String() string {
-       switch t {
-       case ErrorToken:
-               return "Error"
-       case TextToken:
-               return "Text"
-       case StartTagToken:
-               return "StartTag"
-       case EndTagToken:
-               return "EndTag"
-       case SelfClosingTagToken:
-               return "SelfClosingTag"
-       case CommentToken:
-               return "Comment"
-       case DoctypeToken:
-               return "Doctype"
-       }
-       return "Invalid(" + strconv.Itoa(int(t)) + ")"
-}
-
-// An Attribute is an attribute namespace-key-value triple. Namespace is
-// non-empty for foreign attributes like xlink, Key is alphabetic (and hence
-// does not contain escapable characters like '&', '<' or '>'), and Val is
-// unescaped (it looks like "a<b" rather than "a&lt;b").
-//
-// Namespace is only used by the parser, not the tokenizer.
-type Attribute struct {
-       Namespace, Key, Val string
-}
-
-// A Token consists of a TokenType and some Data (tag name for start and end
-// tags, content for text, comments and doctypes). A tag Token may also contain
-// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b"
-// rather than "a&lt;b"). For tag Tokens, DataAtom is the atom for Data, or
-// zero if Data is not a known tag name.
-type Token struct {
-       Type     TokenType
-       DataAtom atom.Atom
-       Data     string
-       Attr     []Attribute
-}
-
-// tagString returns a string representation of a tag Token's Data and Attr.
-func (t Token) tagString() string {
-       if len(t.Attr) == 0 {
-               return t.Data
-       }
-       buf := bytes.NewBufferString(t.Data)
-       for _, a := range t.Attr {
-               buf.WriteByte(' ')
-               buf.WriteString(a.Key)
-               buf.WriteString(`="`)
-               escape(buf, a.Val)
-               buf.WriteByte('"')
-       }
-       return buf.String()
-}
-
-// String returns a string representation of the Token.
-func (t Token) String() string {
-       switch t.Type {
-       case ErrorToken:
-               return ""
-       case TextToken:
-               return EscapeString(t.Data)
-       case StartTagToken:
-               return "<" + t.tagString() + ">"
-       case EndTagToken:
-               return "</" + t.tagString() + ">"
-       case SelfClosingTagToken:
-               return "<" + t.tagString() + "/>"
-       case CommentToken:
-               return "<!--" + escapeCommentString(t.Data) + "-->"
-       case DoctypeToken:
-               return "<!DOCTYPE " + EscapeString(t.Data) + ">"
-       }
-       return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
-}
-
-// span is a range of bytes in a Tokenizer's buffer. The start is inclusive,
-// the end is exclusive.
-type span struct {
-       start, end int
-}
-
-// A Tokenizer returns a stream of HTML Tokens.
-type Tokenizer struct {
-       // r is the source of the HTML text.
-       r io.Reader
-       // tt is the TokenType of the current token.
-       tt TokenType
-       // err is the first error encountered during tokenization. It is possible
-       // for tt != Error && err != nil to hold: this means that Next returned a
-       // valid token but the subsequent Next call will return an error token.
-       // For example, if the HTML text input was just "plain", then the first
-       // Next call would set z.err to io.EOF but return a TextToken, and all
-       // subsequent Next calls would return an ErrorToken.
-       // err is never reset. Once it becomes non-nil, it stays non-nil.
-       err error
-       // readErr is the error returned by the io.Reader r. It is separate from
-       // err because it is valid for an io.Reader to return (n int, err1 error)
-       // such that n > 0 && err1 != nil, and callers should always process the
-       // n > 0 bytes before considering the error err1.
-       readErr error
-       // buf[raw.start:raw.end] holds the raw bytes of the current token.
-       // buf[raw.end:] is buffered input that will yield future tokens.
-       raw span
-       buf []byte
-       // maxBuf limits the data buffered in buf. A value of 0 means unlimited.
-       maxBuf int
-       // buf[data.start:data.end] holds the raw bytes of the current token's data:
-       // a text token's text, a tag token's tag name, etc.
-       data span
-       // pendingAttr is the attribute key and value currently being tokenized.
-       // When complete, pendingAttr is pushed onto attr. nAttrReturned is
-       // incremented on each call to TagAttr.
-       pendingAttr   [2]span
-       attr          [][2]span
-       nAttrReturned int
-       // rawTag is the "script" in "</script>" that closes the next token. If
-       // non-empty, the subsequent call to Next will return a raw or RCDATA text
-       // token: one that treats "<p>" as text instead of an element.
-       // rawTag's contents are lower-cased.
-       rawTag string
-       // textIsRaw is whether the current text token's data is not escaped.
-       textIsRaw bool
-       // convertNUL is whether NUL bytes in the current token's data should
-       // be converted into \ufffd replacement characters.
-       convertNUL bool
-       // allowCDATA is whether CDATA sections are allowed in the current context.
-       allowCDATA bool
-}
-
-// AllowCDATA sets whether or not the tokenizer recognizes <![CDATA[foo]]> as
-// the text "foo". The default value is false, which means to recognize it as
-// a bogus comment "<!-- [CDATA[foo]] -->" instead.
-//
-// Strictly speaking, an HTML5 compliant tokenizer should allow CDATA if and
-// only if tokenizing foreign content, such as MathML and SVG. However,
-// tracking foreign-contentness is difficult to do purely in the tokenizer,
-// as opposed to the parser, due to HTML integration points: an <svg> element
-// can contain a <foreignObject> that is foreign-to-SVG but not foreign-to-
-// HTML. For strict compliance with the HTML5 tokenization algorithm, it is the
-// responsibility of the user of a tokenizer to call AllowCDATA as appropriate.
-// In practice, if using the tokenizer without caring whether MathML or SVG
-// CDATA is text or comments, such as tokenizing HTML to find all the anchor
-// text, it is acceptable to ignore this responsibility.
-func (z *Tokenizer) AllowCDATA(allowCDATA bool) {
-       z.allowCDATA = allowCDATA
-}
-
-// NextIsNotRawText instructs the tokenizer that the next token should not be
-// considered as 'raw text'. Some elements, such as script and title elements,
-// normally require the next token after the opening tag to be 'raw text' that
-// has no child elements. For example, tokenizing "<title>a<b>c</b>d</title>"
-// yields a start tag token for "<title>", a text token for "a<b>c</b>d", and
-// an end tag token for "</title>". There are no distinct start tag or end tag
-// tokens for the "<b>" and "</b>".
-//
-// This tokenizer implementation will generally look for raw text at the right
-// times. Strictly speaking, an HTML5 compliant tokenizer should not look for
-// raw text if in foreign content: <title> generally needs raw text, but a
-// <title> inside an <svg> does not. Another example is that a <textarea>
-// generally needs raw text, but a <textarea> is not allowed as an immediate
-// child of a <select>; in normal parsing, a <textarea> implies </select>, but
-// one cannot close the implicit element when parsing a <select>'s InnerHTML.
-// Similarly to AllowCDATA, tracking the correct moment to override raw-text-
-// ness is difficult to do purely in the tokenizer, as opposed to the parser.
-// For strict compliance with the HTML5 tokenization algorithm, it is the
-// responsibility of the user of a tokenizer to call NextIsNotRawText as
-// appropriate. In practice, like AllowCDATA, it is acceptable to ignore this
-// responsibility for basic usage.
-//
-// Note that this 'raw text' concept is different from the one offered by the
-// Tokenizer.Raw method.
-func (z *Tokenizer) NextIsNotRawText() {
-       z.rawTag = ""
-}
-
-// Err returns the error associated with the most recent ErrorToken token.
-// This is typically io.EOF, meaning the end of tokenization.
-func (z *Tokenizer) Err() error {
-       if z.tt != ErrorToken {
-               return nil
-       }
-       return z.err
-}
-
-// readByte returns the next byte from the input stream, doing a buffered read
-// from z.r into z.buf if necessary. z.buf[z.raw.start:z.raw.end] remains a contiguous byte
-// slice that holds all the bytes read so far for the current token.
-// It sets z.err if the underlying reader returns an error.
-// Pre-condition: z.err == nil.
-func (z *Tokenizer) readByte() byte {
-       if z.raw.end >= len(z.buf) {
-               // Our buffer is exhausted and we have to read from z.r. Check if the
-               // previous read resulted in an error.
-               if z.readErr != nil {
-                       z.err = z.readErr
-                       return 0
-               }
-               // We copy z.buf[z.raw.start:z.raw.end] to the beginning of z.buf. If the length
-               // z.raw.end - z.raw.start is more than half the capacity of z.buf, then we
-               // allocate a new buffer before the copy.
-               c := cap(z.buf)
-               d := z.raw.end - z.raw.start
-               var buf1 []byte
-               if 2*d > c {
-                       buf1 = make([]byte, d, 2*c)
-               } else {
-                       buf1 = z.buf[:d]
-               }
-               copy(buf1, z.buf[z.raw.start:z.raw.end])
-               if x := z.raw.start; x != 0 {
-                       // Adjust the data/attr spans to refer to the same contents after the copy.
-                       z.data.start -= x
-                       z.data.end -= x
-                       z.pendingAttr[0].start -= x
-                       z.pendingAttr[0].end -= x
-                       z.pendingAttr[1].start -= x
-                       z.pendingAttr[1].end -= x
-                       for i := range z.attr {
-                               z.attr[i][0].start -= x
-                               z.attr[i][0].end -= x
-                               z.attr[i][1].start -= x
-                               z.attr[i][1].end -= x
-                       }
-               }
-               z.raw.start, z.raw.end, z.buf = 0, d, buf1[:d]
-               // Now that we have copied the live bytes to the start of the buffer,
-               // we read from z.r into the remainder.
-               var n int
-               n, z.readErr = readAtLeastOneByte(z.r, buf1[d:cap(buf1)])
-               if n == 0 {
-                       z.err = z.readErr
-                       return 0
-               }
-               z.buf = buf1[:d+n]
-       }
-       x := z.buf[z.raw.end]
-       z.raw.end++
-       if z.maxBuf > 0 && z.raw.end-z.raw.start >= z.maxBuf {
-               z.err = ErrBufferExceeded
-               return 0
-       }
-       return x
-}
-
-// Buffered returns a slice containing data buffered but not yet tokenized.
-func (z *Tokenizer) Buffered() []byte {
-       return z.buf[z.raw.end:]
-}
-
-// readAtLeastOneByte wraps an io.Reader so that reading cannot return (0, nil).
-// It returns io.ErrNoProgress if the underlying r.Read method returns (0, nil)
-// too many times in succession.
-func readAtLeastOneByte(r io.Reader, b []byte) (int, error) {
-       for i := 0; i < 100; i++ {
-               if n, err := r.Read(b); n != 0 || err != nil {
-                       return n, err
-               }
-       }
-       return 0, io.ErrNoProgress
-}
-
-// skipWhiteSpace skips past any white space.
-func (z *Tokenizer) skipWhiteSpace() {
-       if z.err != nil {
-               return
-       }
-       for {
-               c := z.readByte()
-               if z.err != nil {
-                       return
-               }
-               switch c {
-               case ' ', '\n', '\r', '\t', '\f':
-                       // No-op.
-               default:
-                       z.raw.end--
-                       return
-               }
-       }
-}
-
-// readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and
-// is typically something like "script" or "textarea".
-func (z *Tokenizer) readRawOrRCDATA() {
-       if z.rawTag == "script" {
-               z.readScript()
-               z.textIsRaw = true
-               z.rawTag = ""
-               return
-       }
-loop:
-       for {
-               c := z.readByte()
-               if z.err != nil {
-                       break loop
-               }
-               if c != '<' {
-                       continue loop
-               }
-               c = z.readByte()
-               if z.err != nil {
-                       break loop
-               }
-               if c != '/' {
-                       z.raw.end--
-                       continue loop
-               }
-               if z.readRawEndTag() || z.err != nil {
-                       break loop
-               }
-       }
-       z.data.end = z.raw.end
-       // A textarea's or title's RCDATA can contain escaped entities.
-       z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title"
-       z.rawTag = ""
-}
-
-// readRawEndTag attempts to read a tag like "</foo>", where "foo" is z.rawTag.
-// If it succeeds, it backs up the input position to reconsume the tag and
-// returns true. Otherwise it returns false. The opening "</" has already been
-// consumed.
-func (z *Tokenizer) readRawEndTag() bool {
-       for i := 0; i < len(z.rawTag); i++ {
-               c := z.readByte()
-               if z.err != nil {
-                       return false
-               }
-               if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
-                       z.raw.end--
-                       return false
-               }
-       }
-       c := z.readByte()
-       if z.err != nil {
-               return false
-       }
-       switch c {
-       case ' ', '\n', '\r', '\t', '\f', '/', '>':
-               // The 3 is 2 for the leading "</" plus 1 for the trailing character c.
-               z.raw.end -= 3 + len(z.rawTag)
-               return true
-       }
-       z.raw.end--
-       return false
-}
-
-// readScript reads until the next </script> tag, following the byzantine
-// rules for escaping/hiding the closing tag.
-func (z *Tokenizer) readScript() {
-       defer func() {
-               z.data.end = z.raw.end
-       }()
-       var c byte
-
-scriptData:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       if c == '<' {
-               goto scriptDataLessThanSign
-       }
-       goto scriptData
-
-scriptDataLessThanSign:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       switch c {
-       case '/':
-               goto scriptDataEndTagOpen
-       case '!':
-               goto scriptDataEscapeStart
-       }
-       z.raw.end--
-       goto scriptData
-
-scriptDataEndTagOpen:
-       if z.readRawEndTag() || z.err != nil {
-               return
-       }
-       goto scriptData
-
-scriptDataEscapeStart:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       if c == '-' {
-               goto scriptDataEscapeStartDash
-       }
-       z.raw.end--
-       goto scriptData
-
-scriptDataEscapeStartDash:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       if c == '-' {
-               goto scriptDataEscapedDashDash
-       }
-       z.raw.end--
-       goto scriptData
-
-scriptDataEscaped:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       switch c {
-       case '-':
-               goto scriptDataEscapedDash
-       case '<':
-               goto scriptDataEscapedLessThanSign
-       }
-       goto scriptDataEscaped
-
-scriptDataEscapedDash:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       switch c {
-       case '-':
-               goto scriptDataEscapedDashDash
-       case '<':
-               goto scriptDataEscapedLessThanSign
-       }
-       goto scriptDataEscaped
-
-scriptDataEscapedDashDash:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       switch c {
-       case '-':
-               goto scriptDataEscapedDashDash
-       case '<':
-               goto scriptDataEscapedLessThanSign
-       case '>':
-               goto scriptData
-       }
-       goto scriptDataEscaped
-
-scriptDataEscapedLessThanSign:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       if c == '/' {
-               goto scriptDataEscapedEndTagOpen
-       }
-       if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
-               goto scriptDataDoubleEscapeStart
-       }
-       z.raw.end--
-       goto scriptData
-
-scriptDataEscapedEndTagOpen:
-       if z.readRawEndTag() || z.err != nil {
-               return
-       }
-       goto scriptDataEscaped
-
-scriptDataDoubleEscapeStart:
-       z.raw.end--
-       for i := 0; i < len("script"); i++ {
-               c = z.readByte()
-               if z.err != nil {
-                       return
-               }
-               if c != "script"[i] && c != "SCRIPT"[i] {
-                       z.raw.end--
-                       goto scriptDataEscaped
-               }
-       }
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       switch c {
-       case ' ', '\n', '\r', '\t', '\f', '/', '>':
-               goto scriptDataDoubleEscaped
-       }
-       z.raw.end--
-       goto scriptDataEscaped
-
-scriptDataDoubleEscaped:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       switch c {
-       case '-':
-               goto scriptDataDoubleEscapedDash
-       case '<':
-               goto scriptDataDoubleEscapedLessThanSign
-       }
-       goto scriptDataDoubleEscaped
-
-scriptDataDoubleEscapedDash:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       switch c {
-       case '-':
-               goto scriptDataDoubleEscapedDashDash
-       case '<':
-               goto scriptDataDoubleEscapedLessThanSign
-       }
-       goto scriptDataDoubleEscaped
-
-scriptDataDoubleEscapedDashDash:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       switch c {
-       case '-':
-               goto scriptDataDoubleEscapedDashDash
-       case '<':
-               goto scriptDataDoubleEscapedLessThanSign
-       case '>':
-               goto scriptData
-       }
-       goto scriptDataDoubleEscaped
-
-scriptDataDoubleEscapedLessThanSign:
-       c = z.readByte()
-       if z.err != nil {
-               return
-       }
-       if c == '/' {
-               goto scriptDataDoubleEscapeEnd
-       }
-       z.raw.end--
-       goto scriptDataDoubleEscaped
-
-scriptDataDoubleEscapeEnd:
-       if z.readRawEndTag() {
-               z.raw.end += len("</script>")
-               goto scriptDataEscaped
-       }
-       if z.err != nil {
-               return
-       }
-       goto scriptDataDoubleEscaped
-}
-
-// readComment reads the next comment token starting with "<!--". The opening
-// "<!--" has already been consumed.
-func (z *Tokenizer) readComment() {
-       // When modifying this function, consider manually increasing the
-       // maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
-       // That increase should only be temporary, not committed, as it
-       // exponentially affects the test running time.
-
-       z.data.start = z.raw.end
-       defer func() {
-               if z.data.end < z.data.start {
-                       // It's a comment with no data, like <!-->.
-                       z.data.end = z.data.start
-               }
-       }()
-
-       var dashCount int
-       beginning := true
-       for {
-               c := z.readByte()
-               if z.err != nil {
-                       z.data.end = z.calculateAbruptCommentDataEnd()
-                       return
-               }
-               switch c {
-               case '-':
-                       dashCount++
-                       continue
-               case '>':
-                       if dashCount >= 2 || beginning {
-                               z.data.end = z.raw.end - len("-->")
-                               return
-                       }
-               case '!':
-                       if dashCount >= 2 {
-                               c = z.readByte()
-                               if z.err != nil {
-                                       z.data.end = z.calculateAbruptCommentDataEnd()
-                                       return
-                               } else if c == '>' {
-                                       z.data.end = z.raw.end - len("--!>")
-                                       return
-                               } else if c == '-' {
-                                       dashCount = 1
-                                       beginning = false
-                                       continue
-                               }
-                       }
-               }
-               dashCount = 0
-               beginning = false
-       }
-}
-
-func (z *Tokenizer) calculateAbruptCommentDataEnd() int {
-       raw := z.Raw()
-       const prefixLen = len("<!--")
-       if len(raw) >= prefixLen {
-               raw = raw[prefixLen:]
-               if hasSuffix(raw, "--!") {
-                       return z.raw.end - 3
-               } else if hasSuffix(raw, "--") {
-                       return z.raw.end - 2
-               } else if hasSuffix(raw, "-") {
-                       return z.raw.end - 1
-               }
-       }
-       return z.raw.end
-}
-
-func hasSuffix(b []byte, suffix string) bool {
-       if len(b) < len(suffix) {
-               return false
-       }
-       b = b[len(b)-len(suffix):]
-       for i := range b {
-               if b[i] != suffix[i] {
-                       return false
-               }
-       }
-       return true
-}
-
-// readUntilCloseAngle reads until the next ">".
-func (z *Tokenizer) readUntilCloseAngle() {
-       z.data.start = z.raw.end
-       for {
-               c := z.readByte()
-               if z.err != nil {
-                       z.data.end = z.raw.end
-                       return
-               }
-               if c == '>' {
-                       z.data.end = z.raw.end - len(">")
-                       return
-               }
-       }
-}
-
-// readMarkupDeclaration reads the next token starting with "<!". It might be
-// a "<!--comment-->", a "<!DOCTYPE foo>", a "<![CDATA[section]]>" or
-// "<!a bogus comment". The opening "<!" has already been consumed.
-func (z *Tokenizer) readMarkupDeclaration() TokenType {
-       z.data.start = z.raw.end
-       var c [2]byte
-       for i := 0; i < 2; i++ {
-               c[i] = z.readByte()
-               if z.err != nil {
-                       z.data.end = z.raw.end
-                       return CommentToken
-               }
-       }
-       if c[0] == '-' && c[1] == '-' {
-               z.readComment()
-               return CommentToken
-       }
-       z.raw.end -= 2
-       if z.readDoctype() {
-               return DoctypeToken
-       }
-       if z.allowCDATA && z.readCDATA() {
-               z.convertNUL = true
-               return TextToken
-       }
-       // It's a bogus comment.
-       z.readUntilCloseAngle()
-       return CommentToken
-}
-
-// readDoctype attempts to read a doctype declaration and returns true if
-// successful. The opening "<!" has already been consumed.
-func (z *Tokenizer) readDoctype() bool {
-       const s = "DOCTYPE"
-       for i := 0; i < len(s); i++ {
-               c := z.readByte()
-               if z.err != nil {
-                       z.data.end = z.raw.end
-                       return false
-               }
-               if c != s[i] && c != s[i]+('a'-'A') {
-                       // Back up to read the fragment of "DOCTYPE" again.
-                       z.raw.end = z.data.start
-                       return false
-               }
-       }
-       if z.skipWhiteSpace(); z.err != nil {
-               z.data.start = z.raw.end
-               z.data.end = z.raw.end
-               return true
-       }
-       z.readUntilCloseAngle()
-       return true
-}
-
-// readCDATA attempts to read a CDATA section and returns true if
-// successful. The opening "<!" has already been consumed.
-func (z *Tokenizer) readCDATA() bool {
-       const s = "[CDATA["
-       for i := 0; i < len(s); i++ {
-               c := z.readByte()
-               if z.err != nil {
-                       z.data.end = z.raw.end
-                       return false
-               }
-               if c != s[i] {
-                       // Back up to read the fragment of "[CDATA[" again.
-                       z.raw.end = z.data.start
-                       return false
-               }
-       }
-       z.data.start = z.raw.end
-       brackets := 0
-       for {
-               c := z.readByte()
-               if z.err != nil {
-                       z.data.end = z.raw.end
-                       return true
-               }
-               switch c {
-               case ']':
-                       brackets++
-               case '>':
-                       if brackets >= 2 {
-                               z.data.end = z.raw.end - len("]]>")
-                               return true
-                       }
-                       brackets = 0
-               default:
-                       brackets = 0
-               }
-       }
-}
-
-// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
-// case-insensitively matches any element of ss.
-func (z *Tokenizer) startTagIn(ss ...string) bool {
-loop:
-       for _, s := range ss {
-               if z.data.end-z.data.start != len(s) {
-                       continue loop
-               }
-               for i := 0; i < len(s); i++ {
-                       c := z.buf[z.data.start+i]
-                       if 'A' <= c && c <= 'Z' {
-                               c += 'a' - 'A'
-                       }
-                       if c != s[i] {
-                               continue loop
-                       }
-               }
-               return true
-       }
-       return false
-}
-
-// readStartTag reads the next start tag token. The opening "<a" has already
-// been consumed, where 'a' means anything in [A-Za-z].
-func (z *Tokenizer) readStartTag() TokenType {
-       z.readTag(true)
-       if z.err != nil {
-               return ErrorToken
-       }
-       // Several tags flag the tokenizer's next token as raw.
-       c, raw := z.buf[z.data.start], false
-       if 'A' <= c && c <= 'Z' {
-               c += 'a' - 'A'
-       }
-       switch c {
-       case 'i':
-               raw = z.startTagIn("iframe")
-       case 'n':
-               raw = z.startTagIn("noembed", "noframes", "noscript")
-       case 'p':
-               raw = z.startTagIn("plaintext")
-       case 's':
-               raw = z.startTagIn("script", "style")
-       case 't':
-               raw = z.startTagIn("textarea", "title")
-       case 'x':
-               raw = z.startTagIn("xmp")
-       }
-       if raw {
-               z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
-       }
-       // Look for a self-closing token like "<br/>".
-       if z.err == nil && z.buf[z.raw.end-2] == '/' {
-               return SelfClosingTagToken
-       }
-       return StartTagToken
-}
-
-// readTag reads the next tag token and its attributes. If saveAttr, those
-// attributes are saved in z.attr, otherwise z.attr is set to an empty slice.
-// The opening "<a" or "</a" has already been consumed, where 'a' means anything
-// in [A-Za-z].
-func (z *Tokenizer) readTag(saveAttr bool) {
-       z.attr = z.attr[:0]
-       z.nAttrReturned = 0
-       // Read the tag name and attribute key/value pairs.
-       z.readTagName()
-       if z.skipWhiteSpace(); z.err != nil {
-               return
-       }
-       for {
-               c := z.readByte()
-               if z.err != nil || c == '>' {
-                       break
-               }
-               z.raw.end--
-               z.readTagAttrKey()
-               z.readTagAttrVal()
-               // Save pendingAttr if saveAttr and that attribute has a non-empty key.
-               if saveAttr && z.pendingAttr[0].start != z.pendingAttr[0].end {
-                       z.attr = append(z.attr, z.pendingAttr)
-               }
-               if z.skipWhiteSpace(); z.err != nil {
-                       break
-               }
-       }
-}
-
-// readTagName sets z.data to the "div" in "<div k=v>". The reader (z.raw.end)
-// is positioned such that the first byte of the tag name (the "d" in "<div")
-// has already been consumed.
-func (z *Tokenizer) readTagName() {
-       z.data.start = z.raw.end - 1
-       for {
-               c := z.readByte()
-               if z.err != nil {
-                       z.data.end = z.raw.end
-                       return
-               }
-               switch c {
-               case ' ', '\n', '\r', '\t', '\f':
-                       z.data.end = z.raw.end - 1
-                       return
-               case '/', '>':
-                       z.raw.end--
-                       z.data.end = z.raw.end
-                       return
-               }
-       }
-}
-
-// readTagAttrKey sets z.pendingAttr[0] to the "k" in "<div k=v>".
-// Precondition: z.err == nil.
-func (z *Tokenizer) readTagAttrKey() {
-       z.pendingAttr[0].start = z.raw.end
-       for {
-               c := z.readByte()
-               if z.err != nil {
-                       z.pendingAttr[0].end = z.raw.end
-                       return
-               }
-               switch c {
-               case '=':
-                       if z.pendingAttr[0].start+1 == z.raw.end {
-                               // WHATWG 13.2.5.32, if we see an equals sign before the attribute name
-                               // begins, we treat it as a character in the attribute name and continue.
-                               continue
-                       }
-                       fallthrough
-               case ' ', '\n', '\r', '\t', '\f', '/', '>':
-                       // WHATWG 13.2.5.33 Attribute name state
-                       // We need to reconsume the char in the after attribute name state to support the / character
-                       z.raw.end--
-                       z.pendingAttr[0].end = z.raw.end
-                       return
-               }
-       }
-}
-
-// readTagAttrVal sets z.pendingAttr[1] to the "v" in "<div k=v>".
-func (z *Tokenizer) readTagAttrVal() {
-       z.pendingAttr[1].start = z.raw.end
-       z.pendingAttr[1].end = z.raw.end
-       if z.skipWhiteSpace(); z.err != nil {
-               return
-       }
-       c := z.readByte()
-       if z.err != nil {
-               return
-       }
-       if c == '/' {
-               // WHATWG 13.2.5.34 After attribute name state
-               // U+002F SOLIDUS (/) - Switch to the self-closing start tag state.
-               return
-       }
-       if c != '=' {
-               z.raw.end--
-               return
-       }
-       if z.skipWhiteSpace(); z.err != nil {
-               return
-       }
-       quote := z.readByte()
-       if z.err != nil {
-               return
-       }
-       switch quote {
-       case '>':
-               z.raw.end--
-               return
-
-       case '\'', '"':
-               z.pendingAttr[1].start = z.raw.end
-               for {
-                       c := z.readByte()
-                       if z.err != nil {
-                               z.pendingAttr[1].end = z.raw.end
-                               return
-                       }
-                       if c == quote {
-                               z.pendingAttr[1].end = z.raw.end - 1
-                               return
-                       }
-               }
-
-       default:
-               z.pendingAttr[1].start = z.raw.end - 1
-               for {
-                       c := z.readByte()
-                       if z.err != nil {
-                               z.pendingAttr[1].end = z.raw.end
-                               return
-                       }
-                       switch c {
-                       case ' ', '\n', '\r', '\t', '\f':
-                               z.pendingAttr[1].end = z.raw.end - 1
-                               return
-                       case '>':
-                               z.raw.end--
-                               z.pendingAttr[1].end = z.raw.end
-                               return
-                       }
-               }
-       }
-}
-
-// Next scans the next token and returns its type.
-func (z *Tokenizer) Next() TokenType {
-       z.raw.start = z.raw.end
-       z.data.start = z.raw.end
-       z.data.end = z.raw.end
-       if z.err != nil {
-               z.tt = ErrorToken
-               return z.tt
-       }
-       if z.rawTag != "" {
-               if z.rawTag == "plaintext" {
-                       // Read everything up to EOF.
-                       for z.err == nil {
-                               z.readByte()
-                       }
-                       z.data.end = z.raw.end
-                       z.textIsRaw = true
-               } else {
-                       z.readRawOrRCDATA()
-               }
-               if z.data.end > z.data.start {
-                       z.tt = TextToken
-                       z.convertNUL = true
-                       return z.tt
-               }
-       }
-       z.textIsRaw = false
-       z.convertNUL = false
-
-loop:
-       for {
-               c := z.readByte()
-               if z.err != nil {
-                       break loop
-               }
-               if c != '<' {
-                       continue loop
-               }
-
-               // Check if the '<' we have just read is part of a tag, comment
-               // or doctype. If not, it's part of the accumulated text token.
-               c = z.readByte()
-               if z.err != nil {
-                       break loop
-               }
-               var tokenType TokenType
-               switch {
-               case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
-                       tokenType = StartTagToken
-               case c == '/':
-                       tokenType = EndTagToken
-               case c == '!' || c == '?':
-                       // We use CommentToken to mean any of "<!--actual comments-->",
-                       // "<!DOCTYPE declarations>" and "<?xml processing instructions?>".
-                       tokenType = CommentToken
-               default:
-                       // Reconsume the current character.
-                       z.raw.end--
-                       continue
-               }
-
-               // We have a non-text token, but we might have accumulated some text
-               // before that. If so, we return the text first, and return the non-
-               // text token on the subsequent call to Next.
-               if x := z.raw.end - len("<a"); z.raw.start < x {
-                       z.raw.end = x
-                       z.data.end = x
-                       z.tt = TextToken
-                       return z.tt
-               }
-               switch tokenType {
-               case StartTagToken:
-                       z.tt = z.readStartTag()
-                       return z.tt
-               case EndTagToken:
-                       c = z.readByte()
-                       if z.err != nil {
-                               break loop
-                       }
-                       if c == '>' {
-                               // "</>" does not generate a token at all. Generate an empty comment
-                               // to allow passthrough clients to pick up the data using Raw.
-                               // Reset the tokenizer state and start again.
-                               z.tt = CommentToken
-                               return z.tt
-                       }
-                       if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
-                               z.readTag(false)
-                               if z.err != nil {
-                                       z.tt = ErrorToken
-                               } else {
-                                       z.tt = EndTagToken
-                               }
-                               return z.tt
-                       }
-                       z.raw.end--
-                       z.readUntilCloseAngle()
-                       z.tt = CommentToken
-                       return z.tt
-               case CommentToken:
-                       if c == '!' {
-                               z.tt = z.readMarkupDeclaration()
-                               return z.tt
-                       }
-                       z.raw.end--
-                       z.readUntilCloseAngle()
-                       z.tt = CommentToken
-                       return z.tt
-               }
-       }
-       if z.raw.start < z.raw.end {
-               z.data.end = z.raw.end
-               z.tt = TextToken
-               return z.tt
-       }
-       z.tt = ErrorToken
-       return z.tt
-}
-
-// Raw returns the unmodified text of the current token. Calling Next, Token,
-// Text, TagName or TagAttr may change the contents of the returned slice.
-//
-// The token stream's raw bytes partition the byte stream (up until an
-// ErrorToken). There are no overlaps or gaps between two consecutive token's
-// raw bytes. One implication is that the byte offset of the current token is
-// the sum of the lengths of all previous tokens' raw bytes.
-func (z *Tokenizer) Raw() []byte {
-       return z.buf[z.raw.start:z.raw.end]
-}
-
-// convertNewlines converts "\r" and "\r\n" in s to "\n".
-// The conversion happens in place, but the resulting slice may be shorter.
-func convertNewlines(s []byte) []byte {
-       for i, c := range s {
-               if c != '\r' {
-                       continue
-               }
-
-               src := i + 1
-               if src >= len(s) || s[src] != '\n' {
-                       s[i] = '\n'
-                       continue
-               }
-
-               dst := i
-               for src < len(s) {
-                       if s[src] == '\r' {
-                               if src+1 < len(s) && s[src+1] == '\n' {
-                                       src++
-                               }
-                               s[dst] = '\n'
-                       } else {
-                               s[dst] = s[src]
-                       }
-                       src++
-                       dst++
-               }
-               return s[:dst]
-       }
-       return s
-}
-
-var (
-       nul         = []byte("\x00")
-       replacement = []byte("\ufffd")
-)
-
-// Text returns the unescaped text of a text, comment or doctype token. The
-// contents of the returned slice may change on the next call to Next.
-func (z *Tokenizer) Text() []byte {
-       switch z.tt {
-       case TextToken, CommentToken, DoctypeToken:
-               s := z.buf[z.data.start:z.data.end]
-               z.data.start = z.raw.end
-               z.data.end = z.raw.end
-               s = convertNewlines(s)
-               if (z.convertNUL || z.tt == CommentToken) && bytes.Contains(s, nul) {
-                       s = bytes.Replace(s, nul, replacement, -1)
-               }
-               if !z.textIsRaw {
-                       s = unescape(s, false)
-               }
-               return s
-       }
-       return nil
-}
-
-// TagName returns the lower-cased name of a tag token (the `img` out of
-// `<IMG SRC="foo">`) and whether the tag has attributes.
-// The contents of the returned slice may change on the next call to Next.
-func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
-       if z.data.start < z.data.end {
-               switch z.tt {
-               case StartTagToken, EndTagToken, SelfClosingTagToken:
-                       s := z.buf[z.data.start:z.data.end]
-                       z.data.start = z.raw.end
-                       z.data.end = z.raw.end
-                       return lower(s), z.nAttrReturned < len(z.attr)
-               }
-       }
-       return nil, false
-}
-
-// TagAttr returns the lower-cased key and unescaped value of the next unparsed
-// attribute for the current tag token and whether there are more attributes.
-// The contents of the returned slices may change on the next call to Next.
-func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
-       if z.nAttrReturned < len(z.attr) {
-               switch z.tt {
-               case StartTagToken, SelfClosingTagToken:
-                       x := z.attr[z.nAttrReturned]
-                       z.nAttrReturned++
-                       key = z.buf[x[0].start:x[0].end]
-                       val = z.buf[x[1].start:x[1].end]
-                       return key, unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
-               }
-       }
-       return nil, nil, false
-}
-
-// Token returns the current Token. The result's Data and Attr values remain
-// valid after subsequent Next calls.
-func (z *Tokenizer) Token() Token {
-       t := Token{Type: z.tt}
-       switch z.tt {
-       case TextToken, CommentToken, DoctypeToken:
-               t.Data = string(z.Text())
-       case StartTagToken, SelfClosingTagToken, EndTagToken:
-               name, moreAttr := z.TagName()
-               for moreAttr {
-                       var key, val []byte
-                       key, val, moreAttr = z.TagAttr()
-                       t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
-               }
-               if a := atom.Lookup(name); a != 0 {
-                       t.DataAtom, t.Data = a, a.String()
-               } else {
-                       t.DataAtom, t.Data = 0, string(name)
-               }
-       }
-       return t
-}
-
-// SetMaxBuf sets a limit on the amount of data buffered during tokenization.
-// A value of 0 means unlimited.
-func (z *Tokenizer) SetMaxBuf(n int) {
-       z.maxBuf = n
-}
-
-// NewTokenizer returns a new HTML Tokenizer for the given Reader.
-// The input is assumed to be UTF-8 encoded.
-func NewTokenizer(r io.Reader) *Tokenizer {
-       return NewTokenizerFragment(r, "")
-}
-
-// NewTokenizerFragment returns a new HTML Tokenizer for the given Reader, for
-// tokenizing an existing element's InnerHTML fragment. contextTag is that
-// element's tag, such as "div" or "iframe".
-//
-// For example, how the InnerHTML "a<b" is tokenized depends on whether it is
-// for a <p> tag or a <script> tag.
-//
-// The input is assumed to be UTF-8 encoded.
-func NewTokenizerFragment(r io.Reader, contextTag string) *Tokenizer {
-       z := &Tokenizer{
-               r:   r,
-               buf: make([]byte, 0, 4096),
-       }
-       if contextTag != "" {
-               switch s := strings.ToLower(contextTag); s {
-               case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp":
-                       z.rawTag = s
-               }
-       }
-       return z
-}
diff --git a/html/token_test.go b/html/token_test.go
deleted file mode 100644 (file)
index a36d112..0000000
+++ /dev/null
@@ -1,917 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package html
-
-import (
-       "bytes"
-       "io"
-       "os"
-       "reflect"
-       "runtime"
-       "strings"
-       "testing"
-)
-
-// https://github.com/golang/go/issues/58246
-const issue58246 = `<!--[if gte mso 12]>
-  <xml>
-      <o:OfficeDocumentSettings>
-      <o:AllowPNG/>
-      <o:PixelsPerInch>96</o:PixelsPerInch>
-      </o:OfficeDocumentSettings>
-    </xml>
-<![endif]-->`
-
-type tokenTest struct {
-       // A short description of the test case.
-       desc string
-       // The HTML to parse.
-       html string
-       // The string representations of the expected tokens, joined by '$'.
-       golden string
-}
-
-var tokenTests = []tokenTest{
-       {
-               "empty",
-               "",
-               "",
-       },
-       // A single text node. The tokenizer should not break text nodes on whitespace,
-       // nor should it normalize whitespace within a text node.
-       {
-               "text",
-               "foo  bar",
-               "foo  bar",
-       },
-       // An entity.
-       {
-               "entity",
-               "one &lt; two",
-               "one &lt; two",
-       },
-       // A start, self-closing and end tag. The tokenizer does not care if the start
-       // and end tokens don't match; that is the job of the parser.
-       {
-               "tags",
-               "<a>b<c/>d</e>",
-               "<a>$b$<c/>$d$</e>",
-       },
-       // Angle brackets that aren't a tag.
-       {
-               "not a tag #0",
-               "<",
-               "&lt;",
-       },
-       {
-               "not a tag #1",
-               "</",
-               "&lt;/",
-       },
-       {
-               "not a tag #2",
-               "</>",
-               "<!---->",
-       },
-       {
-               "not a tag #3",
-               "a</>b",
-               "a$<!---->$b",
-       },
-       {
-               "not a tag #4",
-               "</ >",
-               "<!-- -->",
-       },
-       {
-               "not a tag #5",
-               "</.",
-               "<!--.-->",
-       },
-       {
-               "not a tag #6",
-               "</.>",
-               "<!--.-->",
-       },
-       {
-               "not a tag #7",
-               "a < b",
-               "a &lt; b",
-       },
-       {
-               "not a tag #8",
-               "<.>",
-               "&lt;.&gt;",
-       },
-       {
-               "not a tag #9",
-               "a<<<b>>>c",
-               "a&lt;&lt;$<b>$&gt;&gt;c",
-       },
-       {
-               "not a tag #10",
-               "if x<0 and y < 0 then x*y>0",
-               "if x&lt;0 and y &lt; 0 then x*y&gt;0",
-       },
-       {
-               "not a tag #11",
-               "<<p>",
-               "&lt;$<p>",
-       },
-       // EOF in a tag name.
-       {
-               "tag name eof #0",
-               "<a",
-               "",
-       },
-       {
-               "tag name eof #1",
-               "<a ",
-               "",
-       },
-       {
-               "tag name eof #2",
-               "a<b",
-               "a",
-       },
-       {
-               "tag name eof #3",
-               "<a><b",
-               "<a>",
-       },
-       {
-               "tag name eof #4",
-               `<a x`,
-               ``,
-       },
-       // Some malformed tags that are missing a '>'.
-       {
-               "malformed tag #0",
-               `<p</p>`,
-               `<p< p="">`,
-       },
-       {
-               "malformed tag #1",
-               `<p </p>`,
-               `<p <="" p="">`,
-       },
-       {
-               "malformed tag #2",
-               `<p id`,
-               ``,
-       },
-       {
-               "malformed tag #3",
-               `<p id=`,
-               ``,
-       },
-       {
-               "malformed tag #4",
-               `<p id=>`,
-               `<p id="">`,
-       },
-       {
-               "malformed tag #5",
-               `<p id=0`,
-               ``,
-       },
-       {
-               "malformed tag #6",
-               `<p id=0</p>`,
-               `<p id="0&lt;/p">`,
-       },
-       {
-               "malformed tag #7",
-               `<p id="0</p>`,
-               ``,
-       },
-       {
-               "malformed tag #8",
-               `<p id="0"</p>`,
-               `<p id="0" <="" p="">`,
-       },
-       {
-               "malformed tag #9",
-               `<p></p id`,
-               `<p>`,
-       },
-       // Raw text and RCDATA.
-       {
-               "basic raw text",
-               "<script><a></b></script>",
-               "<script>$&lt;a&gt;&lt;/b&gt;$</script>",
-       },
-       {
-               "unfinished script end tag",
-               "<SCRIPT>a</SCR",
-               "<script>$a&lt;/SCR",
-       },
-       {
-               "broken script end tag",
-               "<SCRIPT>a</SCR ipt>",
-               "<script>$a&lt;/SCR ipt&gt;",
-       },
-       {
-               "EOF in script end tag",
-               "<SCRIPT>a</SCRipt",
-               "<script>$a&lt;/SCRipt",
-       },
-       {
-               "scriptx end tag",
-               "<SCRIPT>a</SCRiptx",
-               "<script>$a&lt;/SCRiptx",
-       },
-       {
-               "' ' completes script end tag",
-               "<SCRIPT>a</SCRipt ",
-               "<script>$a",
-       },
-       {
-               "'>' completes script end tag",
-               "<SCRIPT>a</SCRipt>",
-               "<script>$a$</script>",
-       },
-       {
-               "self-closing script end tag",
-               "<SCRIPT>a</SCRipt/>",
-               "<script>$a$</script>",
-       },
-       {
-               "nested script tag",
-               "<SCRIPT>a</SCRipt<script>",
-               "<script>$a&lt;/SCRipt&lt;script&gt;",
-       },
-       {
-               "script end tag after unfinished",
-               "<SCRIPT>a</SCRipt</script>",
-               "<script>$a&lt;/SCRipt$</script>",
-       },
-       {
-               "script/style mismatched tags",
-               "<script>a</style>",
-               "<script>$a&lt;/style&gt;",
-       },
-       {
-               "style element with entity",
-               "<style>&apos;",
-               "<style>$&amp;apos;",
-       },
-       {
-               "textarea with tag",
-               "<textarea><div></textarea>",
-               "<textarea>$&lt;div&gt;$</textarea>",
-       },
-       {
-               "title with tag and entity",
-               "<title><b>K&amp;R C</b></title>",
-               "<title>$&lt;b&gt;K&amp;R C&lt;/b&gt;$</title>",
-       },
-       {
-               "title with trailing '&lt;' entity",
-               "<title>foobar<</title>",
-               "<title>$foobar&lt;$</title>",
-       },
-       // DOCTYPE tests.
-       {
-               "Proper DOCTYPE",
-               "<!DOCTYPE html>",
-               "<!DOCTYPE html>",
-       },
-       {
-               "DOCTYPE with no space",
-               "<!doctypehtml>",
-               "<!DOCTYPE html>",
-       },
-       {
-               "DOCTYPE with two spaces",
-               "<!doctype  html>",
-               "<!DOCTYPE html>",
-       },
-       {
-               "looks like DOCTYPE but isn't",
-               "<!DOCUMENT html>",
-               "<!--DOCUMENT html-->",
-       },
-       {
-               "DOCTYPE at EOF",
-               "<!DOCtype",
-               "<!DOCTYPE >",
-       },
-       // XML processing instructions.
-       {
-               "XML processing instruction",
-               "<?xml?>",
-               "<!--?xml?-->",
-       },
-       // Comments. See also func TestComments.
-       {
-               "comment0",
-               "abc<b><!-- skipme --></b>def",
-               "abc$<b>$<!-- skipme -->$</b>$def",
-       },
-       {
-               "comment1",
-               "a<!-->z",
-               "a$<!---->$z",
-       },
-       {
-               "comment2",
-               "a<!--->z",
-               "a$<!---->$z",
-       },
-       {
-               "comment3",
-               "a<!--x>-->z",
-               "a$<!--x>-->$z",
-       },
-       {
-               "comment4",
-               "a<!--x->-->z",
-               "a$<!--x-&gt;-->$z",
-       },
-       {
-               "comment5",
-               "a<!>z",
-               "a$<!---->$z",
-       },
-       {
-               "comment6",
-               "a<!->z",
-               "a$<!----->$z",
-       },
-       {
-               "comment7",
-               "a<!---<>z",
-               "a$<!---<>z-->",
-       },
-       {
-               "comment8",
-               "a<!--z",
-               "a$<!--z-->",
-       },
-       {
-               "comment9",
-               "a<!--z-",
-               "a$<!--z-->",
-       },
-       {
-               "comment10",
-               "a<!--z--",
-               "a$<!--z-->",
-       },
-       {
-               "comment11",
-               "a<!--z---",
-               "a$<!--z--->",
-       },
-       {
-               "comment12",
-               "a<!--z----",
-               "a$<!--z---->",
-       },
-       {
-               "comment13",
-               "a<!--x--!>z",
-               "a$<!--x-->$z",
-       },
-       {
-               "comment14",
-               "a<!--!-->z",
-               "a$<!--!-->$z",
-       },
-       {
-               "comment15",
-               "a<!-- !-->z",
-               "a$<!-- !-->$z",
-       },
-       {
-               "comment16",
-               "a<!--i\x00j-->z",
-               "a$<!--i\uFFFDj-->$z",
-       },
-       {
-               "comment17",
-               "a<!--\x00",
-               "a$<!--\uFFFD-->",
-       },
-       {
-               "comment18",
-               "a<!--<!-->z",
-               "a$<!--<!-->$z",
-       },
-       {
-               "comment19",
-               "a<!--<!--",
-               "a$<!--<!-->",
-       },
-       {
-               "comment20",
-               "a<!--ij--kl-->z",
-               "a$<!--ij--kl-->$z",
-       },
-       {
-               "comment21",
-               "a<!--ij--kl--!>z",
-               "a$<!--ij--kl-->$z",
-       },
-       {
-               "comment22",
-               "a<!--!--!<--!-->z",
-               "a$<!--!--!<--!-->$z",
-       },
-       {
-               "comment23",
-               "a<!--&gt;-->z",
-               "a$<!--&gt;-->$z",
-       },
-       {
-               "comment24",
-               "a<!--&gt;>x",
-               "a$<!--&gt;>x-->",
-       },
-       {
-               "comment25",
-               "a<!--&gt;&gt;",
-               "a$<!--&gt;>-->",
-       },
-       {
-               "comment26",
-               "a<!--&gt;&gt;-",
-               "a$<!--&gt;>-->",
-       },
-       {
-               "comment27",
-               "a<!--&gt;&gt;-->z",
-               "a$<!--&gt;>-->$z",
-       },
-       {
-               "comment28",
-               "a<!--&amp;&gt;-->z",
-               "a$<!--&amp;>-->$z",
-       },
-       {
-               "comment29",
-               "a<!--&amp;gt;-->z",
-               "a$<!--&amp;gt;-->$z",
-       },
-       {
-               "comment30",
-               "a<!--&nosuchentity;-->z",
-               "a$<!--&amp;nosuchentity;-->$z",
-       },
-       {
-               "comment31",
-               "a<!--i>>j-->z",
-               "a$<!--i>>j-->$z",
-       },
-       {
-               "comment32",
-               "a<!--i!>>j-->z",
-               "a$<!--i!&gt;>j-->$z",
-       },
-       // https://stackoverflow.design/email/base/mso/#targeting-specific-outlook-versions
-       // says "[For] Windows Outlook 2003 and above... conditional comments allow
-       // us to add bits of HTML that are only read by the Word-based versions of
-       // Outlook". These comments (with angle brackets) should pass through
-       // unchanged (by this Go package) when rendering.
-       //
-       // We should also still escape ">" as "&gt;" when necessary.
-       // https://github.com/golang/go/issues/48237
-       //
-       // The "your code" example below comes from that stackoverflow.design link
-       // above but note that it can contain angle-bracket-rich XML.
-       // https://github.com/golang/go/issues/58246
-       {
-               "issue48237CommentWithAmpgtsemi1",
-               "a<!--<p></p>&lt;!--[video]--&gt;-->z",
-               "a$<!--<p></p><!--[video]--&gt;-->$z",
-       },
-       {
-               "issue48237CommentWithAmpgtsemi2",
-               "a<!--<p></p>&lt;!--[video]--!&gt;-->z",
-               "a$<!--<p></p><!--[video]--!&gt;-->$z",
-       },
-       {
-               "issue58246MicrosoftOutlookComment1",
-               "a<!--[if mso]> your code <![endif]-->z",
-               "a$<!--[if mso]> your code <![endif]-->$z",
-       },
-       {
-               "issue58246MicrosoftOutlookComment2",
-               "a" + issue58246 + "z",
-               "a$" + issue58246 + "$z",
-       },
-       // An attribute with a backslash.
-       {
-               "backslash",
-               `<p id="a\"b">`,
-               `<p id="a\" b"="">`,
-       },
-       // Entities, tag name and attribute key lower-casing, and whitespace
-       // normalization within a tag.
-       {
-               "tricky",
-               "<p \t\n iD=\"a&quot;B\"  foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
-               `<p id="a&#34;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
-       },
-       // A nonexistent entity. Tokenizing and converting back to a string should
-       // escape the "&" to become "&amp;".
-       {
-               "noSuchEntity",
-               `<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
-               `<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
-       },
-       {
-               "entity without semicolon",
-               `&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
-               `¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
-       },
-       {
-               "entity with digits",
-               "&frac12;",
-               "½",
-       },
-       // Attribute tests:
-       // http://dev.w3.org/html5/pf-summary/Overview.html#attributes
-       {
-               "Empty attribute",
-               `<input disabled FOO>`,
-               `<input disabled="" foo="">`,
-       },
-       {
-               "Empty attribute, whitespace",
-               `<input disabled FOO >`,
-               `<input disabled="" foo="">`,
-       },
-       {
-               "Unquoted attribute value",
-               `<input value=yes FOO=BAR>`,
-               `<input value="yes" foo="BAR">`,
-       },
-       {
-               "Unquoted attribute value, spaces",
-               `<input value = yes FOO = BAR>`,
-               `<input value="yes" foo="BAR">`,
-       },
-       {
-               "Unquoted attribute value, trailing space",
-               `<input value=yes FOO=BAR >`,
-               `<input value="yes" foo="BAR">`,
-       },
-       {
-               "Single-quoted attribute value",
-               `<input value='yes' FOO='BAR'>`,
-               `<input value="yes" foo="BAR">`,
-       },
-       {
-               "Single-quoted attribute value, trailing space",
-               `<input value='yes' FOO='BAR' >`,
-               `<input value="yes" foo="BAR">`,
-       },
-       {
-               "Double-quoted attribute value",
-               `<input value="I'm an attribute" FOO="BAR">`,
-               `<input value="I&#39;m an attribute" foo="BAR">`,
-       },
-       {
-               "Attribute name characters",
-               `<meta http-equiv="content-type">`,
-               `<meta http-equiv="content-type">`,
-       },
-       {
-               "Mixed attributes",
-               `a<P V="0 1" w='2' X=3 y>z`,
-               `a$<p v="0 1" w="2" x="3" y="">$z`,
-       },
-       {
-               "Attributes with a solitary single quote",
-               `<p id=can't><p id=won't>`,
-               `<p id="can&#39;t">$<p id="won&#39;t">`,
-       },
-       // WHATWG 13.2.5.32 equals sign before attribute name state
-       {
-               "equals sign before attribute name",
-               `<p  =>`,
-               `<p =="">`,
-       },
-       {
-               "equals sign before attribute name, extra cruft",
-               `<p  =asd>`,
-               `<p =asd="">`,
-       },
-       {
-               "forward slash before attribute name",
-               `<p/=">`,
-               `<p ="="">`,
-       },
-       {
-               "forward slash before attribute name with spaces around",
-               `<p / =">`,
-               `<p ="="">`,
-       },
-       {
-               "forward slash after attribute name followed by a character",
-               `<p a/ ="">`,
-               `<p a="" =""="">`,
-       },
-}
-
-func TestTokenizer(t *testing.T) {
-       for _, tt := range tokenTests {
-               t.Run(tt.desc, func(t *testing.T) {
-                       z := NewTokenizer(strings.NewReader(tt.html))
-                       if tt.golden != "" {
-                               for i, s := range strings.Split(tt.golden, "$") {
-                                       if z.Next() == ErrorToken {
-                                               t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
-                                               return
-                                       }
-                                       actual := z.Token().String()
-                                       if s != actual {
-                                               t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
-                                               return
-                                       }
-                               }
-                       }
-                       z.Next()
-                       if z.Err() != io.EOF {
-                               t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
-                       }
-               })
-       }
-}
-
-func TestMaxBuffer(t *testing.T) {
-       // Exceeding the maximum buffer size generates ErrBufferExceeded.
-       z := NewTokenizer(strings.NewReader("<" + strings.Repeat("t", 10)))
-       z.SetMaxBuf(5)
-       tt := z.Next()
-       if got, want := tt, ErrorToken; got != want {
-               t.Fatalf("token type: got: %v want: %v", got, want)
-       }
-       if got, want := z.Err(), ErrBufferExceeded; got != want {
-               t.Errorf("error type: got: %v want: %v", got, want)
-       }
-       if got, want := string(z.Raw()), "<tttt"; got != want {
-               t.Fatalf("buffered before overflow: got: %q want: %q", got, want)
-       }
-}
-
-func TestMaxBufferReconstruction(t *testing.T) {
-       // Exceeding the maximum buffer size at any point while tokenizing permits
-       // reconstructing the original input.
-tests:
-       for _, test := range tokenTests {
-               for maxBuf := 1; ; maxBuf++ {
-                       r := strings.NewReader(test.html)
-                       z := NewTokenizer(r)
-                       z.SetMaxBuf(maxBuf)
-                       var tokenized bytes.Buffer
-                       for {
-                               tt := z.Next()
-                               tokenized.Write(z.Raw())
-                               if tt == ErrorToken {
-                                       if err := z.Err(); err != io.EOF && err != ErrBufferExceeded {
-                                               t.Errorf("%s: unexpected error: %v", test.desc, err)
-                                       }
-                                       break
-                               }
-                       }
-                       // Anything tokenized along with untokenized input or data left in the reader.
-                       assembled, err := io.ReadAll(io.MultiReader(&tokenized, bytes.NewReader(z.Buffered()), r))
-                       if err != nil {
-                               t.Errorf("%s: ReadAll: %v", test.desc, err)
-                               continue tests
-                       }
-                       if got, want := string(assembled), test.html; got != want {
-                               t.Errorf("%s: reassembled html:\n got: %q\nwant: %q", test.desc, got, want)
-                               continue tests
-                       }
-                       // EOF indicates that we completed tokenization and hence found the max
-                       // maxBuf that generates ErrBufferExceeded, so continue to the next test.
-                       if z.Err() == io.EOF {
-                               break
-                       }
-               } // buffer sizes
-       } // tests
-}
-
-func TestPassthrough(t *testing.T) {
-       // Accumulating the raw output for each parse event should reconstruct the
-       // original input.
-       for _, test := range tokenTests {
-               z := NewTokenizer(strings.NewReader(test.html))
-               var parsed bytes.Buffer
-               for {
-                       tt := z.Next()
-                       parsed.Write(z.Raw())
-                       if tt == ErrorToken {
-                               break
-                       }
-               }
-               if got, want := parsed.String(), test.html; got != want {
-                       t.Errorf("%s: parsed output:\n got: %q\nwant: %q", test.desc, got, want)
-               }
-       }
-}
-
-func TestBufAPI(t *testing.T) {
-       s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
-       z := NewTokenizer(bytes.NewBufferString(s))
-       var result bytes.Buffer
-       depth := 0
-loop:
-       for {
-               tt := z.Next()
-               switch tt {
-               case ErrorToken:
-                       if z.Err() != io.EOF {
-                               t.Error(z.Err())
-                       }
-                       break loop
-               case TextToken:
-                       if depth > 0 {
-                               result.Write(z.Text())
-                       }
-               case StartTagToken, EndTagToken:
-                       tn, _ := z.TagName()
-                       if len(tn) == 1 && tn[0] == 'a' {
-                               if tt == StartTagToken {
-                                       depth++
-                               } else {
-                                       depth--
-                               }
-                       }
-               }
-       }
-       u := "14567"
-       v := string(result.Bytes())
-       if u != v {
-               t.Errorf("TestBufAPI: want %q got %q", u, v)
-       }
-}
-
-func TestConvertNewlines(t *testing.T) {
-       testCases := map[string]string{
-               "Mac\rDOS\r\nUnix\n":    "Mac\nDOS\nUnix\n",
-               "Unix\nMac\rDOS\r\n":    "Unix\nMac\nDOS\n",
-               "DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
-               "":                      "",
-               "\n":                    "\n",
-               "\n\r":                  "\n\n",
-               "\r":                    "\n",
-               "\r\n":                  "\n",
-               "\r\n\n":                "\n\n",
-               "\r\n\r":                "\n\n",
-               "\r\n\r\n":              "\n\n",
-               "\r\r":                  "\n\n",
-               "\r\r\n":                "\n\n",
-               "\r\r\n\n":              "\n\n\n",
-               "\r\r\r\n":              "\n\n\n",
-               "\r \n":                 "\n \n",
-               "xyz":                   "xyz",
-       }
-       for in, want := range testCases {
-               if got := string(convertNewlines([]byte(in))); got != want {
-                       t.Errorf("input %q: got %q, want %q", in, got, want)
-               }
-       }
-}
-
-func TestReaderEdgeCases(t *testing.T) {
-       const s = "<p>An io.Reader can return (0, nil) or (n, io.EOF).</p>"
-       testCases := []io.Reader{
-               &zeroOneByteReader{s: s},
-               &eofStringsReader{s: s},
-               &stuckReader{},
-       }
-       for i, tc := range testCases {
-               got := []TokenType{}
-               z := NewTokenizer(tc)
-               for {
-                       tt := z.Next()
-                       if tt == ErrorToken {
-                               break
-                       }
-                       got = append(got, tt)
-               }
-               if err := z.Err(); err != nil && err != io.EOF {
-                       if err != io.ErrNoProgress {
-                               t.Errorf("i=%d: %v", i, err)
-                       }
-                       continue
-               }
-               want := []TokenType{
-                       StartTagToken,
-                       TextToken,
-                       EndTagToken,
-               }
-               if !reflect.DeepEqual(got, want) {
-                       t.Errorf("i=%d: got %v, want %v", i, got, want)
-                       continue
-               }
-       }
-}
-
-// zeroOneByteReader is like a strings.Reader that alternates between
-// returning 0 bytes and 1 byte at a time.
-type zeroOneByteReader struct {
-       s string
-       n int
-}
-
-func (r *zeroOneByteReader) Read(p []byte) (int, error) {
-       if len(p) == 0 {
-               return 0, nil
-       }
-       if len(r.s) == 0 {
-               return 0, io.EOF
-       }
-       r.n++
-       if r.n%2 != 0 {
-               return 0, nil
-       }
-       p[0], r.s = r.s[0], r.s[1:]
-       return 1, nil
-}
-
-// eofStringsReader is like a strings.Reader but can return an (n, err) where
-// n > 0 && err != nil.
-type eofStringsReader struct {
-       s string
-}
-
-func (r *eofStringsReader) Read(p []byte) (int, error) {
-       n := copy(p, r.s)
-       r.s = r.s[n:]
-       if r.s != "" {
-               return n, nil
-       }
-       return n, io.EOF
-}
-
-// stuckReader is an io.Reader that always returns no data and no error.
-type stuckReader struct{}
-
-func (*stuckReader) Read(p []byte) (int, error) {
-       return 0, nil
-}
-
-const (
-       rawLevel = iota
-       lowLevel
-       highLevel
-)
-
-func benchmarkTokenizer(b *testing.B, level int) {
-       buf, err := os.ReadFile("testdata/go1.html")
-       if err != nil {
-               b.Fatalf("could not read testdata/go1.html: %v", err)
-       }
-       b.SetBytes(int64(len(buf)))
-       runtime.GC()
-       b.ReportAllocs()
-       b.ResetTimer()
-       for i := 0; i < b.N; i++ {
-               z := NewTokenizer(bytes.NewBuffer(buf))
-               for {
-                       tt := z.Next()
-                       if tt == ErrorToken {
-                               if err := z.Err(); err != nil && err != io.EOF {
-                                       b.Fatalf("tokenizer error: %v", err)
-                               }
-                               break
-                       }
-                       switch level {
-                       case rawLevel:
-                               // Calling z.Raw just returns the raw bytes of the token. It does
-                               // not unescape &lt; to <, or lower-case tag names and attribute keys.
-                               z.Raw()
-                       case lowLevel:
-                               // Caling z.Text, z.TagName and z.TagAttr returns []byte values
-                               // whose contents may change on the next call to z.Next.
-                               switch tt {
-                               case TextToken, CommentToken, DoctypeToken:
-                                       z.Text()
-                               case StartTagToken, SelfClosingTagToken:
-                                       _, more := z.TagName()
-                                       for more {
-                                               _, _, more = z.TagAttr()
-                                       }
-                               case EndTagToken:
-                                       z.TagName()
-                               }
-                       case highLevel:
-                               // Calling z.Token converts []byte values to strings whose validity
-                               // extend beyond the next call to z.Next.
-                               z.Token()
-                       }
-               }
-       }
-}
-
-func BenchmarkRawLevelTokenizer(b *testing.B)  { benchmarkTokenizer(b, rawLevel) }
-func BenchmarkLowLevelTokenizer(b *testing.B)  { benchmarkTokenizer(b, lowLevel) }
-func BenchmarkHighLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, highLevel) }
index 3233a2eec7437a80d1ef8d5a0e52ef9fcb14888b..4d9ab2e9edcafe4dc0a1d3ab7b92e8babc6bc24b 100644 (file)
@@ -1,7 +1,7 @@
 // Copyright (C) 2024 early (LGPL)
 package htmltree
 
-import "git.earlybird.gay/today-engine/html"
+import "git.earlybird.gay/today-engine/internal/html"
 
 func GetAttr(n *html.Node, name string) string {
        for _, attr := range n.Attr {
index e3e5ea26051cd838bd9c5054b2c7a08967b6d92e..7bd4865e79938b734fb1229234540920dfceacab 100644 (file)
@@ -6,7 +6,7 @@ import (
        "regexp"
        "strings"
 
-       "git.earlybird.gay/today-engine/html"
+       "git.earlybird.gay/today-engine/internal/html"
 )
 
 func Walk(root *html.Node, f func(*html.Node) (bool, error)) error {
index 132fab2eca9e00a4b86b2b34615effbcf59376bc..f5937bb947b65572745bd6acca75cf82faa3e021 100644 (file)
@@ -8,9 +8,9 @@ import (
        "regexp"
        "strings"
 
-       "git.earlybird.gay/today-engine/html"
        "git.earlybird.gay/today-engine/htmltree"
        "git.earlybird.gay/today-engine/include"
+       "git.earlybird.gay/today-engine/internal/html"
        "git.earlybird.gay/today-engine/render"
 )
 
index a4f16e8bcf8de6e35396e2ec553319851b010cfa..0c2aaff850f6cc8ae496209e1067b56ffa6d1faa 100644 (file)
@@ -3,9 +3,9 @@ package compile
 import (
        "errors"
 
-       "git.earlybird.gay/today-engine/html"
-       "git.earlybird.gay/today-engine/html/atom"
        "git.earlybird.gay/today-engine/htmltree"
+       "git.earlybird.gay/today-engine/internal/html"
+       "git.earlybird.gay/today-engine/internal/html/atom"
 )
 
 var ErrBadComponentFormat = errors.New("web components must either be a script or a template and script")
index dc602e093fdd6468006da2b1ee7c4c51b76b4f19..894e66763b9d9a75d89f6ac38521487b81f9c740 100644 (file)
@@ -8,8 +8,8 @@ import (
        "slices"
        "strings"
 
-       "git.earlybird.gay/today-engine/html"
        "git.earlybird.gay/today-engine/htmltree"
+       "git.earlybird.gay/today-engine/internal/html"
 )
 
 const (
diff --git a/internal/html/LICENSE b/internal/html/LICENSE
new file mode 100644 (file)
index 0000000..2a7cf70
--- /dev/null
@@ -0,0 +1,27 @@
+Copyright 2009 The Go Authors.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google LLC nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/internal/html/PATENTS b/internal/html/PATENTS
new file mode 100644 (file)
index 0000000..7330990
--- /dev/null
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the Go project.
+
+Google hereby grants to You a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer and otherwise run, modify and propagate the contents of this
+implementation of Go, where such license applies only to those patent
+claims, both currently owned or controlled by Google and acquired in
+the future, licensable by Google that are necessarily infringed by this
+implementation of Go.  This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation.  If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of Go or any code incorporated within this
+implementation of Go constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of Go
+shall terminate as of the date such litigation is filed.
diff --git a/internal/html/README.md b/internal/html/README.md
new file mode 100644 (file)
index 0000000..ad72cd7
--- /dev/null
@@ -0,0 +1,14 @@
+# HTML
+
+## This is a fork.
+
+This repo forks `golang.org/x/net/html` and makes limited changes:
+
+- Foster parenting is disabled.
+- Attribute keys are not automatically set to lowercase.
+
+These changes are made to support Today's use of `x/net/html` to parse and
+re-render Go templates. As an intended side effect, they allow invalid HTML to
+be rendered, which is almost certainly not what you want.
+
+Please see the LICENSE and PATENTS files in this directory.
diff --git a/internal/html/atom/atom.go b/internal/html/atom/atom.go
new file mode 100644 (file)
index 0000000..cb79263
--- /dev/null
@@ -0,0 +1,78 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package atom provides integer codes (also known as atoms) for a fixed set of
+// frequently occurring HTML strings: tag names and attribute keys such as "p"
+// and "id".
+//
+// Sharing an atom's name between all elements with the same tag can result in
+// fewer string allocations when tokenizing and parsing HTML. Integer
+// comparisons are also generally faster than string comparisons.
+//
+// The value of an atom's particular code is not guaranteed to stay the same
+// between versions of this package. Neither is any ordering guaranteed:
+// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
+// be dense. The only guarantees are that e.g. looking up "div" will yield
+// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
+package atom // import "git.earlybird.gay/today-engine/internal/html/atom"
+
+// Atom is an integer code for a string. The zero value maps to "".
+type Atom uint32 // upper 24 bits: offset into atomText; low 8 bits: length of the name
+
+// String returns the atom's name, or "" if a's range falls outside atomText.
+func (a Atom) String() string {
+       start := uint32(a >> 8) // offset of the name within atomText
+       n := uint32(a & 0xff) // length of the name in bytes
+       if start+n > uint32(len(atomText)) { // range runs past the end of atomText
+               return ""
+       }
+       return atomText[start : start+n]
+}
+
+func (a Atom) string() string { // variant of String without the range check; called by Lookup
+       return atomText[a>>8 : a>>8+a&0xff] // parses as (a>>8) + (a&0xff): offset plus length
+}
+
+// fnv computes the FNV hash of s with an arbitrary starting value h.
+func fnv(h uint32, s []byte) uint32 {
+       for i := range s {
+               h ^= uint32(s[i]) // fold the next byte in first ...
+               h *= 16777619 // ... then multiply by 16777619 (0x01000193, the 32-bit FNV prime)
+       }
+       return h
+}
+
+func match(s string, t []byte) bool { // reports whether s begins with the bytes of t
+       for i, c := range t {
+               if s[i] != c { // no length guard here: the caller must ensure len(s) >= len(t)
+                       return false
+               }
+       }
+       return true
+}
+
+// Lookup returns the atom whose name is s. It returns zero if there is no
+// such atom. The lookup is case sensitive.
+func Lookup(s []byte) Atom {
+       if len(s) == 0 || len(s) > maxAtomLen {
+               return 0
+       }
+       h := fnv(hash0, s) // a single hash supplies both candidate slots below
+       if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) { // slot 1: low bits of h; len(table)-1 is used as a mask (gen.go emits table as [1<<k]Atom)
+               return a
+       }
+       if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) { // slot 2: same mask applied to h>>16
+               return a
+       }
+       return 0
+}
+
+// String returns a string whose contents are equal to s. In that sense, it is
+// equivalent to string(s) but may be more efficient.
+func String(s []byte) string {
+       if a := Lookup(s); a != 0 {
+               return a.String() // known atom: return the matching slice of atomText
+       }
+       return string(s) // not an atom: ordinary conversion
+}
diff --git a/internal/html/atom/atom_test.go b/internal/html/atom/atom_test.go
new file mode 100644 (file)
index 0000000..6e33704
--- /dev/null
@@ -0,0 +1,109 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package atom
+
+import (
+       "sort"
+       "testing"
+)
+
+func TestKnown(t *testing.T) { // every name in testAtomList must round-trip through Lookup and String
+       for _, s := range testAtomList {
+               if atom := Lookup([]byte(s)); atom.String() != s {
+                       t.Errorf("Lookup(%q) = %#x (%q)", s, uint32(atom), atom.String())
+               }
+       }
+}
+
+func TestHits(t *testing.T) { // every non-zero table entry must be found again under its own name
+       for _, a := range table {
+               if a == 0 { // zero entries are skipped (gen.go writes only the occupied slots)
+                       continue
+               }
+               got := Lookup([]byte(a.String()))
+               if got != a {
+                       t.Errorf("Lookup(%q) = %#x, want %#x", a.String(), uint32(got), uint32(a))
+               }
+       }
+}
+
+func TestMisses(t *testing.T) { // Lookup must return 0 for every string listed here
+       testCases := []string{
+               "", // rejected by Lookup's len(s) == 0 guard
+               "\x00",
+               "\xff",
+               "A",
+               "DIV", // Lookup is case sensitive
+               "Div",
+               "dIV",
+               "aa",
+               "a\x00",
+               "ab",
+               "abb",
+               "abbr0",
+               "abbr ",
+               " abbr",
+               " a",
+               "acceptcharset",
+               "acceptCharset",
+               "accept_charset",
+               "h0",
+               "h1h2",
+               "h7",
+               "onClick",
+               "λ",
+               // The following string has the same hash (0xa1d7fab7) as "onmouseover".
+               "\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7",
+       }
+       for _, tc := range testCases {
+               got := Lookup([]byte(tc))
+               if got != 0 {
+                       t.Errorf("Lookup(%q): got %d, want 0", tc, got)
+               }
+       }
+}
+
+func TestForeignObject(t *testing.T) { // the all-lower-case and camel-case spellings are two distinct atoms
+       const (
+               afo = Foreignobject
+               afO = ForeignObject
+               sfo = "foreignobject"
+               sfO = "foreignObject"
+       )
+       if got := Lookup([]byte(sfo)); got != afo {
+               t.Errorf("Lookup(%q): got %#v, want %#v", sfo, got, afo)
+       }
+       if got := Lookup([]byte(sfO)); got != afO {
+               t.Errorf("Lookup(%q): got %#v, want %#v", sfO, got, afO)
+       }
+       if got := afo.String(); got != sfo {
+               t.Errorf("Atom(%#v).String(): got %q, want %q", afo, got, sfo)
+       }
+       if got := afO.String(); got != sfO {
+               t.Errorf("Atom(%#v).String(): got %q, want %q", afO, got, sfO)
+       }
+}
+
+func BenchmarkLookup(b *testing.B) { // times Lookup over 1000 known atom names per iteration
+       sortedTable := make([]string, 0, len(table))
+       for _, a := range table {
+               if a != 0 { // keep only occupied table slots
+                       sortedTable = append(sortedTable, a.String())
+               }
+       }
+       sort.Strings(sortedTable)
+
+       x := make([][]byte, 1000) // inputs cycle through the sorted names
+       for i := range x {
+               x[i] = []byte(sortedTable[i%len(sortedTable)])
+       }
+
+       b.ResetTimer() // keep the setup above out of the measurement
+       for i := 0; i < b.N; i++ {
+               for _, s := range x {
+                       Lookup(s)
+               }
+       }
+}
diff --git a/internal/html/atom/gen.go b/internal/html/atom/gen.go
new file mode 100644 (file)
index 0000000..1e249d1
--- /dev/null
@@ -0,0 +1,711 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build ignore
+
+//go:generate go run gen.go
+//go:generate go run gen.go -test
+
+package main
+
+import (
+       "bytes"
+       "flag"
+       "fmt"
+       "go/format"
+       "math/rand"
+       "os"
+       "sort"
+       "strings"
+)
+
+// identifier converts s to a Go exported identifier.
+// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
+func identifier(s string) string {
+       b := make([]byte, 0, len(s))
+       cap := true // upper-case the next letter; note that this local shadows the builtin cap
+       for _, c := range s {
+               if c == '-' { // drop the hyphen and capitalize whatever follows it
+                       cap = true
+                       continue
+               }
+               if cap && 'a' <= c && c <= 'z' {
+                       c -= 'a' - 'A' // ASCII lower case to upper case
+               }
+               cap = false
+               b = append(b, byte(c)) // byte(c) truncates the rune; assumes ASCII names - TODO confirm for all input lists
+       }
+       return string(b)
+}
+
+var test = flag.Bool("test", false, "generate table_test.go") // when set, main writes table_test.go and returns without generating table.go
+
+func genFile(name string, buf *bytes.Buffer) { // formats buf as Go source and writes it to name; exits with status 1 on any error
+       b, err := format.Source(buf.Bytes())
+       if err != nil { // the generated text is not valid Go source
+               fmt.Fprintln(os.Stderr, err)
+               os.Exit(1)
+       }
+       if err := os.WriteFile(name, b, 0644); err != nil {
+               fmt.Fprintln(os.Stderr, err)
+               os.Exit(1)
+       }
+}
+
+func main() { // generates table.go, or table_test.go when -test is given
+       flag.Parse()
+
+       var all []string
+       all = append(all, elements...)
+       all = append(all, attributes...)
+       all = append(all, eventHandlers...)
+       all = append(all, extra...)
+       sort.Strings(all)
+
+       // Drop adjacent duplicates in place (all is sorted); the lists overlap.
+       w := 0
+       for _, s := range all {
+               if w == 0 || all[w-1] != s {
+                       all[w] = s
+                       w++
+               }
+       }
+       all = all[:w]
+
+       if *test {
+               var buf bytes.Buffer
+               fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n") // the explicit \n plus Fprintln's own newline leaves a blank line
+               fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
+               fmt.Fprintln(&buf, "package atom\n")
+               fmt.Fprintln(&buf, "var testAtomList = []string{")
+               for _, s := range all {
+                       fmt.Fprintf(&buf, "\t%q,\n", s)
+               }
+               fmt.Fprintln(&buf, "}")
+
+               genFile("table_test.go", &buf)
+               return
+       }
+
+       // Find hash that minimizes table size.
+       var best *table
+       for i := 0; i < 1000000; i++ {
+               if best != nil && 1<<(best.k-1) < len(all) { // a table half this size has fewer slots than strings, so best cannot shrink
+                       break
+               }
+               h := rand.Uint32() // candidate starting value for fnv
+               for k := uint(0); k <= 16; k++ { // presumably capped at 16 because the second slot is taken from h>>16 of a 32-bit hash - confirm
+                       if best != nil && k >= best.k { // only a strictly smaller table is an improvement
+                               break
+                       }
+                       var t table
+                       if t.init(h, k, all) {
+                               best = &t
+                               break
+                       }
+               }
+       }
+       if best == nil {
+               fmt.Fprintf(os.Stderr, "failed to construct string table\n")
+               os.Exit(1)
+       }
+
+       // Lay out strings, using overlaps when possible.
+       layout := append([]string{}, all...) // work on a copy; all is still needed below
+
+       // Remove strings that are substrings of other strings
+       for changed := true; changed; {
+               changed = false
+               for i, s := range layout {
+                       if s == "" {
+                               continue
+                       }
+                       for j, t := range layout {
+                               if i != j && t != "" && strings.Contains(s, t) {
+                                       changed = true
+                                       layout[j] = ""
+                               }
+                       }
+               }
+       }
+
+       // Join strings where one suffix matches another prefix.
+       for {
+               // Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
+               // maximizing overlap length k.
+               besti := -1
+               bestj := -1
+               bestk := 0
+               for i, s := range layout {
+                       if s == "" {
+                               continue
+                       }
+                       for j, t := range layout {
+                               if i == j {
+                                       continue
+                               }
+                               for k := bestk + 1; k <= len(s) && k <= len(t); k++ { // only overlaps longer than the best so far are examined
+                                       if s[len(s)-k:] == t[:k] {
+                                               besti = i
+                                               bestj = j
+                                               bestk = k
+                                       }
+                               }
+                       }
+               }
+               if bestk > 0 {
+                       layout[besti] += layout[bestj][bestk:] // append only the part of j that does not overlap
+                       layout[bestj] = ""
+                       continue
+               }
+               break
+       }
+
+       text := strings.Join(layout, "")
+
+       atom := map[string]uint32{}
+       for _, s := range all {
+               off := strings.Index(text, s)
+               if off < 0 {
+                       panic("lost string " + s)
+               }
+               atom[s] = uint32(off<<8 | len(s)) // offset<<8 | length, the packing Atom.String decodes
+       }
+
+       var buf bytes.Buffer
+       // Generate the Go code.
+       fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
+       fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
+       fmt.Fprintln(&buf, "package atom\n\nconst (")
+
+       // Emit one constant per atom and track the longest name for maxAtomLen.
+       maxLen := 0
+       for _, s := range all {
+               if maxLen < len(s) {
+                       maxLen = len(s)
+               }
+               fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
+       }
+       fmt.Fprintln(&buf, ")\n")
+
+       fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
+       fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
+
+       fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
+       for i, s := range best.tab {
+               if s == "" { // empty slots are omitted and stay zero in the generated array
+                       continue
+               }
+               fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
+       }
+       fmt.Fprintf(&buf, "}\n")
+       datasize := (1 << best.k) * 4 // 4 bytes per table entry (Atom is a uint32)
+
+       fmt.Fprintln(&buf, "const atomText =")
+       textsize := len(text) // recorded before the loop below consumes text
+       for len(text) > 60 { // emit atomText as 60-byte quoted pieces joined with +
+               fmt.Fprintf(&buf, "\t%q +\n", text[:60])
+               text = text[60:]
+       }
+       fmt.Fprintf(&buf, "\t%q\n\n", text)
+
+       genFile("table.go", &buf)
+
+       fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
+}
+
+type byLen []string // has the Less/Swap/Len methods of sort.Interface; no use is visible in this part of the file
+
+func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } // longer strings order first
+func (x byLen) Swap(i, j int)      { x[i], x[j] = x[j], x[i] }
+func (x byLen) Len() int           { return len(x) }
+
+// fnv computes the FNV hash of s with an arbitrary starting value h.
+func fnv(h uint32, s string) uint32 {
+       for i := 0; i < len(s); i++ {
+               h ^= uint32(s[i]) // fold the next byte in first ...
+               h *= 16777619 // ... then multiply by 16777619 (0x01000193, the 32-bit FNV prime)
+       }
+       return h
+}
+
+// A table represents an attempt at constructing the lookup table.
+// The lookup table uses cuckoo hashing, meaning that each string
+// can be found in one of two positions.
+type table struct {
+       h0   uint32 // starting value handed to fnv
+       k    uint // number of hash bits used; tab has 1<<k slots
+       mask uint32 // 1<<k - 1, selects k bits of a hash
+       tab  []string // slot contents; "" marks an empty slot
+}
+
+// hash returns the two candidate slot indexes for s.
+func (t *table) hash(s string) (h1, h2 uint32) {
+       h := fnv(t.h0, s)
+       h1 = h & t.mask // masked low bits of h
+       h2 = (h >> 16) & t.mask // same mask applied to h>>16
+       return
+}
+
+// init initializes the table with the given parameters.
+// h0 is the initial hash value,
+// k is the number of bits of hash value to use, and
+// x is the list of strings to store in the table.
+// init returns false if the table cannot be constructed.
+func (t *table) init(h0 uint32, k uint, x []string) bool {
+       t.h0 = h0
+       t.k = k
+       t.tab = make([]string, 1<<k)
+       t.mask = 1<<k - 1
+       for _, s := range x {
+               if !t.insert(s) { // stop at the first string that cannot be placed
+                       return false
+               }
+       }
+       return true
+}
+
+// insert inserts s in the table. It reports whether a slot was found.
+func (t *table) insert(s string) bool {
+       h1, h2 := t.hash(s)
+       if t.tab[h1] == "" { // first choice is free
+               t.tab[h1] = s
+               return true
+       }
+       if t.tab[h2] == "" { // second choice is free
+               t.tab[h2] = s
+               return true
+       }
+       if t.push(h1, 0) { // both taken: try to move the occupant of h1 elsewhere
+               t.tab[h1] = s
+               return true
+       }
+       if t.push(h2, 0) { // otherwise try to move the occupant of h2
+               t.tab[h2] = s
+               return true
+       }
+       return false
+}
+
+// push attempts to push aside the entry in slot i.
+func (t *table) push(i uint32, depth int) bool {
+       if depth > len(t.tab) { // bound the recursion: give up after more moves than there are slots
+               return false
+       }
+       s := t.tab[i]
+       h1, h2 := t.hash(s)
+       j := h1 + h2 - i // i is one of s's two slots, so j is the other one
+       if t.tab[j] != "" && !t.push(j, depth+1) {
+               return false
+       }
+       t.tab[j] = s // slot i still holds s until the caller overwrites it
+       return true
+}
+
+// The lists of element names and attribute keys were taken from
+// https://html.spec.whatwg.org/multipage/indices.html#index
+// as of the "HTML Living Standard - Last Updated 16 April 2018" version.
+
+// "command", "keygen" and "menuitem" have been removed from the spec,
+// but are kept here for backwards compatibility.
+var elements = []string{
+       "a",
+       "abbr",
+       "address",
+       "area",
+       "article",
+       "aside",
+       "audio",
+       "b",
+       "base",
+       "bdi",
+       "bdo",
+       "blockquote",
+       "body",
+       "br",
+       "button",
+       "canvas",
+       "caption",
+       "cite",
+       "code",
+       "col",
+       "colgroup",
+       "command", // removed from the spec; kept for backwards compatibility
+       "data",
+       "datalist",
+       "dd",
+       "del",
+       "details",
+       "dfn",
+       "dialog",
+       "div",
+       "dl",
+       "dt",
+       "em",
+       "embed",
+       "fieldset",
+       "figcaption",
+       "figure",
+       "footer",
+       "form",
+       "h1",
+       "h2",
+       "h3",
+       "h4",
+       "h5",
+       "h6",
+       "head",
+       "header",
+       "hgroup",
+       "hr",
+       "html",
+       "i",
+       "iframe",
+       "img",
+       "input",
+       "ins",
+       "kbd",
+       "keygen", // removed from the spec; kept for backwards compatibility
+       "label",
+       "legend",
+       "li",
+       "link",
+       "main",
+       "map",
+       "mark",
+       "menu",
+       "menuitem", // removed from the spec; kept for backwards compatibility
+       "meta",
+       "meter",
+       "nav",
+       "noscript",
+       "object",
+       "ol",
+       "optgroup",
+       "option",
+       "output",
+       "p",
+       "param",
+       "picture",
+       "pre",
+       "progress",
+       "q",
+       "rp",
+       "rt",
+       "ruby",
+       "s",
+       "samp",
+       "script",
+       "section",
+       "select",
+       "slot",
+       "small",
+       "source",
+       "span",
+       "strong",
+       "style",
+       "sub",
+       "summary",
+       "sup",
+       "table",
+       "tbody",
+       "td",
+       "template",
+       "textarea",
+       "tfoot",
+       "th",
+       "thead",
+       "time",
+       "title",
+       "tr",
+       "track",
+       "u",
+       "ul",
+       "var",
+       "video",
+       "wbr",
+}
+
+// attributes lists the HTML attribute names, taken from
+// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
+//
+// "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
+// "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
+// but are kept here for backwards compatibility.
+//
+// Each name appears exactly once in this list.
+var attributes = []string{
+       "abbr",
+       "accept",
+       "accept-charset",
+       "accesskey",
+       "action",
+       "allowfullscreen",
+       "allowpaymentrequest",
+       "allowusermedia",
+       "alt",
+       "as",
+       "async",
+       "autocomplete",
+       "autofocus",
+       "autoplay",
+       "challenge",
+       "charset",
+       "checked",
+       "cite",
+       "class",
+       "color",
+       "cols",
+       "colspan",
+       "command",
+       "content",
+       "contenteditable",
+       "contextmenu",
+       "controls",
+       "coords",
+       "crossorigin",
+       "data",
+       "datetime",
+       "default",
+       "defer",
+       "dir",
+       "dirname",
+       "disabled",
+       "download",
+       "draggable",
+       "dropzone",
+       "enctype",
+       "for",
+       "form",
+       "formaction",
+       "formenctype",
+       "formmethod",
+       "formnovalidate",
+       "formtarget",
+       "headers",
+       "height",
+       "hidden",
+       "high",
+       "href",
+       "hreflang",
+       "http-equiv",
+       "icon",
+       "id",
+       "inputmode",
+       "integrity",
+       "is",
+       "ismap",
+       "itemid",
+       "itemprop",
+       "itemref",
+       "itemscope",
+       "itemtype",
+       "keytype",
+       "kind",
+       "label",
+       "lang",
+       "list",
+       "loop",
+       "low",
+       "manifest",
+       "max",
+       "maxlength",
+       "media",
+       "mediagroup",
+       "method",
+       "min",
+       "minlength",
+       "multiple",
+       "muted",
+       "name",
+       "nomodule",
+       "nonce",
+       "novalidate",
+       "open",
+       "optimum",
+       "pattern",
+       "ping",
+       "placeholder",
+       "playsinline",
+       "poster",
+       "preload",
+       "radiogroup",
+       "readonly",
+       "referrerpolicy",
+       "rel",
+       "required",
+       "reversed",
+       "rows",
+       "rowspan",
+       "sandbox",
+       "scope",
+       "scoped",
+       "seamless",
+       "selected",
+       "shape",
+       "size",
+       "sizes",
+       "sortable",
+       "sorted",
+       "slot",
+       "span",
+       "spellcheck",
+       "src",
+       "srcdoc",
+       "srclang",
+       "srcset",
+       "start",
+       "step",
+       "style",
+       "tabindex",
+       "target",
+       "title",
+       "translate",
+       "type",
+       "typemustmatch",
+       "updateviacache",
+       "usemap",
+       "value",
+       "width",
+       "workertype",
+       "wrap",
+}
+
+// eventHandlers lists the event handler names, each beginning with "on".
+//
+// "onautocomplete", "onautocompleteerror", "onmousewheel",
+// "onshow" and "onsort" have been removed from the spec,
+// but are kept here for backwards compatibility.
+var eventHandlers = []string{
+       "onabort",
+       "onautocomplete",
+       "onautocompleteerror",
+       "onauxclick",
+       "onafterprint",
+       "onbeforeprint",
+       "onbeforeunload",
+       "onblur",
+       "oncancel",
+       "oncanplay",
+       "oncanplaythrough",
+       "onchange",
+       "onclick",
+       "onclose",
+       "oncontextmenu",
+       "oncopy",
+       "oncuechange",
+       "oncut",
+       "ondblclick",
+       "ondrag",
+       "ondragend",
+       "ondragenter",
+       "ondragexit",
+       "ondragleave",
+       "ondragover",
+       "ondragstart",
+       "ondrop",
+       "ondurationchange",
+       "onemptied",
+       "onended",
+       "onerror",
+       "onfocus",
+       "onhashchange",
+       "oninput",
+       "oninvalid",
+       "onkeydown",
+       "onkeypress",
+       "onkeyup",
+       "onlanguagechange",
+       "onload",
+       "onloadeddata",
+       "onloadedmetadata",
+       "onloadend",
+       "onloadstart",
+       "onmessage",
+       "onmessageerror",
+       "onmousedown",
+       "onmouseenter",
+       "onmouseleave",
+       "onmousemove",
+       "onmouseout",
+       "onmouseover",
+       "onmouseup",
+       "onmousewheel",
+       "onwheel",
+       "onoffline",
+       "ononline",
+       "onpagehide",
+       "onpageshow",
+       "onpaste",
+       "onpause",
+       "onplay",
+       "onplaying",
+       "onpopstate",
+       "onprogress",
+       "onratechange",
+       "onreset",
+       "onresize",
+       "onrejectionhandled",
+       "onscroll",
+       "onsecuritypolicyviolation",
+       "onseeked",
+       "onseeking",
+       "onselect",
+       "onshow",
+       "onsort",
+       "onstalled",
+       "onstorage",
+       "onsubmit",
+       "onsuspend",
+       "ontimeupdate",
+       "ontoggle",
+       "onunhandledrejection",
+       "onunload",
+       "onvolumechange",
+       "onwaiting",
+}
+
+// extra lists ad-hoc values that are needed in addition to the lists above.
+var extra = []string{
+       "acronym",
+       "align",
+       "annotation",
+       "annotation-xml",
+       "applet",
+       "basefont",
+       "bgsound",
+       "big",
+       "blink",
+       "center",
+       "color",
+       "desc",
+       "face",
+       "font",
+       "foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
+       "foreignobject",
+       "frame",
+       "frameset",
+       "image",
+       "isindex", // "isindex" has been removed from the spec, but is kept here for backwards compatibility.
+       "listing",
+       "malignmark",
+       "marquee",
+       "math",
+       "mglyph",
+       "mi",
+       "mn",
+       "mo",
+       "ms",
+       "mtext",
+       "nobr",
+       "noembed",
+       "noframes",
+       "plaintext",
+       "prompt",
+       "public",
+       "rb",
+       "rtc",
+       "spacer",
+       "strike",
+       "svg",
+       "system",
+       "tt",
+       "xmp",
+}
diff --git a/internal/html/atom/table.go b/internal/html/atom/table.go
new file mode 100644 (file)
index 0000000..2a93886
--- /dev/null
@@ -0,0 +1,783 @@
+// Code generated by go generate gen.go; DO NOT EDIT.
+
+//go:generate go run gen.go
+
+package atom
+
+const (
+       A                         Atom = 0x1
+       Abbr                      Atom = 0x4
+       Accept                    Atom = 0x1a06
+       AcceptCharset             Atom = 0x1a0e
+       Accesskey                 Atom = 0x2c09
+       Acronym                   Atom = 0xaa07
+       Action                    Atom = 0x27206
+       Address                   Atom = 0x6f307
+       Align                     Atom = 0xb105
+       Allowfullscreen           Atom = 0x2080f
+       Allowpaymentrequest       Atom = 0xc113
+       Allowusermedia            Atom = 0xdd0e
+       Alt                       Atom = 0xf303
+       Annotation                Atom = 0x1c90a
+       AnnotationXml             Atom = 0x1c90e
+       Applet                    Atom = 0x31906
+       Area                      Atom = 0x35604
+       Article                   Atom = 0x3fc07
+       As                        Atom = 0x3c02
+       Aside                     Atom = 0x10705
+       Async                     Atom = 0xff05
+       Audio                     Atom = 0x11505
+       Autocomplete              Atom = 0x2780c
+       Autofocus                 Atom = 0x12109
+       Autoplay                  Atom = 0x13c08
+       B                         Atom = 0x101
+       Base                      Atom = 0x3b04
+       Basefont                  Atom = 0x3b08
+       Bdi                       Atom = 0xba03
+       Bdo                       Atom = 0x14b03
+       Bgsound                   Atom = 0x15e07
+       Big                       Atom = 0x17003
+       Blink                     Atom = 0x17305
+       Blockquote                Atom = 0x1870a
+       Body                      Atom = 0x2804
+       Br                        Atom = 0x202
+       Button                    Atom = 0x19106
+       Canvas                    Atom = 0x10306
+       Caption                   Atom = 0x23107
+       Center                    Atom = 0x22006
+       Challenge                 Atom = 0x29b09
+       Charset                   Atom = 0x2107
+       Checked                   Atom = 0x47907
+       Cite                      Atom = 0x19c04
+       Class                     Atom = 0x56405
+       Code                      Atom = 0x5c504
+       Col                       Atom = 0x1ab03
+       Colgroup                  Atom = 0x1ab08
+       Color                     Atom = 0x1bf05
+       Cols                      Atom = 0x1c404
+       Colspan                   Atom = 0x1c407
+       Command                   Atom = 0x1d707
+       Content                   Atom = 0x58b07
+       Contenteditable           Atom = 0x58b0f
+       Contextmenu               Atom = 0x3800b
+       Controls                  Atom = 0x1de08
+       Coords                    Atom = 0x1ea06
+       Crossorigin               Atom = 0x1fb0b
+       Data                      Atom = 0x4a504
+       Datalist                  Atom = 0x4a508
+       Datetime                  Atom = 0x2b808
+       Dd                        Atom = 0x2d702
+       Default                   Atom = 0x10a07
+       Defer                     Atom = 0x5c705
+       Del                       Atom = 0x45203
+       Desc                      Atom = 0x56104
+       Details                   Atom = 0x7207
+       Dfn                       Atom = 0x8703
+       Dialog                    Atom = 0xbb06
+       Dir                       Atom = 0x9303
+       Dirname                   Atom = 0x9307
+       Disabled                  Atom = 0x16408
+       Div                       Atom = 0x16b03
+       Dl                        Atom = 0x5e602
+       Download                  Atom = 0x46308
+       Draggable                 Atom = 0x17a09
+       Dropzone                  Atom = 0x40508
+       Dt                        Atom = 0x64b02
+       Em                        Atom = 0x6e02
+       Embed                     Atom = 0x6e05
+       Enctype                   Atom = 0x28d07
+       Face                      Atom = 0x21e04
+       Fieldset                  Atom = 0x22608
+       Figcaption                Atom = 0x22e0a
+       Figure                    Atom = 0x24806
+       Font                      Atom = 0x3f04
+       Footer                    Atom = 0xf606
+       For                       Atom = 0x25403
+       ForeignObject             Atom = 0x2540d
+       Foreignobject             Atom = 0x2610d
+       Form                      Atom = 0x26e04
+       Formaction                Atom = 0x26e0a
+       Formenctype               Atom = 0x2890b
+       Formmethod                Atom = 0x2a40a
+       Formnovalidate            Atom = 0x2ae0e
+       Formtarget                Atom = 0x2c00a
+       Frame                     Atom = 0x8b05
+       Frameset                  Atom = 0x8b08
+       H1                        Atom = 0x15c02
+       H2                        Atom = 0x2de02
+       H3                        Atom = 0x30d02
+       H4                        Atom = 0x34502
+       H5                        Atom = 0x34f02
+       H6                        Atom = 0x64d02
+       Head                      Atom = 0x33104
+       Header                    Atom = 0x33106
+       Headers                   Atom = 0x33107
+       Height                    Atom = 0x5206
+       Hgroup                    Atom = 0x2ca06
+       Hidden                    Atom = 0x2d506
+       High                      Atom = 0x2db04
+       Hr                        Atom = 0x15702
+       Href                      Atom = 0x2e004
+       Hreflang                  Atom = 0x2e008
+       Html                      Atom = 0x5604
+       HttpEquiv                 Atom = 0x2e80a
+       I                         Atom = 0x601
+       Icon                      Atom = 0x58a04
+       Id                        Atom = 0x10902
+       Iframe                    Atom = 0x2fc06
+       Image                     Atom = 0x30205
+       Img                       Atom = 0x30703
+       Input                     Atom = 0x44b05
+       Inputmode                 Atom = 0x44b09
+       Ins                       Atom = 0x20403
+       Integrity                 Atom = 0x23f09
+       Is                        Atom = 0x16502
+       Isindex                   Atom = 0x30f07
+       Ismap                     Atom = 0x31605
+       Itemid                    Atom = 0x38b06
+       Itemprop                  Atom = 0x19d08
+       Itemref                   Atom = 0x3cd07
+       Itemscope                 Atom = 0x67109
+       Itemtype                  Atom = 0x31f08
+       Kbd                       Atom = 0xb903
+       Keygen                    Atom = 0x3206
+       Keytype                   Atom = 0xd607
+       Kind                      Atom = 0x17704
+       Label                     Atom = 0x5905
+       Lang                      Atom = 0x2e404
+       Legend                    Atom = 0x18106
+       Li                        Atom = 0xb202
+       Link                      Atom = 0x17404
+       List                      Atom = 0x4a904
+       Listing                   Atom = 0x4a907
+       Loop                      Atom = 0x5d04
+       Low                       Atom = 0xc303
+       Main                      Atom = 0x1004
+       Malignmark                Atom = 0xb00a
+       Manifest                  Atom = 0x6d708
+       Map                       Atom = 0x31803
+       Mark                      Atom = 0xb604
+       Marquee                   Atom = 0x32707
+       Math                      Atom = 0x32e04
+       Max                       Atom = 0x33d03
+       Maxlength                 Atom = 0x33d09
+       Media                     Atom = 0xe605
+       Mediagroup                Atom = 0xe60a
+       Menu                      Atom = 0x38704
+       Menuitem                  Atom = 0x38708
+       Meta                      Atom = 0x4b804
+       Meter                     Atom = 0x9805
+       Method                    Atom = 0x2a806
+       Mglyph                    Atom = 0x30806
+       Mi                        Atom = 0x34702
+       Min                       Atom = 0x34703
+       Minlength                 Atom = 0x34709
+       Mn                        Atom = 0x2b102
+       Mo                        Atom = 0xa402
+       Ms                        Atom = 0x67402
+       Mtext                     Atom = 0x35105
+       Multiple                  Atom = 0x35f08
+       Muted                     Atom = 0x36705
+       Name                      Atom = 0x9604
+       Nav                       Atom = 0x1303
+       Nobr                      Atom = 0x3704
+       Noembed                   Atom = 0x6c07
+       Noframes                  Atom = 0x8908
+       Nomodule                  Atom = 0xa208
+       Nonce                     Atom = 0x1a605
+       Noscript                  Atom = 0x21608
+       Novalidate                Atom = 0x2b20a
+       Object                    Atom = 0x26806
+       Ol                        Atom = 0x13702
+       Onabort                   Atom = 0x19507
+       Onafterprint              Atom = 0x2360c
+       Onautocomplete            Atom = 0x2760e
+       Onautocompleteerror       Atom = 0x27613
+       Onauxclick                Atom = 0x61f0a
+       Onbeforeprint             Atom = 0x69e0d
+       Onbeforeunload            Atom = 0x6e70e
+       Onblur                    Atom = 0x56d06
+       Oncancel                  Atom = 0x11908
+       Oncanplay                 Atom = 0x14d09
+       Oncanplaythrough          Atom = 0x14d10
+       Onchange                  Atom = 0x41b08
+       Onclick                   Atom = 0x2f507
+       Onclose                   Atom = 0x36c07
+       Oncontextmenu             Atom = 0x37e0d
+       Oncopy                    Atom = 0x39106
+       Oncuechange               Atom = 0x3970b
+       Oncut                     Atom = 0x3a205
+       Ondblclick                Atom = 0x3a70a
+       Ondrag                    Atom = 0x3b106
+       Ondragend                 Atom = 0x3b109
+       Ondragenter               Atom = 0x3ba0b
+       Ondragexit                Atom = 0x3c50a
+       Ondragleave               Atom = 0x3df0b
+       Ondragover                Atom = 0x3ea0a
+       Ondragstart               Atom = 0x3f40b
+       Ondrop                    Atom = 0x40306
+       Ondurationchange          Atom = 0x41310
+       Onemptied                 Atom = 0x40a09
+       Onended                   Atom = 0x42307
+       Onerror                   Atom = 0x42a07
+       Onfocus                   Atom = 0x43107
+       Onhashchange              Atom = 0x43d0c
+       Oninput                   Atom = 0x44907
+       Oninvalid                 Atom = 0x45509
+       Onkeydown                 Atom = 0x45e09
+       Onkeypress                Atom = 0x46b0a
+       Onkeyup                   Atom = 0x48007
+       Onlanguagechange          Atom = 0x48d10
+       Onload                    Atom = 0x49d06
+       Onloadeddata              Atom = 0x49d0c
+       Onloadedmetadata          Atom = 0x4b010
+       Onloadend                 Atom = 0x4c609
+       Onloadstart               Atom = 0x4cf0b
+       Onmessage                 Atom = 0x4da09
+       Onmessageerror            Atom = 0x4da0e
+       Onmousedown               Atom = 0x4e80b
+       Onmouseenter              Atom = 0x4f30c
+       Onmouseleave              Atom = 0x4ff0c
+       Onmousemove               Atom = 0x50b0b
+       Onmouseout                Atom = 0x5160a
+       Onmouseover               Atom = 0x5230b
+       Onmouseup                 Atom = 0x52e09
+       Onmousewheel              Atom = 0x53c0c
+       Onoffline                 Atom = 0x54809
+       Ononline                  Atom = 0x55108
+       Onpagehide                Atom = 0x5590a
+       Onpageshow                Atom = 0x5730a
+       Onpaste                   Atom = 0x57f07
+       Onpause                   Atom = 0x59a07
+       Onplay                    Atom = 0x5a406
+       Onplaying                 Atom = 0x5a409
+       Onpopstate                Atom = 0x5ad0a
+       Onprogress                Atom = 0x5b70a
+       Onratechange              Atom = 0x5cc0c
+       Onrejectionhandled        Atom = 0x5d812
+       Onreset                   Atom = 0x5ea07
+       Onresize                  Atom = 0x5f108
+       Onscroll                  Atom = 0x60008
+       Onsecuritypolicyviolation Atom = 0x60819
+       Onseeked                  Atom = 0x62908
+       Onseeking                 Atom = 0x63109
+       Onselect                  Atom = 0x63a08
+       Onshow                    Atom = 0x64406
+       Onsort                    Atom = 0x64f06
+       Onstalled                 Atom = 0x65909
+       Onstorage                 Atom = 0x66209
+       Onsubmit                  Atom = 0x66b08
+       Onsuspend                 Atom = 0x67b09
+       Ontimeupdate              Atom = 0x400c
+       Ontoggle                  Atom = 0x68408
+       Onunhandledrejection      Atom = 0x68c14
+       Onunload                  Atom = 0x6ab08
+       Onvolumechange            Atom = 0x6b30e
+       Onwaiting                 Atom = 0x6c109
+       Onwheel                   Atom = 0x6ca07
+       Open                      Atom = 0x1a304
+       Optgroup                  Atom = 0x5f08
+       Optimum                   Atom = 0x6d107
+       Option                    Atom = 0x6e306
+       Output                    Atom = 0x51d06
+       P                         Atom = 0xc01
+       Param                     Atom = 0xc05
+       Pattern                   Atom = 0x6607
+       Picture                   Atom = 0x7b07
+       Ping                      Atom = 0xef04
+       Placeholder               Atom = 0x1310b
+       Plaintext                 Atom = 0x1b209
+       Playsinline               Atom = 0x1400b
+       Poster                    Atom = 0x2cf06
+       Pre                       Atom = 0x47003
+       Preload                   Atom = 0x48607
+       Progress                  Atom = 0x5b908
+       Prompt                    Atom = 0x53606
+       Public                    Atom = 0x58606
+       Q                         Atom = 0xcf01
+       Radiogroup                Atom = 0x30a
+       Rb                        Atom = 0x3a02
+       Readonly                  Atom = 0x35708
+       Referrerpolicy            Atom = 0x3d10e
+       Rel                       Atom = 0x48703
+       Required                  Atom = 0x24c08
+       Reversed                  Atom = 0x8008
+       Rows                      Atom = 0x9c04
+       Rowspan                   Atom = 0x9c07
+       Rp                        Atom = 0x23c02
+       Rt                        Atom = 0x19a02
+       Rtc                       Atom = 0x19a03
+       Ruby                      Atom = 0xfb04
+       S                         Atom = 0x2501
+       Samp                      Atom = 0x7804
+       Sandbox                   Atom = 0x12907
+       Scope                     Atom = 0x67505
+       Scoped                    Atom = 0x67506
+       Script                    Atom = 0x21806
+       Seamless                  Atom = 0x37108
+       Section                   Atom = 0x56807
+       Select                    Atom = 0x63c06
+       Selected                  Atom = 0x63c08
+       Shape                     Atom = 0x1e505
+       Size                      Atom = 0x5f504
+       Sizes                     Atom = 0x5f505
+       Slot                      Atom = 0x1ef04
+       Small                     Atom = 0x20605
+       Sortable                  Atom = 0x65108
+       Sorted                    Atom = 0x33706
+       Source                    Atom = 0x37806
+       Spacer                    Atom = 0x43706
+       Span                      Atom = 0x9f04
+       Spellcheck                Atom = 0x4740a
+       Src                       Atom = 0x5c003
+       Srcdoc                    Atom = 0x5c006
+       Srclang                   Atom = 0x5f907
+       Srcset                    Atom = 0x6f906
+       Start                     Atom = 0x3fa05
+       Step                      Atom = 0x58304
+       Strike                    Atom = 0xd206
+       Strong                    Atom = 0x6dd06
+       Style                     Atom = 0x6ff05
+       Sub                       Atom = 0x66d03
+       Summary                   Atom = 0x70407
+       Sup                       Atom = 0x70b03
+       Svg                       Atom = 0x70e03
+       System                    Atom = 0x71106
+       Tabindex                  Atom = 0x4be08
+       Table                     Atom = 0x59505
+       Target                    Atom = 0x2c406
+       Tbody                     Atom = 0x2705
+       Td                        Atom = 0x9202
+       Template                  Atom = 0x71408
+       Textarea                  Atom = 0x35208
+       Tfoot                     Atom = 0xf505
+       Th                        Atom = 0x15602
+       Thead                     Atom = 0x33005
+       Time                      Atom = 0x4204
+       Title                     Atom = 0x11005
+       Tr                        Atom = 0xcc02
+       Track                     Atom = 0x1ba05
+       Translate                 Atom = 0x1f209
+       Tt                        Atom = 0x6802
+       Type                      Atom = 0xd904
+       Typemustmatch             Atom = 0x2900d
+       U                         Atom = 0xb01
+       Ul                        Atom = 0xa702
+       Updateviacache            Atom = 0x460e
+       Usemap                    Atom = 0x59e06
+       Value                     Atom = 0x1505
+       Var                       Atom = 0x16d03
+       Video                     Atom = 0x2f105
+       Wbr                       Atom = 0x57c03
+       Width                     Atom = 0x64905
+       Workertype                Atom = 0x71c0a
+       Wrap                      Atom = 0x72604
+       Xmp                       Atom = 0x12f03
+)
+
+const hash0 = 0x81cdf10e
+
+const maxAtomLen = 25
+
+var table = [1 << 9]Atom{
+       0x1:   0xe60a,  // mediagroup
+       0x2:   0x2e404, // lang
+       0x4:   0x2c09,  // accesskey
+       0x5:   0x8b08,  // frameset
+       0x7:   0x63a08, // onselect
+       0x8:   0x71106, // system
+       0xa:   0x64905, // width
+       0xc:   0x2890b, // formenctype
+       0xd:   0x13702, // ol
+       0xe:   0x3970b, // oncuechange
+       0x10:  0x14b03, // bdo
+       0x11:  0x11505, // audio
+       0x12:  0x17a09, // draggable
+       0x14:  0x2f105, // video
+       0x15:  0x2b102, // mn
+       0x16:  0x38704, // menu
+       0x17:  0x2cf06, // poster
+       0x19:  0xf606,  // footer
+       0x1a:  0x2a806, // method
+       0x1b:  0x2b808, // datetime
+       0x1c:  0x19507, // onabort
+       0x1d:  0x460e,  // updateviacache
+       0x1e:  0xff05,  // async
+       0x1f:  0x49d06, // onload
+       0x21:  0x11908, // oncancel
+       0x22:  0x62908, // onseeked
+       0x23:  0x30205, // image
+       0x24:  0x5d812, // onrejectionhandled
+       0x26:  0x17404, // link
+       0x27:  0x51d06, // output
+       0x28:  0x33104, // head
+       0x29:  0x4ff0c, // onmouseleave
+       0x2a:  0x57f07, // onpaste
+       0x2b:  0x5a409, // onplaying
+       0x2c:  0x1c407, // colspan
+       0x2f:  0x1bf05, // color
+       0x30:  0x5f504, // size
+       0x31:  0x2e80a, // http-equiv
+       0x33:  0x601,   // i
+       0x34:  0x5590a, // onpagehide
+       0x35:  0x68c14, // onunhandledrejection
+       0x37:  0x42a07, // onerror
+       0x3a:  0x3b08,  // basefont
+       0x3f:  0x1303,  // nav
+       0x40:  0x17704, // kind
+       0x41:  0x35708, // readonly
+       0x42:  0x30806, // mglyph
+       0x44:  0xb202,  // li
+       0x46:  0x2d506, // hidden
+       0x47:  0x70e03, // svg
+       0x48:  0x58304, // step
+       0x49:  0x23f09, // integrity
+       0x4a:  0x58606, // public
+       0x4c:  0x1ab03, // col
+       0x4d:  0x1870a, // blockquote
+       0x4e:  0x34f02, // h5
+       0x50:  0x5b908, // progress
+       0x51:  0x5f505, // sizes
+       0x52:  0x34502, // h4
+       0x56:  0x33005, // thead
+       0x57:  0xd607,  // keytype
+       0x58:  0x5b70a, // onprogress
+       0x59:  0x44b09, // inputmode
+       0x5a:  0x3b109, // ondragend
+       0x5d:  0x3a205, // oncut
+       0x5e:  0x43706, // spacer
+       0x5f:  0x1ab08, // colgroup
+       0x62:  0x16502, // is
+       0x65:  0x3c02,  // as
+       0x66:  0x54809, // onoffline
+       0x67:  0x33706, // sorted
+       0x69:  0x48d10, // onlanguagechange
+       0x6c:  0x43d0c, // onhashchange
+       0x6d:  0x9604,  // name
+       0x6e:  0xf505,  // tfoot
+       0x6f:  0x56104, // desc
+       0x70:  0x33d03, // max
+       0x72:  0x1ea06, // coords
+       0x73:  0x30d02, // h3
+       0x74:  0x6e70e, // onbeforeunload
+       0x75:  0x9c04,  // rows
+       0x76:  0x63c06, // select
+       0x77:  0x9805,  // meter
+       0x78:  0x38b06, // itemid
+       0x79:  0x53c0c, // onmousewheel
+       0x7a:  0x5c006, // srcdoc
+       0x7d:  0x1ba05, // track
+       0x7f:  0x31f08, // itemtype
+       0x82:  0xa402,  // mo
+       0x83:  0x41b08, // onchange
+       0x84:  0x33107, // headers
+       0x85:  0x5cc0c, // onratechange
+       0x86:  0x60819, // onsecuritypolicyviolation
+       0x88:  0x4a508, // datalist
+       0x89:  0x4e80b, // onmousedown
+       0x8a:  0x1ef04, // slot
+       0x8b:  0x4b010, // onloadedmetadata
+       0x8c:  0x1a06,  // accept
+       0x8d:  0x26806, // object
+       0x91:  0x6b30e, // onvolumechange
+       0x92:  0x2107,  // charset
+       0x93:  0x27613, // onautocompleteerror
+       0x94:  0xc113,  // allowpaymentrequest
+       0x95:  0x2804,  // body
+       0x96:  0x10a07, // default
+       0x97:  0x63c08, // selected
+       0x98:  0x21e04, // face
+       0x99:  0x1e505, // shape
+       0x9b:  0x68408, // ontoggle
+       0x9e:  0x64b02, // dt
+       0x9f:  0xb604,  // mark
+       0xa1:  0xb01,   // u
+       0xa4:  0x6ab08, // onunload
+       0xa5:  0x5d04,  // loop
+       0xa6:  0x16408, // disabled
+       0xaa:  0x42307, // onended
+       0xab:  0xb00a,  // malignmark
+       0xad:  0x67b09, // onsuspend
+       0xae:  0x35105, // mtext
+       0xaf:  0x64f06, // onsort
+       0xb0:  0x19d08, // itemprop
+       0xb3:  0x67109, // itemscope
+       0xb4:  0x17305, // blink
+       0xb6:  0x3b106, // ondrag
+       0xb7:  0xa702,  // ul
+       0xb8:  0x26e04, // form
+       0xb9:  0x12907, // sandbox
+       0xba:  0x8b05,  // frame
+       0xbb:  0x1505,  // value
+       0xbc:  0x66209, // onstorage
+       0xbf:  0xaa07,  // acronym
+       0xc0:  0x19a02, // rt
+       0xc2:  0x202,   // br
+       0xc3:  0x22608, // fieldset
+       0xc4:  0x2900d, // typemustmatch
+       0xc5:  0xa208,  // nomodule
+       0xc6:  0x6c07,  // noembed
+       0xc7:  0x69e0d, // onbeforeprint
+       0xc8:  0x19106, // button
+       0xc9:  0x2f507, // onclick
+       0xca:  0x70407, // summary
+       0xcd:  0xfb04,  // ruby
+       0xce:  0x56405, // class
+       0xcf:  0x3f40b, // ondragstart
+       0xd0:  0x23107, // caption
+       0xd4:  0xdd0e,  // allowusermedia
+       0xd5:  0x4cf0b, // onloadstart
+       0xd9:  0x16b03, // div
+       0xda:  0x4a904, // list
+       0xdb:  0x32e04, // math
+       0xdc:  0x44b05, // input
+       0xdf:  0x3ea0a, // ondragover
+       0xe0:  0x2de02, // h2
+       0xe2:  0x1b209, // plaintext
+       0xe4:  0x4f30c, // onmouseenter
+       0xe7:  0x47907, // checked
+       0xe8:  0x47003, // pre
+       0xea:  0x35f08, // multiple
+       0xeb:  0xba03,  // bdi
+       0xec:  0x33d09, // maxlength
+       0xed:  0xcf01,  // q
+       0xee:  0x61f0a, // onauxclick
+       0xf0:  0x57c03, // wbr
+       0xf2:  0x3b04,  // base
+       0xf3:  0x6e306, // option
+       0xf5:  0x41310, // ondurationchange
+       0xf7:  0x8908,  // noframes
+       0xf9:  0x40508, // dropzone
+       0xfb:  0x67505, // scope
+       0xfc:  0x8008,  // reversed
+       0xfd:  0x3ba0b, // ondragenter
+       0xfe:  0x3fa05, // start
+       0xff:  0x12f03, // xmp
+       0x100: 0x5f907, // srclang
+       0x101: 0x30703, // img
+       0x104: 0x101,   // b
+       0x105: 0x25403, // for
+       0x106: 0x10705, // aside
+       0x107: 0x44907, // oninput
+       0x108: 0x35604, // area
+       0x109: 0x2a40a, // formmethod
+       0x10a: 0x72604, // wrap
+       0x10c: 0x23c02, // rp
+       0x10d: 0x46b0a, // onkeypress
+       0x10e: 0x6802,  // tt
+       0x110: 0x34702, // mi
+       0x111: 0x36705, // muted
+       0x112: 0xf303,  // alt
+       0x113: 0x5c504, // code
+       0x114: 0x6e02,  // em
+       0x115: 0x3c50a, // ondragexit
+       0x117: 0x9f04,  // span
+       0x119: 0x6d708, // manifest
+       0x11a: 0x38708, // menuitem
+       0x11b: 0x58b07, // content
+       0x11d: 0x6c109, // onwaiting
+       0x11f: 0x4c609, // onloadend
+       0x121: 0x37e0d, // oncontextmenu
+       0x123: 0x56d06, // onblur
+       0x124: 0x3fc07, // article
+       0x125: 0x9303,  // dir
+       0x126: 0xef04,  // ping
+       0x127: 0x24c08, // required
+       0x128: 0x45509, // oninvalid
+       0x129: 0xb105,  // align
+       0x12b: 0x58a04, // icon
+       0x12c: 0x64d02, // h6
+       0x12d: 0x1c404, // cols
+       0x12e: 0x22e0a, // figcaption
+       0x12f: 0x45e09, // onkeydown
+       0x130: 0x66b08, // onsubmit
+       0x131: 0x14d09, // oncanplay
+       0x132: 0x70b03, // sup
+       0x133: 0xc01,   // p
+       0x135: 0x40a09, // onemptied
+       0x136: 0x39106, // oncopy
+       0x137: 0x19c04, // cite
+       0x138: 0x3a70a, // ondblclick
+       0x13a: 0x50b0b, // onmousemove
+       0x13c: 0x66d03, // sub
+       0x13d: 0x48703, // rel
+       0x13e: 0x5f08,  // optgroup
+       0x142: 0x9c07,  // rowspan
+       0x143: 0x37806, // source
+       0x144: 0x21608, // noscript
+       0x145: 0x1a304, // open
+       0x146: 0x20403, // ins
+       0x147: 0x2540d, // foreignObject
+       0x148: 0x5ad0a, // onpopstate
+       0x14a: 0x28d07, // enctype
+       0x14b: 0x2760e, // onautocomplete
+       0x14c: 0x35208, // textarea
+       0x14e: 0x2780c, // autocomplete
+       0x14f: 0x15702, // hr
+       0x150: 0x1de08, // controls
+       0x151: 0x10902, // id
+       0x153: 0x2360c, // onafterprint
+       0x155: 0x2610d, // foreignobject
+       0x156: 0x32707, // marquee
+       0x157: 0x59a07, // onpause
+       0x158: 0x5e602, // dl
+       0x159: 0x5206,  // height
+       0x15a: 0x34703, // min
+       0x15b: 0x9307,  // dirname
+       0x15c: 0x1f209, // translate
+       0x15d: 0x5604,  // html
+       0x15e: 0x34709, // minlength
+       0x15f: 0x48607, // preload
+       0x160: 0x71408, // template
+       0x161: 0x3df0b, // ondragleave
+       0x162: 0x3a02,  // rb
+       0x164: 0x5c003, // src
+       0x165: 0x6dd06, // strong
+       0x167: 0x7804,  // samp
+       0x168: 0x6f307, // address
+       0x169: 0x55108, // ononline
+       0x16b: 0x1310b, // placeholder
+       0x16c: 0x2c406, // target
+       0x16d: 0x20605, // small
+       0x16e: 0x6ca07, // onwheel
+       0x16f: 0x1c90a, // annotation
+       0x170: 0x4740a, // spellcheck
+       0x171: 0x7207,  // details
+       0x172: 0x10306, // canvas
+       0x173: 0x12109, // autofocus
+       0x174: 0xc05,   // param
+       0x176: 0x46308, // download
+       0x177: 0x45203, // del
+       0x178: 0x36c07, // onclose
+       0x179: 0xb903,  // kbd
+       0x17a: 0x31906, // applet
+       0x17b: 0x2e004, // href
+       0x17c: 0x5f108, // onresize
+       0x17e: 0x49d0c, // onloadeddata
+       0x180: 0xcc02,  // tr
+       0x181: 0x2c00a, // formtarget
+       0x182: 0x11005, // title
+       0x183: 0x6ff05, // style
+       0x184: 0xd206,  // strike
+       0x185: 0x59e06, // usemap
+       0x186: 0x2fc06, // iframe
+       0x187: 0x1004,  // main
+       0x189: 0x7b07,  // picture
+       0x18c: 0x31605, // ismap
+       0x18e: 0x4a504, // data
+       0x18f: 0x5905,  // label
+       0x191: 0x3d10e, // referrerpolicy
+       0x192: 0x15602, // th
+       0x194: 0x53606, // prompt
+       0x195: 0x56807, // section
+       0x197: 0x6d107, // optimum
+       0x198: 0x2db04, // high
+       0x199: 0x15c02, // h1
+       0x19a: 0x65909, // onstalled
+       0x19b: 0x16d03, // var
+       0x19c: 0x4204,  // time
+       0x19e: 0x67402, // ms
+       0x19f: 0x33106, // header
+       0x1a0: 0x4da09, // onmessage
+       0x1a1: 0x1a605, // nonce
+       0x1a2: 0x26e0a, // formaction
+       0x1a3: 0x22006, // center
+       0x1a4: 0x3704,  // nobr
+       0x1a5: 0x59505, // table
+       0x1a6: 0x4a907, // listing
+       0x1a7: 0x18106, // legend
+       0x1a9: 0x29b09, // challenge
+       0x1aa: 0x24806, // figure
+       0x1ab: 0xe605,  // media
+       0x1ae: 0xd904,  // type
+       0x1af: 0x3f04,  // font
+       0x1b0: 0x4da0e, // onmessageerror
+       0x1b1: 0x37108, // seamless
+       0x1b2: 0x8703,  // dfn
+       0x1b3: 0x5c705, // defer
+       0x1b4: 0xc303,  // low
+       0x1b5: 0x19a03, // rtc
+       0x1b6: 0x5230b, // onmouseover
+       0x1b7: 0x2b20a, // novalidate
+       0x1b8: 0x71c0a, // workertype
+       0x1ba: 0x3cd07, // itemref
+       0x1bd: 0x1,     // a
+       0x1be: 0x31803, // map
+       0x1bf: 0x400c,  // ontimeupdate
+       0x1c0: 0x15e07, // bgsound
+       0x1c1: 0x3206,  // keygen
+       0x1c2: 0x2705,  // tbody
+       0x1c5: 0x64406, // onshow
+       0x1c7: 0x2501,  // s
+       0x1c8: 0x6607,  // pattern
+       0x1cc: 0x14d10, // oncanplaythrough
+       0x1ce: 0x2d702, // dd
+       0x1cf: 0x6f906, // srcset
+       0x1d0: 0x17003, // big
+       0x1d2: 0x65108, // sortable
+       0x1d3: 0x48007, // onkeyup
+       0x1d5: 0x5a406, // onplay
+       0x1d7: 0x4b804, // meta
+       0x1d8: 0x40306, // ondrop
+       0x1da: 0x60008, // onscroll
+       0x1db: 0x1fb0b, // crossorigin
+       0x1dc: 0x5730a, // onpageshow
+       0x1dd: 0x4,     // abbr
+       0x1de: 0x9202,  // td
+       0x1df: 0x58b0f, // contenteditable
+       0x1e0: 0x27206, // action
+       0x1e1: 0x1400b, // playsinline
+       0x1e2: 0x43107, // onfocus
+       0x1e3: 0x2e008, // hreflang
+       0x1e5: 0x5160a, // onmouseout
+       0x1e6: 0x5ea07, // onreset
+       0x1e7: 0x13c08, // autoplay
+       0x1e8: 0x63109, // onseeking
+       0x1ea: 0x67506, // scoped
+       0x1ec: 0x30a,   // radiogroup
+       0x1ee: 0x3800b, // contextmenu
+       0x1ef: 0x52e09, // onmouseup
+       0x1f1: 0x2ca06, // hgroup
+       0x1f2: 0x2080f, // allowfullscreen
+       0x1f3: 0x4be08, // tabindex
+       0x1f6: 0x30f07, // isindex
+       0x1f7: 0x1a0e,  // accept-charset
+       0x1f8: 0x2ae0e, // formnovalidate
+       0x1fb: 0x1c90e, // annotation-xml
+       0x1fc: 0x6e05,  // embed
+       0x1fd: 0x21806, // script
+       0x1fe: 0xbb06,  // dialog
+       0x1ff: 0x1d707, // command
+}
+
+// atomText is the single string that stores the text of every atom. Each value
+// in the table that precedes this constant packs an offset into atomText in
+// its upper bits and the atom's length in its low byte; for example 0x30a
+// ("radiogroup") is offset 3, length 10, and 0x1004 ("main") is offset 0x10,
+// length 4. Names overlap wherever one ends with the start of another.
+const atomText = "abbradiogrouparamainavalueaccept-charsetbodyaccesskeygenobrb" +
+       "asefontimeupdateviacacheightmlabelooptgroupatternoembedetail" +
+       "sampictureversedfnoframesetdirnameterowspanomoduleacronymali" +
+       "gnmarkbdialogallowpaymentrequestrikeytypeallowusermediagroup" +
+       "ingaltfooterubyasyncanvasidefaultitleaudioncancelautofocusan" +
+       "dboxmplaceholderautoplaysinlinebdoncanplaythrough1bgsoundisa" +
+       "bledivarbigblinkindraggablegendblockquotebuttonabortcitempro" +
+       "penoncecolgrouplaintextrackcolorcolspannotation-xmlcommandco" +
+       "ntrolshapecoordslotranslatecrossoriginsmallowfullscreenoscri" +
+       "ptfacenterfieldsetfigcaptionafterprintegrityfigurequiredfore" +
+       "ignObjectforeignobjectformactionautocompleteerrorformenctype" +
+       "mustmatchallengeformmethodformnovalidatetimeformtargethgroup" +
+       "osterhiddenhigh2hreflanghttp-equivideonclickiframeimageimgly" +
+       "ph3isindexismappletitemtypemarqueematheadersortedmaxlength4m" +
+       "inlength5mtextareadonlymultiplemutedoncloseamlessourceoncont" +
+       "extmenuitemidoncopyoncuechangeoncutondblclickondragendondrag" +
+       "enterondragexitemreferrerpolicyondragleaveondragoverondragst" +
+       "articleondropzonemptiedondurationchangeonendedonerroronfocus" +
+       "paceronhashchangeoninputmodeloninvalidonkeydownloadonkeypres" +
+       "spellcheckedonkeyupreloadonlanguagechangeonloadeddatalisting" +
+       "onloadedmetadatabindexonloadendonloadstartonmessageerroronmo" +
+       "usedownonmouseenteronmouseleaveonmousemoveonmouseoutputonmou" +
+       "seoveronmouseupromptonmousewheelonofflineononlineonpagehides" +
+       "classectionbluronpageshowbronpastepublicontenteditableonpaus" +
+       "emaponplayingonpopstateonprogressrcdocodeferonratechangeonre" +
+       "jectionhandledonresetonresizesrclangonscrollonsecuritypolicy" +
+       "violationauxclickonseekedonseekingonselectedonshowidth6onsor" +
+       "tableonstalledonstorageonsubmitemscopedonsuspendontoggleonun" +
+       "handledrejectionbeforeprintonunloadonvolumechangeonwaitingon" +
+       "wheeloptimumanifestrongoptionbeforeunloaddressrcsetstylesumm" +
+       "arysupsvgsystemplateworkertypewrap"
diff --git a/internal/html/atom/table_test.go b/internal/html/atom/table_test.go
new file mode 100644 (file)
index 0000000..8a30762
--- /dev/null
@@ -0,0 +1,376 @@
+// Code generated by go generate gen.go; DO NOT EDIT.
+
+//go:generate go run gen.go -test
+
+package atom
+
+// testAtomList holds the atom names used by this package's tests. The file is
+// produced by "go run gen.go -test" (see the go:generate directive above), so
+// changes belong in the generator rather than here. Entries are kept in
+// byte-wise sorted order, which is why "foreignObject" precedes
+// "foreignobject".
+var testAtomList = []string{
+       "a",
+       "abbr",
+       "accept",
+       "accept-charset",
+       "accesskey",
+       "acronym",
+       "action",
+       "address",
+       "align",
+       "allowfullscreen",
+       "allowpaymentrequest",
+       "allowusermedia",
+       "alt",
+       "annotation",
+       "annotation-xml",
+       "applet",
+       "area",
+       "article",
+       "as",
+       "aside",
+       "async",
+       "audio",
+       "autocomplete",
+       "autofocus",
+       "autoplay",
+       "b",
+       "base",
+       "basefont",
+       "bdi",
+       "bdo",
+       "bgsound",
+       "big",
+       "blink",
+       "blockquote",
+       "body",
+       "br",
+       "button",
+       "canvas",
+       "caption",
+       "center",
+       "challenge",
+       "charset",
+       "checked",
+       "cite",
+       "class",
+       "code",
+       "col",
+       "colgroup",
+       "color",
+       "cols",
+       "colspan",
+       "command",
+       "content",
+       "contenteditable",
+       "contextmenu",
+       "controls",
+       "coords",
+       "crossorigin",
+       "data",
+       "datalist",
+       "datetime",
+       "dd",
+       "default",
+       "defer",
+       "del",
+       "desc",
+       "details",
+       "dfn",
+       "dialog",
+       "dir",
+       "dirname",
+       "disabled",
+       "div",
+       "dl",
+       "download",
+       "draggable",
+       "dropzone",
+       "dt",
+       "em",
+       "embed",
+       "enctype",
+       "face",
+       "fieldset",
+       "figcaption",
+       "figure",
+       "font",
+       "footer",
+       "for",
+       "foreignObject",
+       "foreignobject",
+       "form",
+       "formaction",
+       "formenctype",
+       "formmethod",
+       "formnovalidate",
+       "formtarget",
+       "frame",
+       "frameset",
+       "h1",
+       "h2",
+       "h3",
+       "h4",
+       "h5",
+       "h6",
+       "head",
+       "header",
+       "headers",
+       "height",
+       "hgroup",
+       "hidden",
+       "high",
+       "hr",
+       "href",
+       "hreflang",
+       "html",
+       "http-equiv",
+       "i",
+       "icon",
+       "id",
+       "iframe",
+       "image",
+       "img",
+       "input",
+       "inputmode",
+       "ins",
+       "integrity",
+       "is",
+       "isindex",
+       "ismap",
+       "itemid",
+       "itemprop",
+       "itemref",
+       "itemscope",
+       "itemtype",
+       "kbd",
+       "keygen",
+       "keytype",
+       "kind",
+       "label",
+       "lang",
+       "legend",
+       "li",
+       "link",
+       "list",
+       "listing",
+       "loop",
+       "low",
+       "main",
+       "malignmark",
+       "manifest",
+       "map",
+       "mark",
+       "marquee",
+       "math",
+       "max",
+       "maxlength",
+       "media",
+       "mediagroup",
+       "menu",
+       "menuitem",
+       "meta",
+       "meter",
+       "method",
+       "mglyph",
+       "mi",
+       "min",
+       "minlength",
+       "mn",
+       "mo",
+       "ms",
+       "mtext",
+       "multiple",
+       "muted",
+       "name",
+       "nav",
+       "nobr",
+       "noembed",
+       "noframes",
+       "nomodule",
+       "nonce",
+       "noscript",
+       "novalidate",
+       "object",
+       "ol",
+       "onabort",
+       "onafterprint",
+       "onautocomplete",
+       "onautocompleteerror",
+       "onauxclick",
+       "onbeforeprint",
+       "onbeforeunload",
+       "onblur",
+       "oncancel",
+       "oncanplay",
+       "oncanplaythrough",
+       "onchange",
+       "onclick",
+       "onclose",
+       "oncontextmenu",
+       "oncopy",
+       "oncuechange",
+       "oncut",
+       "ondblclick",
+       "ondrag",
+       "ondragend",
+       "ondragenter",
+       "ondragexit",
+       "ondragleave",
+       "ondragover",
+       "ondragstart",
+       "ondrop",
+       "ondurationchange",
+       "onemptied",
+       "onended",
+       "onerror",
+       "onfocus",
+       "onhashchange",
+       "oninput",
+       "oninvalid",
+       "onkeydown",
+       "onkeypress",
+       "onkeyup",
+       "onlanguagechange",
+       "onload",
+       "onloadeddata",
+       "onloadedmetadata",
+       "onloadend",
+       "onloadstart",
+       "onmessage",
+       "onmessageerror",
+       "onmousedown",
+       "onmouseenter",
+       "onmouseleave",
+       "onmousemove",
+       "onmouseout",
+       "onmouseover",
+       "onmouseup",
+       "onmousewheel",
+       "onoffline",
+       "ononline",
+       "onpagehide",
+       "onpageshow",
+       "onpaste",
+       "onpause",
+       "onplay",
+       "onplaying",
+       "onpopstate",
+       "onprogress",
+       "onratechange",
+       "onrejectionhandled",
+       "onreset",
+       "onresize",
+       "onscroll",
+       "onsecuritypolicyviolation",
+       "onseeked",
+       "onseeking",
+       "onselect",
+       "onshow",
+       "onsort",
+       "onstalled",
+       "onstorage",
+       "onsubmit",
+       "onsuspend",
+       "ontimeupdate",
+       "ontoggle",
+       "onunhandledrejection",
+       "onunload",
+       "onvolumechange",
+       "onwaiting",
+       "onwheel",
+       "open",
+       "optgroup",
+       "optimum",
+       "option",
+       "output",
+       "p",
+       "param",
+       "pattern",
+       "picture",
+       "ping",
+       "placeholder",
+       "plaintext",
+       "playsinline",
+       "poster",
+       "pre",
+       "preload",
+       "progress",
+       "prompt",
+       "public",
+       "q",
+       "radiogroup",
+       "rb",
+       "readonly",
+       "referrerpolicy",
+       "rel",
+       "required",
+       "reversed",
+       "rows",
+       "rowspan",
+       "rp",
+       "rt",
+       "rtc",
+       "ruby",
+       "s",
+       "samp",
+       "sandbox",
+       "scope",
+       "scoped",
+       "script",
+       "seamless",
+       "section",
+       "select",
+       "selected",
+       "shape",
+       "size",
+       "sizes",
+       "slot",
+       "small",
+       "sortable",
+       "sorted",
+       "source",
+       "spacer",
+       "span",
+       "spellcheck",
+       "src",
+       "srcdoc",
+       "srclang",
+       "srcset",
+       "start",
+       "step",
+       "strike",
+       "strong",
+       "style",
+       "sub",
+       "summary",
+       "sup",
+       "svg",
+       "system",
+       "tabindex",
+       "table",
+       "target",
+       "tbody",
+       "td",
+       "template",
+       "textarea",
+       "tfoot",
+       "th",
+       "thead",
+       "time",
+       "title",
+       "tr",
+       "track",
+       "translate",
+       "tt",
+       "type",
+       "typemustmatch",
+       "u",
+       "ul",
+       "updateviacache",
+       "usemap",
+       "value",
+       "var",
+       "video",
+       "wbr",
+       "width",
+       "workertype",
+       "wrap",
+       "xmp",
+}
diff --git a/internal/html/charset/charset.go b/internal/html/charset/charset.go
new file mode 100644 (file)
index 0000000..f61c6c3
--- /dev/null
@@ -0,0 +1,257 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package charset provides common text encodings for HTML documents.
+//
+// The mapping from encoding labels to encodings is defined at
+// https://encoding.spec.whatwg.org/.
+package charset // import "git.earlybird.gay/today-engine/internal/html/charset"
+
+import (
+       "bytes"
+       "fmt"
+       "io"
+       "mime"
+       "strings"
+       "unicode/utf8"
+
+       "git.earlybird.gay/today-engine/internal/html"
+       "golang.org/x/text/encoding"
+       "golang.org/x/text/encoding/charmap"
+       "golang.org/x/text/encoding/htmlindex"
+       "golang.org/x/text/transform"
+)
+
+// Lookup returns the encoding with the specified label, and its canonical
+// name. It returns nil and the empty string if label is not one of the
+// standard encodings for HTML. Matching is case-insensitive and ignores
+// leading and trailing whitespace. Encoders will use HTML escape sequences for
+// runes that are not supported by the character set.
+func Lookup(label string) (e encoding.Encoding, name string) {
+       e, err := htmlindex.Get(label)
+       if err != nil {
+               return nil, ""
+       }
+       name, _ = htmlindex.Name(e)
+       return &htmlEncoding{e}, name
+}
+
+// htmlEncoding wraps an encoding.Encoding and overrides NewEncoder so that
+// unsupported code points are written as HTML escapes.
+type htmlEncoding struct{ encoding.Encoding }
+
+// NewEncoder returns the embedded encoding's encoder, wrapped so that runes
+// the character set cannot represent are replaced by HTML escapes. HTML
+// requires a legacy encoder that does not terminate on such runes.
+func (h *htmlEncoding) NewEncoder() *encoding.Encoder {
+       inner := h.Encoding.NewEncoder()
+       return encoding.HTMLEscapeUnsupported(inner)
+}
+
+// DetermineEncoding determines the encoding of an HTML document by examining
+// up to the first 1024 bytes of content and the declared Content-Type.
+//
+// Sources are consulted in the following order and the first one that yields
+// an encoding wins: a byte order mark at the start of content, the charset
+// parameter of contentType, a <meta> declaration found by prescan, and finally
+// a check for valid UTF-8 that contains at least one non-ASCII byte. If none
+// of these applies, windows-1252 is returned. The certain result is true only
+// for the byte-order-mark and Content-Type cases.
+//
+// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding
+func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) {
+       // Only the first 1024 bytes take part in detection.
+       if len(content) > 1024 {
+               content = content[:1024]
+       }
+
+       // A byte order mark is treated as authoritative.
+       for _, b := range boms {
+               if bytes.HasPrefix(content, b.bom) {
+                       e, name = Lookup(b.enc)
+                       return e, name, true
+               }
+       }
+
+       // Next, honor a charset parameter in the Content-Type, provided it names
+       // an encoding Lookup knows. An unparsable Content-Type is ignored.
+       if _, params, err := mime.ParseMediaType(contentType); err == nil {
+               if cs, ok := params["charset"]; ok {
+                       if e, name = Lookup(cs); e != nil {
+                               return e, name, true
+                       }
+               }
+       }
+
+       // Then look for a <meta> declaration inside the document itself; this
+       // answer is reported as not certain.
+       if len(content) > 0 {
+               e, name = prescan(content)
+               if e != nil {
+                       return e, name, false
+               }
+       }
+
+       // Try to detect UTF-8.
+       // First eliminate any partial rune at the end. At most the last three
+       // bytes are inspected; the scan stops at the first ASCII byte, or cuts
+       // content just before the first rune-start byte it meets.
+       for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
+               b := content[i]
+               if b < 0x80 {
+                       break
+               }
+               if utf8.RuneStart(b) {
+                       content = content[:i]
+                       break
+               }
+       }
+       // Pure-ASCII content is not reported as UTF-8; it falls through to the
+       // default below.
+       hasHighBit := false
+       for _, c := range content {
+               if c >= 0x80 {
+                       hasHighBit = true
+                       break
+               }
+       }
+       if hasHighBit && utf8.Valid(content) {
+               return encoding.Nop, "utf-8", false
+       }
+
+       // TODO: change default depending on user's locale?
+       return charmap.Windows1252, "windows-1252", false
+}
+
+// NewReader returns an io.Reader that converts the content of r to UTF-8.
+// It calls DetermineEncoding to find out what r's encoding is.
+func NewReader(r io.Reader, contentType string) (io.Reader, error) {
+       preview := make([]byte, 1024)
+       n, err := io.ReadFull(r, preview)
+       switch {
+       case err == io.ErrUnexpectedEOF:
+               preview = preview[:n]
+               r = bytes.NewReader(preview)
+       case err != nil:
+               return nil, err
+       default:
+               r = io.MultiReader(bytes.NewReader(preview), r)
+       }
+
+       if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop {
+               r = transform.NewReader(r, e.NewDecoder())
+       }
+       return r, nil
+}
+
+// NewReaderLabel returns a reader that converts from the specified charset to
+// UTF-8. It uses Lookup to find the encoding that corresponds to label, and
+// returns an error if Lookup returns nil. It is suitable for use as
+// encoding/xml.Decoder's CharsetReader function.
+func NewReaderLabel(label string, input io.Reader) (io.Reader, error) {
+       e, _ := Lookup(label)
+       if e == nil {
+               return nil, fmt.Errorf("unsupported charset: %q", label)
+       }
+       return transform.NewReader(input, e.NewDecoder()), nil
+}
+
+// prescan tokenizes content looking for a <meta> element that declares a
+// character encoding, either through a charset attribute or through a content
+// attribute accompanied by http-equiv="content-type". It returns the declared
+// encoding and its name, or nil and "" if the tokenizer reports an ErrorToken
+// before such an element is found.
+func prescan(content []byte) (e encoding.Encoding, name string) {
+       z := html.NewTokenizer(bytes.NewReader(content))
+       for {
+               switch z.Next() {
+               case html.ErrorToken:
+                       return nil, ""
+
+               case html.StartTagToken, html.SelfClosingTagToken:
+                       tagName, hasAttr := z.TagName()
+                       if !bytes.Equal(tagName, []byte("meta")) {
+                               continue
+                       }
+                       // attrList records attribute names already seen on this
+                       // element; only the first occurrence of each is considered.
+                       attrList := make(map[string]bool)
+                       gotPragma := false
+
+                       // needPragma tracks whether the encoding found so far is only
+                       // valid together with http-equiv="content-type" (it came from
+                       // a content attribute) or stands on its own (it came from a
+                       // charset attribute).
+                       const (
+                               dontKnow = iota
+                               doNeedPragma
+                               doNotNeedPragma
+                       )
+                       needPragma := dontKnow
+
+                       // Reset the results so nothing carries over from an earlier
+                       // <meta> element that was rejected.
+                       name = ""
+                       e = nil
+                       for hasAttr {
+                               var key, val []byte
+                               key, val, hasAttr = z.TagAttr()
+                               ks := string(key)
+                               if attrList[ks] {
+                                       continue
+                               }
+                               attrList[ks] = true
+                               // Lower-case ASCII letters in the value in place so the
+                               // comparisons below are case-insensitive.
+                               for i, c := range val {
+                                       if 'A' <= c && c <= 'Z' {
+                                               val[i] = c + 0x20
+                                       }
+                               }
+
+                               switch ks {
+                               case "http-equiv":
+                                       if bytes.Equal(val, []byte("content-type")) {
+                                               gotPragma = true
+                                       }
+
+                               case "content":
+                                       // A content attribute is only consulted while no
+                                       // encoding has been found on this element.
+                                       if e == nil {
+                                               name = fromMetaElement(string(val))
+                                               if name != "" {
+                                                       e, name = Lookup(name)
+                                                       if e != nil {
+                                                               needPragma = doNeedPragma
+                                                       }
+                                               }
+                                       }
+
+                               case "charset":
+                                       // A charset attribute replaces any result taken from
+                                       // content, even when Lookup does not recognize it.
+                                       e, name = Lookup(string(val))
+                                       needPragma = doNotNeedPragma
+                               }
+                       }
+
+                       // && binds tighter than ||: skip this element if it declared
+                       // nothing, or if it relied on content= without the
+                       // content-type pragma.
+                       if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
+                               continue
+                       }
+
+                       // A <meta>-declared UTF-16 encoding is reported as UTF-8.
+                       if strings.HasPrefix(name, "utf-16") {
+                               name = "utf-8"
+                               e = encoding.Nop
+                       }
+
+                       if e != nil {
+                               return e, name
+                       }
+               }
+       }
+}
+
+// fromMetaElement extracts the charset value from the text of a <meta>
+// content attribute such as "text/html; charset=utf-8". It looks for the
+// literal "charset" followed, after optional whitespace, by "="; an occurrence
+// without "=" is skipped and the search resumes after it. The value is either
+// a quoted string (single or double quotes, which must be closed) or the run
+// of characters up to the next ';' or whitespace. It returns "" when no value
+// is found. The match is case-sensitive, so callers are expected to pass
+// lower-cased text.
+func fromMetaElement(s string) string {
+       const space = " \t\n\f\r"
+       for len(s) > 0 {
+               idx := strings.Index(s, "charset")
+               if idx < 0 {
+                       return ""
+               }
+               s = strings.TrimLeft(s[idx+len("charset"):], space)
+               if len(s) == 0 || s[0] != '=' {
+                       // Not an assignment; look for a later "charset".
+                       continue
+               }
+               s = strings.TrimLeft(s[1:], space)
+
+               switch {
+               case s == "":
+                       return ""
+               case s[0] == '"' || s[0] == '\'':
+                       // Quoted value: it ends at the matching quote, and an
+                       // unterminated quote yields nothing.
+                       quoted := s[1:]
+                       if end := strings.IndexByte(quoted, s[0]); end >= 0 {
+                               return quoted[:end]
+                       }
+                       return ""
+               }
+
+               // Unquoted value: it runs to the first ';' or whitespace, or to
+               // the end of the string.
+               if end := strings.IndexAny(s, ";"+space); end >= 0 {
+                       return s[:end]
+               }
+               return s
+       }
+       return ""
+}
+
+// boms pairs each recognized byte order mark with the label of the encoding
+// it indicates. DetermineEncoding tests content against the entries in order
+// and resolves the label of the first match with Lookup.
+var boms = []struct {
+       bom []byte
+       enc string
+}{
+       {[]byte{0xfe, 0xff}, "utf-16be"},
+       {[]byte{0xff, 0xfe}, "utf-16le"},
+       {[]byte{0xef, 0xbb, 0xbf}, "utf-8"},
+}
diff --git a/internal/html/charset/charset_test.go b/internal/html/charset/charset_test.go
new file mode 100644 (file)
index 0000000..c2f6244
--- /dev/null
@@ -0,0 +1,237 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package charset
+
+import (
+       "bytes"
+       "encoding/xml"
+       "io"
+       "os"
+       "runtime"
+       "strings"
+       "testing"
+
+       "golang.org/x/text/transform"
+)
+
+func transformString(t transform.Transformer, s string) (string, error) {
+       r := transform.NewReader(strings.NewReader(s), t)
+       b, err := io.ReadAll(r)
+       return string(b), err
+}
+
+// testCase pairs a piece of text in UTF-8 with the same text in another
+// encoding.
+type testCase struct {
+       // utf8 is the text as UTF-8, other is the text as bytes in the encoding
+       // whose label is otherEncoding; the label is resolved with Lookup.
+       utf8, other, otherEncoding string
+}
+
+// testCases is the shared table for TestDecode and TestEncode: decoding other
+// from otherEncoding must yield utf8, and encoding utf8 must yield other.
+var testCases = []testCase{
+       {"Résumé", "Résumé", "utf8"},
+       {"Résumé", "R\xe9sum\xe9", "latin1"},
+       {"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"},
+       {"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"},
+       {"Hello, world", "Hello, world", "ASCII"},
+       {"Gdańsk", "Gda\xf1sk", "ISO-8859-2"},
+       {"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", "ISO-8859-10"},
+       {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "ISO-8859-11"},
+       {"latviešu", "latvie\xf0u", "ISO-8859-13"},
+       {"Seònaid", "Se\xf2naid", "ISO-8859-14"},
+       {"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"},
+       {"românește", "rom\xe2ne\xbate", "ISO-8859-16"},
+       {"nutraĵo", "nutra\xbco", "ISO-8859-3"},
+       {"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"},
+       {"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"},
+       {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"},
+       {"Kağan", "Ka\xf0an", "ISO-8859-9"},
+       {"Résumé", "R\x8esum\x8e", "macintosh"},
+       {"Gdańsk", "Gda\xf1sk", "windows-1250"},
+       {"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"},
+       {"Résumé", "R\xe9sum\xe9", "windows-1252"},
+       {"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"},
+       {"Kağan", "Ka\xf0an", "windows-1254"},
+       {"עִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"},
+       {"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"},
+       {"latviešu", "latvie\xf0u", "windows-1257"},
+       {"Việt", "Vi\xea\xf2t", "windows-1258"},
+       {"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "windows-874"},
+       {"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"},
+       {"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"},
+       {"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"},
+       {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"},
+       {"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"},
+       {"עִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"},
+       {"㧯", "\x82\x31\x89\x38", "gb18030"},
+       {"これは漢字です。", "\x82\xb1\x82\xea\x82\xcd\x8a\xbf\x8e\x9a\x82\xc5\x82\xb7\x81B", "SJIS"},
+       {"Hello, 世界!", "Hello, \x90\xa2\x8aE!", "SJIS"},
+       {"イウエオカ", "\xb2\xb3\xb4\xb5\xb6", "SJIS"},
+       {"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"},
+       {"Hello, 世界!", "Hello, \x1b$B@$3&\x1b(B!", "ISO-2022-JP"},
+       {"다음과 같은 조건을 따라야 합니다: 저작자표시", "\xb4\xd9\xc0\xbd\xb0\xfa \xb0\xb0\xc0\xba \xc1\xb6\xb0\xc7\xc0\xbb \xb5\xfb\xb6\xf3\xbe\xdf \xc7մϴ\xd9: \xc0\xfa\xc0\xdb\xc0\xdaǥ\xbd\xc3", "EUC-KR"},
+}
+
+func TestDecode(t *testing.T) {
+       testCases := append(testCases, []testCase{
+               // Replace multi-byte maximum subpart of ill-formed subsequence with
+               // single replacement character (WhatWG requirement).
+               {"Rés\ufffdumé", "Rés\xe1\x80umé", "utf8"},
+       }...)
+       for _, tc := range testCases {
+               e, _ := Lookup(tc.otherEncoding)
+               if e == nil {
+                       t.Errorf("%s: not found", tc.otherEncoding)
+                       continue
+               }
+               s, err := transformString(e.NewDecoder(), tc.other)
+               if err != nil {
+                       t.Errorf("%s: decode %q: %v", tc.otherEncoding, tc.other, err)
+                       continue
+               }
+               if s != tc.utf8 {
+                       t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.utf8)
+               }
+       }
+}
+
+func TestEncode(t *testing.T) {
+       testCases := append(testCases, []testCase{
+               // Use Go-style replacement.
+               {"Rés\xe1\x80umé", "Rés\ufffd\ufffdumé", "utf8"},
+               // U+0144 LATIN SMALL LETTER N WITH ACUTE not supported by encoding.
+               {"Gdańsk", "Gda&#324;sk", "ISO-8859-11"},
+               {"\ufffd", "&#65533;", "ISO-8859-11"},
+               {"a\xe1\x80b", "a&#65533;&#65533;b", "ISO-8859-11"},
+       }...)
+       for _, tc := range testCases {
+               e, _ := Lookup(tc.otherEncoding)
+               if e == nil {
+                       t.Errorf("%s: not found", tc.otherEncoding)
+                       continue
+               }
+               s, err := transformString(e.NewEncoder(), tc.utf8)
+               if err != nil {
+                       t.Errorf("%s: encode %q: %s", tc.otherEncoding, tc.utf8, err)
+                       continue
+               }
+               if s != tc.other {
+                       t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.other)
+               }
+       }
+}
+
+// sniffTestCases drives the encoding-detection tests. filename is a file under
+// testdata/, declared is the Content-Type passed alongside its contents, and
+// want is the encoding name DetermineEncoding is expected to report.
+var sniffTestCases = []struct {
+       filename, declared, want string
+}{
+       {"HTTP-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
+       {"UTF-16LE-BOM.html", "", "utf-16le"},
+       {"UTF-16BE-BOM.html", "", "utf-16be"},
+       {"meta-content-attribute.html", "text/html", "iso-8859-15"},
+       {"meta-charset-attribute.html", "text/html", "iso-8859-15"},
+       {"No-encoding-declaration.html", "text/html", "utf-8"},
+       {"HTTP-vs-UTF-8-BOM.html", "text/html; charset=iso-8859-15", "utf-8"},
+       {"HTTP-vs-meta-content.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
+       {"HTTP-vs-meta-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
+       {"UTF-8-BOM-vs-meta-content.html", "text/html", "utf-8"},
+       {"UTF-8-BOM-vs-meta-charset.html", "text/html", "utf-8"},
+}
+
+func TestSniff(t *testing.T) {
+       switch runtime.GOOS {
+       case "nacl": // platforms that don't permit direct file system access
+               t.Skipf("not supported on %q", runtime.GOOS)
+       }
+
+       for _, tc := range sniffTestCases {
+               content, err := os.ReadFile("testdata/" + tc.filename)
+               if err != nil {
+                       t.Errorf("%s: error reading file: %v", tc.filename, err)
+                       continue
+               }
+
+               _, name, _ := DetermineEncoding(content, tc.declared)
+               if name != tc.want {
+                       t.Errorf("%s: got %q, want %q", tc.filename, name, tc.want)
+                       continue
+               }
+       }
+}
+
+func TestReader(t *testing.T) {
+       switch runtime.GOOS {
+       case "nacl": // platforms that don't permit direct file system access
+               t.Skipf("not supported on %q", runtime.GOOS)
+       }
+
+       for _, tc := range sniffTestCases {
+               content, err := os.ReadFile("testdata/" + tc.filename)
+               if err != nil {
+                       t.Errorf("%s: error reading file: %v", tc.filename, err)
+                       continue
+               }
+
+               r, err := NewReader(bytes.NewReader(content), tc.declared)
+               if err != nil {
+                       t.Errorf("%s: error creating reader: %v", tc.filename, err)
+                       continue
+               }
+
+               got, err := io.ReadAll(r)
+               if err != nil {
+                       t.Errorf("%s: error reading from charset.NewReader: %v", tc.filename, err)
+                       continue
+               }
+
+               e, _ := Lookup(tc.want)
+               want, err := io.ReadAll(transform.NewReader(bytes.NewReader(content), e.NewDecoder()))
+               if err != nil {
+                       t.Errorf("%s: error decoding with hard-coded charset name: %v", tc.filename, err)
+                       continue
+               }
+
+               if !bytes.Equal(got, want) {
+                       t.Errorf("%s: got %q, want %q", tc.filename, got, want)
+                       continue
+               }
+       }
+}
+
+// metaTestCases pairs a meta string passed to fromMetaElement with the value
+// TestFromMeta expects back; want is empty for inputs from which no charset
+// is expected.
+var metaTestCases = []struct {
+       meta, want string
+}{
+       {"", ""},
+       {"text/html", ""},
+       {"text/html; charset utf-8", ""},
+       {"text/html; charset=latin-2", "latin-2"},
+       {"text/html; charset; charset = utf-8", "utf-8"},
+       {`charset="big5"`, "big5"},
+       {"charset='shift_jis'", "shift_jis"},
+}
+
+func TestFromMeta(t *testing.T) {
+       for _, tc := range metaTestCases {
+               got := fromMetaElement(tc.meta)
+               if got != tc.want {
+                       t.Errorf("%q: got %q, want %q", tc.meta, got, tc.want)
+               }
+       }
+}
+
+// TestXML checks that NewReaderLabel works as an xml.Decoder CharsetReader by
+// decoding a document that declares itself as windows-1252.
+func TestXML(t *testing.T) {
+       const (
+               doc  = "<?xml version=\"1.0\" encoding=\"windows-1252\"?><a><Word>r\xe9sum\xe9</Word></a>"
+               want = "résumé"
+       )
+
+       var parsed struct {
+               Word string
+       }
+       dec := xml.NewDecoder(strings.NewReader(doc))
+       dec.CharsetReader = NewReaderLabel
+       if err := dec.Decode(&parsed); err != nil {
+               t.Fatalf("Decode: %v", err)
+       }
+
+       if got := parsed.Word; got != want {
+               t.Errorf("got %q, want %q", got, want)
+       }
+}
diff --git a/internal/html/charset/testdata/HTTP-charset.html b/internal/html/charset/testdata/HTTP-charset.html
new file mode 100644 (file)
index 0000000..9915fa0
--- /dev/null
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html  lang="en" >
+<head>
+  <title>HTTP charset</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The character encoding of a page can be set using the HTTP header charset declaration.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>HTTP charset</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The character encoding of a page can be set using the HTTP header charset declaration.</p>
+<div class="notes"><p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p><p>The only character encoding declaration for this HTML file is in the HTTP header, which sets the encoding to ISO 8859-15.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-003">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-001<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-001" target="_blank">Detailed results for this test</a><br/>       <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+                               <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/internal/html/charset/testdata/HTTP-vs-UTF-8-BOM.html b/internal/html/charset/testdata/HTTP-vs-UTF-8-BOM.html
new file mode 100644 (file)
index 0000000..26e5d8b
--- /dev/null
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html  lang="en" >
+<head>
+  <title>HTTP vs UTF-8 BOM</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
+</head>
+<body>
+<p class='title'>HTTP vs UTF-8 BOM</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.</p>
+<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p><p>If the test is unsuccessful, the characters &#x00EF;&#x00BB;&#x00BF; should appear at the top of the page.  These represent the bytes that make up the UTF-8 signature when encountered in the ISO 8859-15 encoding.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-022">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-034<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-034" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+                               <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/internal/html/charset/testdata/HTTP-vs-meta-charset.html b/internal/html/charset/testdata/HTTP-vs-meta-charset.html
new file mode 100644 (file)
index 0000000..2f07e95
--- /dev/null
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html  lang="en" >
+<head>
+ <meta charset="iso-8859-1" > <title>HTTP vs meta charset</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.">
+<style type='text/css'>
+.test div { width: 50px; }.test div { width: 90px; }
+</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>HTTP vs meta charset</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.</p>
+<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-037">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-018<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-018" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+                               <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/internal/html/charset/testdata/HTTP-vs-meta-content.html b/internal/html/charset/testdata/HTTP-vs-meta-content.html
new file mode 100644 (file)
index 0000000..6853cdd
--- /dev/null
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html  lang="en" >
+<head>
+ <meta http-equiv="content-type" content="text/html;charset=iso-8859-1" > <title>HTTP vs meta content</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.">
+<style type='text/css'>
+.test div { width: 50px; }.test div { width: 90px; }
+</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>HTTP vs meta content</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.</p>
+<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-018">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-016<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-016" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+                               <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/internal/html/charset/testdata/No-encoding-declaration.html b/internal/html/charset/testdata/No-encoding-declaration.html
new file mode 100644 (file)
index 0000000..612e26c
--- /dev/null
@@ -0,0 +1,47 @@
+<!DOCTYPE html>
+<html  lang="en" >
+<head>
+  <title>No encoding declaration</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
+</head>
+<body>
+<p class='title'>No encoding declaration</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.</p>
+<div class="notes"><p><p>The test on this page contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-034">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-015<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-015" target="_blank">Detailed results for this test</a><br/>       <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/internal/html/charset/testdata/README b/internal/html/charset/testdata/README
new file mode 100644 (file)
index 0000000..38ef0f9
--- /dev/null
@@ -0,0 +1,9 @@
+These test cases come from
+http://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics
+
+Distributed under both the W3C Test Suite License
+(http://www.w3.org/Consortium/Legal/2008/04-testsuite-license)
+and the W3C 3-clause BSD License
+(http://www.w3.org/Consortium/Legal/2008/03-bsd-license).
+To contribute to a W3C Test Suite, see the policies and contribution
+forms (http://www.w3.org/2004/10/27-testcases).
diff --git a/internal/html/charset/testdata/UTF-16BE-BOM.html b/internal/html/charset/testdata/UTF-16BE-BOM.html
new file mode 100644 (file)
index 0000000..3abf7a9
Binary files /dev/null and b/internal/html/charset/testdata/UTF-16BE-BOM.html differ
diff --git a/internal/html/charset/testdata/UTF-16LE-BOM.html b/internal/html/charset/testdata/UTF-16LE-BOM.html
new file mode 100644 (file)
index 0000000..76254c9
Binary files /dev/null and b/internal/html/charset/testdata/UTF-16LE-BOM.html differ
diff --git a/internal/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html b/internal/html/charset/testdata/UTF-8-BOM-vs-meta-charset.html
new file mode 100644 (file)
index 0000000..83de433
--- /dev/null
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html  lang="en" >
+<head>
+ <meta charset="iso-8859-15"> <title>UTF-8 BOM vs meta charset</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.">
+<style type='text/css'>
+.test div { width: 50px; }.test div { width: 90px; }
+</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
+</head>
+<body>
+<p class='title'>UTF-8 BOM vs meta charset</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.</p>
+<div class="notes"><p><p>The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-024">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-038<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-038" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+                               <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/internal/html/charset/testdata/UTF-8-BOM-vs-meta-content.html b/internal/html/charset/testdata/UTF-8-BOM-vs-meta-content.html
new file mode 100644 (file)
index 0000000..501aac2
--- /dev/null
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html  lang="en" >
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>UTF-8 BOM vs meta content</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
+</head>
+<body>
+<p class='title'>UTF-8 BOM vs meta content</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.</p>
+<div class="notes"><p><p>The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-038">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-037<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-037" target="_blank">Detailed results for this test</a><br/>   <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+                               <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/internal/html/charset/testdata/meta-charset-attribute.html b/internal/html/charset/testdata/meta-charset-attribute.html
new file mode 100644 (file)
index 0000000..2d7d25a
--- /dev/null
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html  lang="en" >
+<head>
+ <meta charset="iso-8859-15"> <title>meta charset attribute</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The character encoding of the page can be set by a meta element with charset attribute.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>meta charset attribute</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with charset attribute.</p>
+<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-015">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-009<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-009" target="_blank">Detailed results for this test</a><br/>       <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+                               <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/internal/html/charset/testdata/meta-content-attribute.html b/internal/html/charset/testdata/meta-content-attribute.html
new file mode 100644 (file)
index 0000000..1c3f228
--- /dev/null
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html  lang="en" >
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>meta content attribute</title>
+<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
+<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
+<link rel="stylesheet" type="text/css" href="./generatedtests.css">
+<script src="http://w3c-test.org/resources/testharness.js"></script>
+<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
+<meta name='flags' content='http'>
+<meta name="assert" content="The character encoding of the page can be set by a meta element with http-equiv and content attributes.">
+<style type='text/css'>
+.test div { width: 50px; }</style>
+<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
+</head>
+<body>
+<p class='title'>meta content attribute</p>
+
+
+<div id='log'></div>
+
+
+<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
+
+
+
+
+
+<div class='description'>
+<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with http-equiv and content attributes.</p>
+<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the content attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
+</div>
+</div>
+<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-009">Next test</a></div><div class="doctype">HTML5</div>
+<p class="jump">the-input-byte-stream-007<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-007" target="_blank">Detailed results for this test</a><br/>       <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
+<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
+                               <li>The test is read from a server that supports HTTP.</li></ul></div>
+</div>
+<script>
+test(function() {
+assert_equals(document.getElementById('box').offsetWidth, 100);
+}, " ");
+</script>
+
+</body>
+</html>
+
+
diff --git a/internal/html/comment_test.go b/internal/html/comment_test.go
new file mode 100644 (file)
index 0000000..fd47de8
--- /dev/null
@@ -0,0 +1,291 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "strings"
+       "testing"
+)
+
+// TestComments exhaustively tests that every 'interesting' N-byte string is
+// correctly parsed as a comment. N ranges from 4+1 to 4+maxSuffixLen
+// inclusive. 4 is the length of the "<!--" prefix that starts an HTML comment.
+//
+// 'Interesting' means that the N-4 byte suffix consists entirely of bytes
+// sampled from the interestingCommentBytes const string, below. These cover
+// all of the possible state transitions from comment-related parser states, as
+// listed in the HTML spec (https://html.spec.whatwg.org/#comment-start-state
+// and subsequent sections).
+//
+// The spec is written as an explicit state machine that, as a side effect,
+// accumulates "the comment token's data" to a separate buffer.
+// Tokenizer.readComment in this package does not have an explicit state
+// machine and usually returns the comment text as a sub-slice of the input,
+// between the opening '<' and closing '>' or EOF. This test confirms that the
+// two algorithms match.
+func TestComments(t *testing.T) {
+       const prefix = "<!--"
+       const maxSuffixLen = 6
+       buffer := make([]byte, 0, len(prefix)+maxSuffixLen)
+       testAllComments(t, append(buffer, prefix...))
+}
+
+// NUL isn't in this list, even though the HTML spec sections 13.2.5.43 -
+// 13.2.5.52 mention it. It's not interesting in terms of state transitions.
+// It's equivalent to any other non-interesting byte (other than being replaced
+// by U+FFFD REPLACEMENT CHARACTER).
+//
+// EOF isn't in this list. The HTML spec treats EOF as "an input character" but
+// testOneComment below breaks the loop instead.
+//
+// 'x' represents all other "non-interesting" comment bytes.
+var interestingCommentBytes = [...]byte{
+       '!', '-', '<', '>', 'x',
+}
+
+// testAllComments recursively fills in buffer[len(buffer):cap(buffer)] with
+// interesting bytes and then tests that this package's tokenization matches
+// the HTML spec.
+//
+// Precondition: len(buffer) < cap(buffer)
+// Precondition: string(buffer[:4]) == "<!--"
+func testAllComments(t *testing.T, buffer []byte) {
+       for _, interesting := range interestingCommentBytes {
+               b := append(buffer, interesting)
+               testOneComment(t, b)
+               if len(b) < cap(b) {
+                       testAllComments(t, b)
+               }
+       }
+}
+
+func testOneComment(t *testing.T, b []byte) {
+       z := NewTokenizer(bytes.NewReader(b))
+       if next := z.Next(); next != CommentToken {
+               t.Fatalf("Next(%q): got %v, want %v", b, next, CommentToken)
+       }
+       gotRemainder := string(b[len(z.Raw()):])
+       gotComment := string(z.Text())
+
+       i := len("<!--")
+       wantBuffer := []byte(nil)
+loop:
+       for state := 43; ; {
+               // Consume the next input character, handling EOF.
+               if i >= len(b) {
+                       break
+               }
+               nextInputCharacter := b[i]
+               i++
+
+               switch state {
+               case 43: // 13.2.5.43 Comment start state.
+                       switch nextInputCharacter {
+                       case '-':
+                               state = 44
+                       case '>':
+                               break loop
+                       default:
+                               i-- // Reconsume.
+                               state = 45
+                       }
+
+               case 44: // 13.2.5.44 Comment start dash state.
+                       switch nextInputCharacter {
+                       case '-':
+                               state = 51
+                       case '>':
+                               break loop
+                       default:
+                               wantBuffer = append(wantBuffer, '-')
+                               i-- // Reconsume.
+                               state = 45
+                       }
+
+               case 45: // 13.2.5.45 Comment state.
+                       switch nextInputCharacter {
+                       case '-':
+                               state = 50
+                       case '<':
+                               wantBuffer = append(wantBuffer, '<')
+                               state = 46
+                       default:
+                               wantBuffer = append(wantBuffer, nextInputCharacter)
+                       }
+
+               case 46: // 13.2.5.46 Comment less-than sign state.
+                       switch nextInputCharacter {
+                       case '!':
+                               wantBuffer = append(wantBuffer, '!')
+                               state = 47
+                       case '<':
+                               wantBuffer = append(wantBuffer, '<')
+                               state = 46
+                       default:
+                               i-- // Reconsume.
+                               state = 45
+                       }
+
+               case 47: // 13.2.5.47 Comment less-than sign bang state.
+                       switch nextInputCharacter {
+                       case '-':
+                               state = 48
+                       default:
+                               i-- // Reconsume.
+                               state = 45
+                       }
+
+               case 48: // 13.2.5.48 Comment less-than sign bang dash state.
+                       switch nextInputCharacter {
+                       case '-':
+                               state = 49
+                       default:
+                               i-- // Reconsume.
+                               state = 50
+                       }
+
+               case 49: // 13.2.5.49 Comment less-than sign bang dash dash state.
+                       switch nextInputCharacter {
+                       case '>':
+                               break loop
+                       default:
+                               i-- // Reconsume.
+                               state = 51
+                       }
+
+               case 50: // 13.2.5.50 Comment end dash state.
+                       switch nextInputCharacter {
+                       case '-':
+                               state = 51
+                       default:
+                               wantBuffer = append(wantBuffer, '-')
+                               i-- // Reconsume.
+                               state = 45
+                       }
+
+               case 51: // 13.2.5.51 Comment end state.
+                       switch nextInputCharacter {
+                       case '!':
+                               state = 52
+                       case '-':
+                               wantBuffer = append(wantBuffer, '-')
+                       case '>':
+                               break loop
+                       default:
+                               wantBuffer = append(wantBuffer, "--"...)
+                               i-- // Reconsume.
+                               state = 45
+                       }
+
+               case 52: // 13.2.5.52 Comment end bang state.
+                       switch nextInputCharacter {
+                       case '-':
+                               wantBuffer = append(wantBuffer, "--!"...)
+                               state = 50
+                       case '>':
+                               break loop
+                       default:
+                               wantBuffer = append(wantBuffer, "--!"...)
+                               i-- // Reconsume.
+                               state = 45
+                       }
+
+               default:
+                       t.Fatalf("input=%q: unexpected state %d", b, state)
+               }
+       }
+
+       wantRemainder := ""
+       if i < len(b) {
+               wantRemainder = string(b[i:])
+       }
+       wantComment := string(wantBuffer)
+       if (gotComment != wantComment) || (gotRemainder != wantRemainder) {
+               t.Errorf("input=%q\ngot:  %q + %q\nwant: %q + %q",
+                       b, gotComment, gotRemainder, wantComment, wantRemainder)
+               return
+       }
+
+       // suffix is the "N-4 byte suffix" per the TestComments comment.
+       suffix := string(b[4:])
+
+       // Test that a round trip, rendering (escaped) and re-parsing, of a comment
+       // token (with that suffix as the Token.Data) preserves that string.
+       tok := Token{
+               Type: CommentToken,
+               Data: suffix,
+       }
+       z2 := NewTokenizer(strings.NewReader(tok.String()))
+       if next := z2.Next(); next != CommentToken {
+               t.Fatalf("round-trip Next(%q): got %v, want %v", suffix, next, CommentToken)
+       }
+       gotComment2 := string(z2.Text())
+       if gotComment2 != suffix {
+               t.Errorf("round-trip\ngot:  %q\nwant: %q", gotComment2, suffix)
+               return
+       }
+}
+
+// This table below summarizes the HTML-comment-related state machine from
+// 13.2.5.43 "Comment start state" and subsequent sections.
+// https://html.spec.whatwg.org/#comment-start-state
+//
+// Get to state 13.2.5.43 after seeing "<!--". Specifically, starting from the
+// initial 13.2.5.1 "Data state":
+//   - "<"  moves to 13.2.5.6  "Tag open state",
+//   - "!"  moves to 13.2.5.42 "Markup declaration open state",
+//   - "--" moves to 13.2.5.43 "Comment start state".
+// Each of these transitions is the only way to get to the 6/42/43 states.
+//
+// State   !         -         <         >         NUL       EOF       default   HTML spec section
+// 43      ...       s44       ...       s01.T.E0  ...       ...       r45       13.2.5.43 Comment start state
+// 44      ...       s51       ...       s01.T.E0  ...       T.Z.E1    r45.A-    13.2.5.44 Comment start dash state
+// 45      ...       s50       s46.A<    ...       t45.A?.E2 T.Z.E1    t45.Ax    13.2.5.45 Comment state
+// 46      s47.A!    ...       t46.A<    ...       ...       ...       r45       13.2.5.46 Comment less-than sign state
+// 47      ...       s48       ...       ...       ...       ...       r45       13.2.5.47 Comment less-than sign bang state
+// 48      ...       s49       ...       ...       ...       ...       r50       13.2.5.48 Comment less-than sign bang dash state
+// 49      ...       ...       ...       s01.T     ...       T.Z.E1    r51.E3    13.2.5.49 Comment less-than sign bang dash dash state
+// 50      ...       s51       ...       ...       ...       T.Z.E1    r45.A-    13.2.5.50 Comment end dash state
+// 51      s52       t51.A-    ...       s01.T     ...       T.Z.E1    r45.A--   13.2.5.51 Comment end state
+// 52      ...       s50.A--!  ...       s01.T.E4  ...       T.Z.E1    r45.A--!  13.2.5.52 Comment end bang state
+//
+// State 43 is the "Comment start state" meaning that we've only seen "<!--"
+// and nothing else. Similarly, state 44 means that we've only seen "<!---",
+// with three dashes, and nothing else. For the other states, we deduce
+// (working backwards) that the immediate prior input must be:
+//   - 45  something that's not '-'
+//   - 46  "<"
+//   - 47  "<!"
+//   - 48  "<!-"
+//   - 49  "<!--"  not including the opening "<!--"
+//   - 50  "-"     not including the opening "<!--" and also not "--"
+//   - 51  "--"    not including the opening "<!--"
+//   - 52  "--!"
+//
+// The table cell actions:
+//   - ...   do the default action
+//   - A!    append "!"      to the comment token's data.
+//   - A-    append "-"      to the comment token's data.
+//   - A--   append "--"     to the comment token's data.
+//   - A--!  append "--!"    to the comment token's data.
+//   - A<    append "<"      to the comment token's data.
+//   - A?    append "\uFFFD" to the comment token's data.
+//   - Ax    append the current input character to the comment token's data.
+//   - E0    parse error (abrupt-closing-of-empty-comment).
+//   - E1    parse error (eof-in-comment).
+//   - E2    parse error (unexpected-null-character).
+//   - E3    parse error (nested-comment).
+//   - E4    parse error (incorrectly-closed-comment).
+//   - T     emit the current comment token.
+//   - Z     emit an end-of-file token.
+//   - rNN   reconsume in the 13.2.5.NN     state (after any A* or E* operations).
+//   - s01   switch to the    13.2.5.1 Data state (after any A* or E* operations).
+//   - sNN   switch to the    13.2.5.NN     state (after any A* or E* operations).
+//   - tNN   stay in the      13.2.5.NN     state (after any A* or E* operations).
+//
+// The E* actions are called errors in the HTML spec but they are not fatal
+// (https://html.spec.whatwg.org/#parse-errors says "may [but not must] abort
+// the parser"). They are warnings that, in practice, browsers simply ignore.
diff --git a/internal/html/const.go b/internal/html/const.go
new file mode 100644 (file)
index 0000000..ff7acf2
--- /dev/null
@@ -0,0 +1,111 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+// Section 12.2.4.2 of the HTML5 specification says "The following elements
+// have varying levels of special parsing rules".
+// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
+var isSpecialElementMap = map[string]bool{
+       "address":    true,
+       "applet":     true,
+       "area":       true,
+       "article":    true,
+       "aside":      true,
+       "base":       true,
+       "basefont":   true,
+       "bgsound":    true,
+       "blockquote": true,
+       "body":       true,
+       "br":         true,
+       "button":     true,
+       "caption":    true,
+       "center":     true,
+       "col":        true,
+       "colgroup":   true,
+       "dd":         true,
+       "details":    true,
+       "dir":        true,
+       "div":        true,
+       "dl":         true,
+       "dt":         true,
+       "embed":      true,
+       "fieldset":   true,
+       "figcaption": true,
+       "figure":     true,
+       "footer":     true,
+       "form":       true,
+       "frame":      true,
+       "frameset":   true,
+       "h1":         true,
+       "h2":         true,
+       "h3":         true,
+       "h4":         true,
+       "h5":         true,
+       "h6":         true,
+       "head":       true,
+       "header":     true,
+       "hgroup":     true,
+       "hr":         true,
+       "html":       true,
+       "iframe":     true,
+       "img":        true,
+       "input":      true,
+       "keygen":     true, // "keygen" has been removed from the spec, but is kept here for backwards compatibility.
+       "li":         true,
+       "link":       true,
+       "listing":    true,
+       "main":       true,
+       "marquee":    true,
+       "menu":       true,
+       "meta":       true,
+       "nav":        true,
+       "noembed":    true,
+       "noframes":   true,
+       "noscript":   true,
+       "object":     true,
+       "ol":         true,
+       "p":          true,
+       "param":      true,
+       "plaintext":  true,
+       "pre":        true,
+       "script":     true,
+       "section":    true,
+       "select":     true,
+       "source":     true,
+       "style":      true,
+       "summary":    true,
+       "table":      true,
+       "tbody":      true,
+       "td":         true,
+       "template":   true,
+       "textarea":   true,
+       "tfoot":      true,
+       "th":         true,
+       "thead":      true,
+       "title":      true,
+       "tr":         true,
+       "track":      true,
+       "ul":         true,
+       "wbr":        true,
+       "xmp":        true,
+}
+
+func isSpecialElement(element *Node) bool {
+       switch element.Namespace {
+       case "", "html":
+               return isSpecialElementMap[element.Data]
+       case "math":
+               switch element.Data {
+               case "mi", "mo", "mn", "ms", "mtext", "annotation-xml":
+                       return true
+               }
+       case "svg":
+               switch element.Data {
+               case "foreignObject", "desc", "title":
+                       return true
+               }
+       }
+       return false
+}
diff --git a/internal/html/doc.go b/internal/html/doc.go
new file mode 100644 (file)
index 0000000..850c47f
--- /dev/null
@@ -0,0 +1,127 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+/*
+Package html implements an HTML5-compliant tokenizer and parser.
+
+Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
+caller's responsibility to ensure that r provides UTF-8 encoded HTML.
+
+       z := html.NewTokenizer(r)
+
+Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
+which parses the next token and returns its type, or an error:
+
+       for {
+               tt := z.Next()
+               if tt == html.ErrorToken {
+                       // ...
+                       return ...
+               }
+               // Process the current token.
+       }
+
+There are two APIs for retrieving the current token. The high-level API is to
+call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
+allow optionally calling Raw after Next but before Token, Text, TagName, or
+TagAttr. In EBNF notation, the valid call sequence per token is:
+
+       Next {Raw} [ Token | Text | TagName {TagAttr} ]
+
+Token returns an independent data structure that completely describes a token.
+Entities (such as "&lt;") are unescaped, tag names and attribute keys are
+lower-cased, and attributes are collected into a []Attribute. For example:
+
+       for {
+               if z.Next() == html.ErrorToken {
+                       // Returning io.EOF indicates success.
+                       return z.Err()
+               }
+               emitToken(z.Token())
+       }
+
+The low-level API performs fewer allocations and copies, but the contents of
+the []byte values returned by Text, TagName and TagAttr may change on the next
+call to Next. For example, to extract an HTML page's anchor text:
+
+       depth := 0
+       for {
+               tt := z.Next()
+               switch tt {
+               case html.ErrorToken:
+                       return z.Err()
+               case html.TextToken:
+                       if depth > 0 {
+                               // emitBytes should copy the []byte it receives,
+                               // if it doesn't process it immediately.
+                               emitBytes(z.Text())
+                       }
+               case html.StartTagToken, html.EndTagToken:
+                       tn, _ := z.TagName()
+                       if len(tn) == 1 && tn[0] == 'a' {
+                               if tt == html.StartTagToken {
+                                       depth++
+                               } else {
+                                       depth--
+                               }
+                       }
+               }
+       }
+
+Parsing is done by calling Parse with an io.Reader, which returns the root of
+the parse tree (the document element) as a *Node. It is the caller's
+responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
+example, to process each anchor node in depth-first order:
+
+       doc, err := html.Parse(r)
+       if err != nil {
+               // ...
+       }
+       var f func(*html.Node)
+       f = func(n *html.Node) {
+               if n.Type == html.ElementNode && n.Data == "a" {
+                       // Do something with n...
+               }
+               for c := n.FirstChild; c != nil; c = c.NextSibling {
+                       f(c)
+               }
+       }
+       f(doc)
+
+The relevant specifications include:
+https://html.spec.whatwg.org/multipage/syntax.html and
+https://html.spec.whatwg.org/multipage/syntax.html#tokenization
+
+# Security Considerations
+
+Care should be taken when parsing and interpreting HTML, whether full documents
+or fragments, within the framework of the HTML specification, especially with
+regard to untrusted inputs.
+
+This package provides both a tokenizer and a parser, which implement the
+tokenization, and tokenization and tree construction stages of the WHATWG HTML
+parsing specification respectively. While the tokenizer parses and normalizes
+individual HTML tokens, only the parser constructs the DOM tree from the
+tokenized HTML, as described in the tree construction stage of the
+specification, dynamically modifying or extending the document's DOM tree.
+
+If your use case requires semantically well-formed HTML documents, as defined by
+the WHATWG specification, the parser should be used rather than the tokenizer.
+
+In security contexts, if trust decisions are being made using the tokenized or
+parsed content, the input must be re-serialized (for instance by using Render or
+Token.String) in order for those trust decisions to hold, as the process of
+tokenization or parsing may alter the content.
+*/
+package html // import "git.earlybird.gay/today-engine/internal/html"
+
+// The tokenization algorithm implemented by this package is not a line-by-line
+// transliteration of the relatively verbose state-machine in the WHATWG
+// specification. A more direct approach is used instead, where the program
+// counter implies the state, such as whether it is tokenizing a tag or a text
+// node. Specification compliance is verified by checking expected and actual
+// outputs over a test suite rather than aiming for algorithmic fidelity.
+
+// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
+// TODO(nigeltao): How does parsing interact with a JavaScript engine?
diff --git a/internal/html/doctype.go b/internal/html/doctype.go
new file mode 100644 (file)
index 0000000..c484e5a
--- /dev/null
@@ -0,0 +1,156 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "strings"
+)
+
+// parseDoctype parses the data from a DoctypeToken into a name,
+// public identifier, and system identifier. It returns a Node whose Type
+// is DoctypeNode, whose Data is the name, and which has attributes
+// named "system" and "public" for the two identifiers if they were present.
+// quirks is whether the document should be parsed in "quirks mode".
+func parseDoctype(s string) (n *Node, quirks bool) {
+       n = &Node{Type: DoctypeNode}
+
+       // Find the name.
+       space := strings.IndexAny(s, whitespace)
+       if space == -1 {
+               space = len(s)
+       }
+       n.Data = s[:space]
+       // The comparison to "html" is case-sensitive.
+       if n.Data != "html" {
+               quirks = true
+       }
+       n.Data = strings.ToLower(n.Data)
+       s = strings.TrimLeft(s[space:], whitespace)
+
+       if len(s) < 6 {
+               // It can't start with "PUBLIC" or "SYSTEM".
+               // Ignore the rest of the string.
+               return n, quirks || s != ""
+       }
+
+       key := strings.ToLower(s[:6])
+       s = s[6:]
+       for key == "public" || key == "system" {
+               s = strings.TrimLeft(s, whitespace)
+               if s == "" {
+                       break
+               }
+               quote := s[0]
+               if quote != '"' && quote != '\'' {
+                       break
+               }
+               s = s[1:]
+               q := strings.IndexRune(s, rune(quote))
+               var id string
+               if q == -1 {
+                       id = s
+                       s = ""
+               } else {
+                       id = s[:q]
+                       s = s[q+1:]
+               }
+               n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
+               if key == "public" {
+                       key = "system"
+               } else {
+                       key = ""
+               }
+       }
+
+       if key != "" || s != "" {
+               quirks = true
+       } else if len(n.Attr) > 0 {
+               if n.Attr[0].Key == "public" {
+                       public := strings.ToLower(n.Attr[0].Val)
+                       switch public {
+                       case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
+                               quirks = true
+                       default:
+                               for _, q := range quirkyIDs {
+                                       if strings.HasPrefix(public, q) {
+                                               quirks = true
+                                               break
+                                       }
+                               }
+                       }
+                       // The following two public IDs only cause quirks mode if there is no system ID.
+                       if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
+                               strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
+                               quirks = true
+                       }
+               }
+               if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
+                       strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
+                       quirks = true
+               }
+       }
+
+       return n, quirks
+}
+
+// quirkyIDs is a list of public doctype identifiers that cause a document
+// to be interpreted in quirks mode. The identifiers should be in lower case.
+var quirkyIDs = []string{
+       "+//silmaril//dtd html pro v0r11 19970101//",
+       "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+       "-//as//dtd html 3.0 aswedit + extensions//",
+       "-//ietf//dtd html 2.0 level 1//",
+       "-//ietf//dtd html 2.0 level 2//",
+       "-//ietf//dtd html 2.0 strict level 1//",
+       "-//ietf//dtd html 2.0 strict level 2//",
+       "-//ietf//dtd html 2.0 strict//",
+       "-//ietf//dtd html 2.0//",
+       "-//ietf//dtd html 2.1e//",
+       "-//ietf//dtd html 3.0//",
+       "-//ietf//dtd html 3.2 final//",
+       "-//ietf//dtd html 3.2//",
+       "-//ietf//dtd html 3//",
+       "-//ietf//dtd html level 0//",
+       "-//ietf//dtd html level 1//",
+       "-//ietf//dtd html level 2//",
+       "-//ietf//dtd html level 3//",
+       "-//ietf//dtd html strict level 0//",
+       "-//ietf//dtd html strict level 1//",
+       "-//ietf//dtd html strict level 2//",
+       "-//ietf//dtd html strict level 3//",
+       "-//ietf//dtd html strict//",
+       "-//ietf//dtd html//",
+       "-//metrius//dtd metrius presentational//",
+       "-//microsoft//dtd internet explorer 2.0 html strict//",
+       "-//microsoft//dtd internet explorer 2.0 html//",
+       "-//microsoft//dtd internet explorer 2.0 tables//",
+       "-//microsoft//dtd internet explorer 3.0 html strict//",
+       "-//microsoft//dtd internet explorer 3.0 html//",
+       "-//microsoft//dtd internet explorer 3.0 tables//",
+       "-//netscape comm. corp.//dtd html//",
+       "-//netscape comm. corp.//dtd strict html//",
+       "-//o'reilly and associates//dtd html 2.0//",
+       "-//o'reilly and associates//dtd html extended 1.0//",
+       "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+       "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+       "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+       "-//spyglass//dtd html 2.0 extended//",
+       "-//sq//dtd html 2.0 hotmetal + extensions//",
+       "-//sun microsystems corp.//dtd hotjava html//",
+       "-//sun microsystems corp.//dtd hotjava strict html//",
+       "-//w3c//dtd html 3 1995-03-24//",
+       "-//w3c//dtd html 3.2 draft//",
+       "-//w3c//dtd html 3.2 final//",
+       "-//w3c//dtd html 3.2//",
+       "-//w3c//dtd html 3.2s draft//",
+       "-//w3c//dtd html 4.0 frameset//",
+       "-//w3c//dtd html 4.0 transitional//",
+       "-//w3c//dtd html experimental 19960712//",
+       "-//w3c//dtd html experimental 970421//",
+       "-//w3c//dtd w3 html//",
+       "-//w3o//dtd w3 html 3.0//",
+       "-//webtechs//dtd mozilla html 2.0//",
+       "-//webtechs//dtd mozilla html//",
+}
diff --git a/internal/html/entity.go b/internal/html/entity.go
new file mode 100644 (file)
index 0000000..b628880
--- /dev/null
@@ -0,0 +1,2253 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+// longestEntityWithoutSemicolon is the maximum byte length of an entity name that does not end with ';' (all such names are 6 or fewer bytes long).
+const longestEntityWithoutSemicolon = 6
+
+// entity is a map from HTML entity names to their values. The semicolon matters:
+// https://html.spec.whatwg.org/multipage/syntax.html#named-character-references
+// lists both "amp" and "amp;" as two separate entries.
+//
+// Note that the HTML5 list is larger than the HTML4 list at
+// http://www.w3.org/TR/html4/sgml/entities.html
+var entity = map[string]rune{
+       "AElig;":                           '\U000000C6',
+       "AMP;":                             '\U00000026',
+       "Aacute;":                          '\U000000C1',
+       "Abreve;":                          '\U00000102',
+       "Acirc;":                           '\U000000C2',
+       "Acy;":                             '\U00000410',
+       "Afr;":                             '\U0001D504',
+       "Agrave;":                          '\U000000C0',
+       "Alpha;":                           '\U00000391',
+       "Amacr;":                           '\U00000100',
+       "And;":                             '\U00002A53',
+       "Aogon;":                           '\U00000104',
+       "Aopf;":                            '\U0001D538',
+       "ApplyFunction;":                   '\U00002061',
+       "Aring;":                           '\U000000C5',
+       "Ascr;":                            '\U0001D49C',
+       "Assign;":                          '\U00002254',
+       "Atilde;":                          '\U000000C3',
+       "Auml;":                            '\U000000C4',
+       "Backslash;":                       '\U00002216',
+       "Barv;":                            '\U00002AE7',
+       "Barwed;":                          '\U00002306',
+       "Bcy;":                             '\U00000411',
+       "Because;":                         '\U00002235',
+       "Bernoullis;":                      '\U0000212C',
+       "Beta;":                            '\U00000392',
+       "Bfr;":                             '\U0001D505',
+       "Bopf;":                            '\U0001D539',
+       "Breve;":                           '\U000002D8',
+       "Bscr;":                            '\U0000212C',
+       "Bumpeq;":                          '\U0000224E',
+       "CHcy;":                            '\U00000427',
+       "COPY;":                            '\U000000A9',
+       "Cacute;":                          '\U00000106',
+       "Cap;":                             '\U000022D2',
+       "CapitalDifferentialD;":            '\U00002145',
+       "Cayleys;":                         '\U0000212D',
+       "Ccaron;":                          '\U0000010C',
+       "Ccedil;":                          '\U000000C7',
+       "Ccirc;":                           '\U00000108',
+       "Cconint;":                         '\U00002230',
+       "Cdot;":                            '\U0000010A',
+       "Cedilla;":                         '\U000000B8',
+       "CenterDot;":                       '\U000000B7',
+       "Cfr;":                             '\U0000212D',
+       "Chi;":                             '\U000003A7',
+       "CircleDot;":                       '\U00002299',
+       "CircleMinus;":                     '\U00002296',
+       "CirclePlus;":                      '\U00002295',
+       "CircleTimes;":                     '\U00002297',
+       "ClockwiseContourIntegral;":        '\U00002232',
+       "CloseCurlyDoubleQuote;":           '\U0000201D',
+       "CloseCurlyQuote;":                 '\U00002019',
+       "Colon;":                           '\U00002237',
+       "Colone;":                          '\U00002A74',
+       "Congruent;":                       '\U00002261',
+       "Conint;":                          '\U0000222F',
+       "ContourIntegral;":                 '\U0000222E',
+       "Copf;":                            '\U00002102',
+       "Coproduct;":                       '\U00002210',
+       "CounterClockwiseContourIntegral;": '\U00002233',
+       "Cross;":                           '\U00002A2F',
+       "Cscr;":                            '\U0001D49E',
+       "Cup;":                             '\U000022D3',
+       "CupCap;":                          '\U0000224D',
+       "DD;":                              '\U00002145',
+       "DDotrahd;":                        '\U00002911',
+       "DJcy;":                            '\U00000402',
+       "DScy;":                            '\U00000405',
+       "DZcy;":                            '\U0000040F',
+       "Dagger;":                          '\U00002021',
+       "Darr;":                            '\U000021A1',
+       "Dashv;":                           '\U00002AE4',
+       "Dcaron;":                          '\U0000010E',
+       "Dcy;":                             '\U00000414',
+       "Del;":                             '\U00002207',
+       "Delta;":                           '\U00000394',
+       "Dfr;":                             '\U0001D507',
+       "DiacriticalAcute;":                '\U000000B4',
+       "DiacriticalDot;":                  '\U000002D9',
+       "DiacriticalDoubleAcute;":          '\U000002DD',
+       "DiacriticalGrave;":                '\U00000060',
+       "DiacriticalTilde;":                '\U000002DC',
+       "Diamond;":                         '\U000022C4',
+       "DifferentialD;":                   '\U00002146',
+       "Dopf;":                            '\U0001D53B',
+       "Dot;":                             '\U000000A8',
+       "DotDot;":                          '\U000020DC',
+       "DotEqual;":                        '\U00002250',
+       "DoubleContourIntegral;":           '\U0000222F',
+       "DoubleDot;":                       '\U000000A8',
+       "DoubleDownArrow;":                 '\U000021D3',
+       "DoubleLeftArrow;":                 '\U000021D0',
+       "DoubleLeftRightArrow;":            '\U000021D4',
+       "DoubleLeftTee;":                   '\U00002AE4',
+       "DoubleLongLeftArrow;":             '\U000027F8',
+       "DoubleLongLeftRightArrow;":        '\U000027FA',
+       "DoubleLongRightArrow;":            '\U000027F9',
+       "DoubleRightArrow;":                '\U000021D2',
+       "DoubleRightTee;":                  '\U000022A8',
+       "DoubleUpArrow;":                   '\U000021D1',
+       "DoubleUpDownArrow;":               '\U000021D5',
+       "DoubleVerticalBar;":               '\U00002225',
+       "DownArrow;":                       '\U00002193',
+       "DownArrowBar;":                    '\U00002913',
+       "DownArrowUpArrow;":                '\U000021F5',
+       "DownBreve;":                       '\U00000311',
+       "DownLeftRightVector;":             '\U00002950',
+       "DownLeftTeeVector;":               '\U0000295E',
+       "DownLeftVector;":                  '\U000021BD',
+       "DownLeftVectorBar;":               '\U00002956',
+       "DownRightTeeVector;":              '\U0000295F',
+       "DownRightVector;":                 '\U000021C1',
+       "DownRightVectorBar;":              '\U00002957',
+       "DownTee;":                         '\U000022A4',
+       "DownTeeArrow;":                    '\U000021A7',
+       "Downarrow;":                       '\U000021D3',
+       "Dscr;":                            '\U0001D49F',
+       "Dstrok;":                          '\U00000110',
+       "ENG;":                             '\U0000014A',
+       "ETH;":                             '\U000000D0',
+       "Eacute;":                          '\U000000C9',
+       "Ecaron;":                          '\U0000011A',
+       "Ecirc;":                           '\U000000CA',
+       "Ecy;":                             '\U0000042D',
+       "Edot;":                            '\U00000116',
+       "Efr;":                             '\U0001D508',
+       "Egrave;":                          '\U000000C8',
+       "Element;":                         '\U00002208',
+       "Emacr;":                           '\U00000112',
+       "EmptySmallSquare;":                '\U000025FB',
+       "EmptyVerySmallSquare;":            '\U000025AB',
+       "Eogon;":                           '\U00000118',
+       "Eopf;":                            '\U0001D53C',
+       "Epsilon;":                         '\U00000395',
+       "Equal;":                           '\U00002A75',
+       "EqualTilde;":                      '\U00002242',
+       "Equilibrium;":                     '\U000021CC',
+       "Escr;":                            '\U00002130',
+       "Esim;":                            '\U00002A73',
+       "Eta;":                             '\U00000397',
+       "Euml;":                            '\U000000CB',
+       "Exists;":                          '\U00002203',
+       "ExponentialE;":                    '\U00002147',
+       "Fcy;":                             '\U00000424',
+       "Ffr;":                             '\U0001D509',
+       "FilledSmallSquare;":               '\U000025FC',
+       "FilledVerySmallSquare;":           '\U000025AA',
+       "Fopf;":                            '\U0001D53D',
+       "ForAll;":                          '\U00002200',
+       "Fouriertrf;":                      '\U00002131',
+       "Fscr;":                            '\U00002131',
+       "GJcy;":                            '\U00000403',
+       "GT;":                              '\U0000003E',
+       "Gamma;":                           '\U00000393',
+       "Gammad;":                          '\U000003DC',
+       "Gbreve;":                          '\U0000011E',
+       "Gcedil;":                          '\U00000122',
+       "Gcirc;":                           '\U0000011C',
+       "Gcy;":                             '\U00000413',
+       "Gdot;":                            '\U00000120',
+       "Gfr;":                             '\U0001D50A',
+       "Gg;":                              '\U000022D9',
+       "Gopf;":                            '\U0001D53E',
+       "GreaterEqual;":                    '\U00002265',
+       "GreaterEqualLess;":                '\U000022DB',
+       "GreaterFullEqual;":                '\U00002267',
+       "GreaterGreater;":                  '\U00002AA2',
+       "GreaterLess;":                     '\U00002277',
+       "GreaterSlantEqual;":               '\U00002A7E',
+       "GreaterTilde;":                    '\U00002273',
+       "Gscr;":                            '\U0001D4A2',
+       "Gt;":                              '\U0000226B',
+       "HARDcy;":                          '\U0000042A',
+       "Hacek;":                           '\U000002C7',
+       "Hat;":                             '\U0000005E',
+       "Hcirc;":                           '\U00000124',
+       "Hfr;":                             '\U0000210C',
+       "HilbertSpace;":                    '\U0000210B',
+       "Hopf;":                            '\U0000210D',
+       "HorizontalLine;":                  '\U00002500',
+       "Hscr;":                            '\U0000210B',
+       "Hstrok;":                          '\U00000126',
+       "HumpDownHump;":                    '\U0000224E',
+       "HumpEqual;":                       '\U0000224F',
+       "IEcy;":                            '\U00000415',
+       "IJlig;":                           '\U00000132',
+       "IOcy;":                            '\U00000401',
+       "Iacute;":                          '\U000000CD',
+       "Icirc;":                           '\U000000CE',
+       "Icy;":                             '\U00000418',
+       "Idot;":                            '\U00000130',
+       "Ifr;":                             '\U00002111',
+       "Igrave;":                          '\U000000CC',
+       "Im;":                              '\U00002111',
+       "Imacr;":                           '\U0000012A',
+       "ImaginaryI;":                      '\U00002148',
+       "Implies;":                         '\U000021D2',
+       "Int;":                             '\U0000222C',
+       "Integral;":                        '\U0000222B',
+       "Intersection;":                    '\U000022C2',
+       "InvisibleComma;":                  '\U00002063',
+       "InvisibleTimes;":                  '\U00002062',
+       "Iogon;":                           '\U0000012E',
+       "Iopf;":                            '\U0001D540',
+       "Iota;":                            '\U00000399',
+       "Iscr;":                            '\U00002110',
+       "Itilde;":                          '\U00000128',
+       "Iukcy;":                           '\U00000406',
+       "Iuml;":                            '\U000000CF',
+       "Jcirc;":                           '\U00000134',
+       "Jcy;":                             '\U00000419',
+       "Jfr;":                             '\U0001D50D',
+       "Jopf;":                            '\U0001D541',
+       "Jscr;":                            '\U0001D4A5',
+       "Jsercy;":                          '\U00000408',
+       "Jukcy;":                           '\U00000404',
+       "KHcy;":                            '\U00000425',
+       "KJcy;":                            '\U0000040C',
+       "Kappa;":                           '\U0000039A',
+       "Kcedil;":                          '\U00000136',
+       "Kcy;":                             '\U0000041A',
+       "Kfr;":                             '\U0001D50E',
+       "Kopf;":                            '\U0001D542',
+       "Kscr;":                            '\U0001D4A6',
+       "LJcy;":                            '\U00000409',
+       "LT;":                              '\U0000003C',
+       "Lacute;":                          '\U00000139',
+       "Lambda;":                          '\U0000039B',
+       "Lang;":                            '\U000027EA',
+       "Laplacetrf;":                      '\U00002112',
+       "Larr;":                            '\U0000219E',
+       "Lcaron;":                          '\U0000013D',
+       "Lcedil;":                          '\U0000013B',
+       "Lcy;":                             '\U0000041B',
+       "LeftAngleBracket;":                '\U000027E8',
+       "LeftArrow;":                       '\U00002190',
+       "LeftArrowBar;":                    '\U000021E4',
+       "LeftArrowRightArrow;":             '\U000021C6',
+       "LeftCeiling;":                     '\U00002308',
+       "LeftDoubleBracket;":               '\U000027E6',
+       "LeftDownTeeVector;":               '\U00002961',
+       "LeftDownVector;":                  '\U000021C3',
+       "LeftDownVectorBar;":               '\U00002959',
+       "LeftFloor;":                       '\U0000230A',
+       "LeftRightArrow;":                  '\U00002194',
+       "LeftRightVector;":                 '\U0000294E',
+       "LeftTee;":                         '\U000022A3',
+       "LeftTeeArrow;":                    '\U000021A4',
+       "LeftTeeVector;":                   '\U0000295A',
+       "LeftTriangle;":                    '\U000022B2',
+       "LeftTriangleBar;":                 '\U000029CF',
+       "LeftTriangleEqual;":               '\U000022B4',
+       "LeftUpDownVector;":                '\U00002951',
+       "LeftUpTeeVector;":                 '\U00002960',
+       "LeftUpVector;":                    '\U000021BF',
+       "LeftUpVectorBar;":                 '\U00002958',
+       "LeftVector;":                      '\U000021BC',
+       "LeftVectorBar;":                   '\U00002952',
+       "Leftarrow;":                       '\U000021D0',
+       "Leftrightarrow;":                  '\U000021D4',
+       "LessEqualGreater;":                '\U000022DA',
+       "LessFullEqual;":                   '\U00002266',
+       "LessGreater;":                     '\U00002276',
+       "LessLess;":                        '\U00002AA1',
+       "LessSlantEqual;":                  '\U00002A7D',
+       "LessTilde;":                       '\U00002272',
+       "Lfr;":                             '\U0001D50F',
+       "Ll;":                              '\U000022D8',
+       "Lleftarrow;":                      '\U000021DA',
+       "Lmidot;":                          '\U0000013F',
+       "LongLeftArrow;":                   '\U000027F5',
+       "LongLeftRightArrow;":              '\U000027F7',
+       "LongRightArrow;":                  '\U000027F6',
+       "Longleftarrow;":                   '\U000027F8',
+       "Longleftrightarrow;":              '\U000027FA',
+       "Longrightarrow;":                  '\U000027F9',
+       "Lopf;":                            '\U0001D543',
+       "LowerLeftArrow;":                  '\U00002199',
+       "LowerRightArrow;":                 '\U00002198',
+       "Lscr;":                            '\U00002112',
+       "Lsh;":                             '\U000021B0',
+       "Lstrok;":                          '\U00000141',
+       "Lt;":                              '\U0000226A',
+       "Map;":                             '\U00002905',
+       "Mcy;":                             '\U0000041C',
+       "MediumSpace;":                     '\U0000205F',
+       "Mellintrf;":                       '\U00002133',
+       "Mfr;":                             '\U0001D510',
+       "MinusPlus;":                       '\U00002213',
+       "Mopf;":                            '\U0001D544',
+       "Mscr;":                            '\U00002133',
+       "Mu;":                              '\U0000039C',
+       "NJcy;":                            '\U0000040A',
+       "Nacute;":                          '\U00000143',
+       "Ncaron;":                          '\U00000147',
+       "Ncedil;":                          '\U00000145',
+       "Ncy;":                             '\U0000041D',
+       "NegativeMediumSpace;":             '\U0000200B',
+       "NegativeThickSpace;":              '\U0000200B',
+       "NegativeThinSpace;":               '\U0000200B',
+       "NegativeVeryThinSpace;":           '\U0000200B',
+       "NestedGreaterGreater;":            '\U0000226B',
+       "NestedLessLess;":                  '\U0000226A',
+       "NewLine;":                         '\U0000000A',
+       "Nfr;":                             '\U0001D511',
+       "NoBreak;":                         '\U00002060',
+       "NonBreakingSpace;":                '\U000000A0',
+       "Nopf;":                            '\U00002115',
+       "Not;":                             '\U00002AEC',
+       "NotCongruent;":                    '\U00002262',
+       "NotCupCap;":                       '\U0000226D',
+       "NotDoubleVerticalBar;":            '\U00002226',
+       "NotElement;":                      '\U00002209',
+       "NotEqual;":                        '\U00002260',
+       "NotExists;":                       '\U00002204',
+       "NotGreater;":                      '\U0000226F',
+       "NotGreaterEqual;":                 '\U00002271',
+       "NotGreaterLess;":                  '\U00002279',
+       "NotGreaterTilde;":                 '\U00002275',
+       "NotLeftTriangle;":                 '\U000022EA',
+       "NotLeftTriangleEqual;":            '\U000022EC',
+       "NotLess;":                         '\U0000226E',
+       "NotLessEqual;":                    '\U00002270',
+       "NotLessGreater;":                  '\U00002278',
+       "NotLessTilde;":                    '\U00002274',
+       "NotPrecedes;":                     '\U00002280',
+       "NotPrecedesSlantEqual;":           '\U000022E0',
+       "NotReverseElement;":               '\U0000220C',
+       "NotRightTriangle;":                '\U000022EB',
+       "NotRightTriangleEqual;":           '\U000022ED',
+       "NotSquareSubsetEqual;":            '\U000022E2',
+       "NotSquareSupersetEqual;":          '\U000022E3',
+       "NotSubsetEqual;":                  '\U00002288',
+       "NotSucceeds;":                     '\U00002281',
+       "NotSucceedsSlantEqual;":           '\U000022E1',
+       "NotSupersetEqual;":                '\U00002289',
+       "NotTilde;":                        '\U00002241',
+       "NotTildeEqual;":                   '\U00002244',
+       "NotTildeFullEqual;":               '\U00002247',
+       "NotTildeTilde;":                   '\U00002249',
+       "NotVerticalBar;":                  '\U00002224',
+       "Nscr;":                            '\U0001D4A9',
+       "Ntilde;":                          '\U000000D1',
+       "Nu;":                              '\U0000039D',
+       "OElig;":                           '\U00000152',
+       "Oacute;":                          '\U000000D3',
+       "Ocirc;":                           '\U000000D4',
+       "Ocy;":                             '\U0000041E',
+       "Odblac;":                          '\U00000150',
+       "Ofr;":                             '\U0001D512',
+       "Ograve;":                          '\U000000D2',
+       "Omacr;":                           '\U0000014C',
+       "Omega;":                           '\U000003A9',
+       "Omicron;":                         '\U0000039F',
+       "Oopf;":                            '\U0001D546',
+       "OpenCurlyDoubleQuote;":            '\U0000201C',
+       "OpenCurlyQuote;":                  '\U00002018',
+       "Or;":                              '\U00002A54',
+       "Oscr;":                            '\U0001D4AA',
+       "Oslash;":                          '\U000000D8',
+       "Otilde;":                          '\U000000D5',
+       "Otimes;":                          '\U00002A37',
+       "Ouml;":                            '\U000000D6',
+       "OverBar;":                         '\U0000203E',
+       "OverBrace;":                       '\U000023DE',
+       "OverBracket;":                     '\U000023B4',
+       "OverParenthesis;":                 '\U000023DC',
+       "PartialD;":                        '\U00002202',
+       "Pcy;":                             '\U0000041F',
+       "Pfr;":                             '\U0001D513',
+       "Phi;":                             '\U000003A6',
+       "Pi;":                              '\U000003A0',
+       "PlusMinus;":                       '\U000000B1',
+       "Poincareplane;":                   '\U0000210C',
+       "Popf;":                            '\U00002119',
+       "Pr;":                              '\U00002ABB',
+       "Precedes;":                        '\U0000227A',
+       "PrecedesEqual;":                   '\U00002AAF',
+       "PrecedesSlantEqual;":              '\U0000227C',
+       "PrecedesTilde;":                   '\U0000227E',
+       "Prime;":                           '\U00002033',
+       "Product;":                         '\U0000220F',
+       "Proportion;":                      '\U00002237',
+       "Proportional;":                    '\U0000221D',
+       "Pscr;":                            '\U0001D4AB',
+       "Psi;":                             '\U000003A8',
+       "QUOT;":                            '\U00000022',
+       "Qfr;":                             '\U0001D514',
+       "Qopf;":                            '\U0000211A',
+       "Qscr;":                            '\U0001D4AC',
+       "RBarr;":                           '\U00002910',
+       "REG;":                             '\U000000AE',
+       "Racute;":                          '\U00000154',
+       "Rang;":                            '\U000027EB',
+       "Rarr;":                            '\U000021A0',
+       "Rarrtl;":                          '\U00002916',
+       "Rcaron;":                          '\U00000158',
+       "Rcedil;":                          '\U00000156',
+       "Rcy;":                             '\U00000420',
+       "Re;":                              '\U0000211C',
+       "ReverseElement;":                  '\U0000220B',
+       "ReverseEquilibrium;":              '\U000021CB',
+       "ReverseUpEquilibrium;":            '\U0000296F',
+       "Rfr;":                             '\U0000211C',
+       "Rho;":                             '\U000003A1',
+       "RightAngleBracket;":               '\U000027E9',
+       "RightArrow;":                      '\U00002192',
+       "RightArrowBar;":                   '\U000021E5',
+       "RightArrowLeftArrow;":             '\U000021C4',
+       "RightCeiling;":                    '\U00002309',
+       "RightDoubleBracket;":              '\U000027E7',
+       "RightDownTeeVector;":              '\U0000295D',
+       "RightDownVector;":                 '\U000021C2',
+       "RightDownVectorBar;":              '\U00002955',
+       "RightFloor;":                      '\U0000230B',
+       "RightTee;":                        '\U000022A2',
+       "RightTeeArrow;":                   '\U000021A6',
+       "RightTeeVector;":                  '\U0000295B',
+       "RightTriangle;":                   '\U000022B3',
+       "RightTriangleBar;":                '\U000029D0',
+       "RightTriangleEqual;":              '\U000022B5',
+       "RightUpDownVector;":               '\U0000294F',
+       "RightUpTeeVector;":                '\U0000295C',
+       "RightUpVector;":                   '\U000021BE',
+       "RightUpVectorBar;":                '\U00002954',
+       "RightVector;":                     '\U000021C0',
+       "RightVectorBar;":                  '\U00002953',
+       "Rightarrow;":                      '\U000021D2',
+       "Ropf;":                            '\U0000211D',
+       "RoundImplies;":                    '\U00002970',
+       "Rrightarrow;":                     '\U000021DB',
+       "Rscr;":                            '\U0000211B',
+       "Rsh;":                             '\U000021B1',
+       "RuleDelayed;":                     '\U000029F4',
+       "SHCHcy;":                          '\U00000429',
+       "SHcy;":                            '\U00000428',
+       "SOFTcy;":                          '\U0000042C',
+       "Sacute;":                          '\U0000015A',
+       "Sc;":                              '\U00002ABC',
+       "Scaron;":                          '\U00000160',
+       "Scedil;":                          '\U0000015E',
+       "Scirc;":                           '\U0000015C',
+       "Scy;":                             '\U00000421',
+       "Sfr;":                             '\U0001D516',
+       "ShortDownArrow;":                  '\U00002193',
+       "ShortLeftArrow;":                  '\U00002190',
+       "ShortRightArrow;":                 '\U00002192',
+       "ShortUpArrow;":                    '\U00002191',
+       "Sigma;":                           '\U000003A3',
+       "SmallCircle;":                     '\U00002218',
+       "Sopf;":                            '\U0001D54A',
+       "Sqrt;":                            '\U0000221A',
+       "Square;":                          '\U000025A1',
+       "SquareIntersection;":              '\U00002293',
+       "SquareSubset;":                    '\U0000228F',
+       "SquareSubsetEqual;":               '\U00002291',
+       "SquareSuperset;":                  '\U00002290',
+       "SquareSupersetEqual;":             '\U00002292',
+       "SquareUnion;":                     '\U00002294',
+       "Sscr;":                            '\U0001D4AE',
+       "Star;":                            '\U000022C6',
+       "Sub;":                             '\U000022D0',
+       "Subset;":                          '\U000022D0',
+       "SubsetEqual;":                     '\U00002286',
+       "Succeeds;":                        '\U0000227B',
+       "SucceedsEqual;":                   '\U00002AB0',
+       "SucceedsSlantEqual;":              '\U0000227D',
+       "SucceedsTilde;":                   '\U0000227F',
+       "SuchThat;":                        '\U0000220B',
+       "Sum;":                             '\U00002211',
+       "Sup;":                             '\U000022D1',
+       "Superset;":                        '\U00002283',
+       "SupersetEqual;":                   '\U00002287',
+       "Supset;":                          '\U000022D1',
+       "THORN;":                           '\U000000DE',
+       "TRADE;":                           '\U00002122',
+       "TSHcy;":                           '\U0000040B',
+       "TScy;":                            '\U00000426',
+       "Tab;":                             '\U00000009',
+       "Tau;":                             '\U000003A4',
+       "Tcaron;":                          '\U00000164',
+       "Tcedil;":                          '\U00000162',
+       "Tcy;":                             '\U00000422',
+       "Tfr;":                             '\U0001D517',
+       "Therefore;":                       '\U00002234',
+       "Theta;":                           '\U00000398',
+       "ThinSpace;":                       '\U00002009',
+       "Tilde;":                           '\U0000223C',
+       "TildeEqual;":                      '\U00002243',
+       "TildeFullEqual;":                  '\U00002245',
+       "TildeTilde;":                      '\U00002248',
+       "Topf;":                            '\U0001D54B',
+       "TripleDot;":                       '\U000020DB',
+       "Tscr;":                            '\U0001D4AF',
+       "Tstrok;":                          '\U00000166',
+       "Uacute;":                          '\U000000DA',
+       "Uarr;":                            '\U0000219F',
+       "Uarrocir;":                        '\U00002949',
+       "Ubrcy;":                           '\U0000040E',
+       "Ubreve;":                          '\U0000016C',
+       "Ucirc;":                           '\U000000DB',
+       "Ucy;":                             '\U00000423',
+       "Udblac;":                          '\U00000170',
+       "Ufr;":                             '\U0001D518',
+       "Ugrave;":                          '\U000000D9',
+       "Umacr;":                           '\U0000016A',
+       "UnderBar;":                        '\U0000005F',
+       "UnderBrace;":                      '\U000023DF',
+       "UnderBracket;":                    '\U000023B5',
+       "UnderParenthesis;":                '\U000023DD',
+       "Union;":                           '\U000022C3',
+       "UnionPlus;":                       '\U0000228E',
+       "Uogon;":                           '\U00000172',
+       "Uopf;":                            '\U0001D54C',
+       "UpArrow;":                         '\U00002191',
+       "UpArrowBar;":                      '\U00002912',
+       "UpArrowDownArrow;":                '\U000021C5',
+       "UpDownArrow;":                     '\U00002195',
+       "UpEquilibrium;":                   '\U0000296E',
+       "UpTee;":                           '\U000022A5',
+       "UpTeeArrow;":                      '\U000021A5',
+       "Uparrow;":                         '\U000021D1',
+       "Updownarrow;":                     '\U000021D5',
+       "UpperLeftArrow;":                  '\U00002196',
+       "UpperRightArrow;":                 '\U00002197',
+       "Upsi;":                            '\U000003D2',
+       "Upsilon;":                         '\U000003A5',
+       "Uring;":                           '\U0000016E',
+       "Uscr;":                            '\U0001D4B0',
+       "Utilde;":                          '\U00000168',
+       "Uuml;":                            '\U000000DC',
+       "VDash;":                           '\U000022AB',
+       "Vbar;":                            '\U00002AEB',
+       "Vcy;":                             '\U00000412',
+       "Vdash;":                           '\U000022A9',
+       "Vdashl;":                          '\U00002AE6',
+       "Vee;":                             '\U000022C1',
+       "Verbar;":                          '\U00002016',
+       "Vert;":                            '\U00002016',
+       "VerticalBar;":                     '\U00002223',
+       "VerticalLine;":                    '\U0000007C',
+       "VerticalSeparator;":               '\U00002758',
+       "VerticalTilde;":                   '\U00002240',
+       "VeryThinSpace;":                   '\U0000200A',
+       "Vfr;":                             '\U0001D519',
+       "Vopf;":                            '\U0001D54D',
+       "Vscr;":                            '\U0001D4B1',
+       "Vvdash;":                          '\U000022AA',
+       "Wcirc;":                           '\U00000174',
+       "Wedge;":                           '\U000022C0',
+       "Wfr;":                             '\U0001D51A',
+       "Wopf;":                            '\U0001D54E',
+       "Wscr;":                            '\U0001D4B2',
+       "Xfr;":                             '\U0001D51B',
+       "Xi;":                              '\U0000039E',
+       "Xopf;":                            '\U0001D54F',
+       "Xscr;":                            '\U0001D4B3',
+       "YAcy;":                            '\U0000042F',
+       "YIcy;":                            '\U00000407',
+       "YUcy;":                            '\U0000042E',
+       "Yacute;":                          '\U000000DD',
+       "Ycirc;":                           '\U00000176',
+       "Ycy;":                             '\U0000042B',
+       "Yfr;":                             '\U0001D51C',
+       "Yopf;":                            '\U0001D550',
+       "Yscr;":                            '\U0001D4B4',
+       "Yuml;":                            '\U00000178',
+       "ZHcy;":                            '\U00000416',
+       "Zacute;":                          '\U00000179',
+       "Zcaron;":                          '\U0000017D',
+       "Zcy;":                             '\U00000417',
+       "Zdot;":                            '\U0000017B',
+       "ZeroWidthSpace;":                  '\U0000200B',
+       "Zeta;":                            '\U00000396',
+       "Zfr;":                             '\U00002128',
+       "Zopf;":                            '\U00002124',
+       "Zscr;":                            '\U0001D4B5',
+       "aacute;":                          '\U000000E1',
+       "abreve;":                          '\U00000103',
+       "ac;":                              '\U0000223E',
+       "acd;":                             '\U0000223F',
+       "acirc;":                           '\U000000E2',
+       "acute;":                           '\U000000B4',
+       "acy;":                             '\U00000430',
+       "aelig;":                           '\U000000E6',
+       "af;":                              '\U00002061',
+       "afr;":                             '\U0001D51E',
+       "agrave;":                          '\U000000E0',
+       "alefsym;":                         '\U00002135',
+       "aleph;":                           '\U00002135',
+       "alpha;":                           '\U000003B1',
+       "amacr;":                           '\U00000101',
+       "amalg;":                           '\U00002A3F',
+       "amp;":                             '\U00000026',
+       "and;":                             '\U00002227',
+       "andand;":                          '\U00002A55',
+       "andd;":                            '\U00002A5C',
+       "andslope;":                        '\U00002A58',
+       "andv;":                            '\U00002A5A',
+       "ang;":                             '\U00002220',
+       "ange;":                            '\U000029A4',
+       "angle;":                           '\U00002220',
+       "angmsd;":                          '\U00002221',
+       "angmsdaa;":                        '\U000029A8',
+       "angmsdab;":                        '\U000029A9',
+       "angmsdac;":                        '\U000029AA',
+       "angmsdad;":                        '\U000029AB',
+       "angmsdae;":                        '\U000029AC',
+       "angmsdaf;":                        '\U000029AD',
+       "angmsdag;":                        '\U000029AE',
+       "angmsdah;":                        '\U000029AF',
+       "angrt;":                           '\U0000221F',
+       "angrtvb;":                         '\U000022BE',
+       "angrtvbd;":                        '\U0000299D',
+       "angsph;":                          '\U00002222',
+       "angst;":                           '\U000000C5',
+       "angzarr;":                         '\U0000237C',
+       "aogon;":                           '\U00000105',
+       "aopf;":                            '\U0001D552',
+       "ap;":                              '\U00002248',
+       "apE;":                             '\U00002A70',
+       "apacir;":                          '\U00002A6F',
+       "ape;":                             '\U0000224A',
+       "apid;":                            '\U0000224B',
+       "apos;":                            '\U00000027',
+       "approx;":                          '\U00002248',
+       "approxeq;":                        '\U0000224A',
+       "aring;":                           '\U000000E5',
+       "ascr;":                            '\U0001D4B6',
+       "ast;":                             '\U0000002A',
+       "asymp;":                           '\U00002248',
+       "asympeq;":                         '\U0000224D',
+       "atilde;":                          '\U000000E3',
+       "auml;":                            '\U000000E4',
+       "awconint;":                        '\U00002233',
+       "awint;":                           '\U00002A11',
+       "bNot;":                            '\U00002AED',
+       "backcong;":                        '\U0000224C',
+       "backepsilon;":                     '\U000003F6',
+       "backprime;":                       '\U00002035',
+       "backsim;":                         '\U0000223D',
+       "backsimeq;":                       '\U000022CD',
+       "barvee;":                          '\U000022BD',
+       "barwed;":                          '\U00002305',
+       "barwedge;":                        '\U00002305',
+       "bbrk;":                            '\U000023B5',
+       "bbrktbrk;":                        '\U000023B6',
+       "bcong;":                           '\U0000224C',
+       "bcy;":                             '\U00000431',
+       "bdquo;":                           '\U0000201E',
+       "becaus;":                          '\U00002235',
+       "because;":                         '\U00002235',
+       "bemptyv;":                         '\U000029B0',
+       "bepsi;":                           '\U000003F6',
+       "bernou;":                          '\U0000212C',
+       "beta;":                            '\U000003B2',
+       "beth;":                            '\U00002136',
+       "between;":                         '\U0000226C',
+       "bfr;":                             '\U0001D51F',
+       "bigcap;":                          '\U000022C2',
+       "bigcirc;":                         '\U000025EF',
+       "bigcup;":                          '\U000022C3',
+       "bigodot;":                         '\U00002A00',
+       "bigoplus;":                        '\U00002A01',
+       "bigotimes;":                       '\U00002A02',
+       "bigsqcup;":                        '\U00002A06',
+       "bigstar;":                         '\U00002605',
+       "bigtriangledown;":                 '\U000025BD',
+       "bigtriangleup;":                   '\U000025B3',
+       "biguplus;":                        '\U00002A04',
+       "bigvee;":                          '\U000022C1',
+       "bigwedge;":                        '\U000022C0',
+       "bkarow;":                          '\U0000290D',
+       "blacklozenge;":                    '\U000029EB',
+       "blacksquare;":                     '\U000025AA',
+       "blacktriangle;":                   '\U000025B4',
+       "blacktriangledown;":               '\U000025BE',
+       "blacktriangleleft;":               '\U000025C2',
+       "blacktriangleright;":              '\U000025B8',
+       "blank;":                           '\U00002423',
+       "blk12;":                           '\U00002592',
+       "blk14;":                           '\U00002591',
+       "blk34;":                           '\U00002593',
+       "block;":                           '\U00002588',
+       "bnot;":                            '\U00002310',
+       "bopf;":                            '\U0001D553',
+       "bot;":                             '\U000022A5',
+       "bottom;":                          '\U000022A5',
+       "bowtie;":                          '\U000022C8',
+       "boxDL;":                           '\U00002557',
+       "boxDR;":                           '\U00002554',
+       "boxDl;":                           '\U00002556',
+       "boxDr;":                           '\U00002553',
+       "boxH;":                            '\U00002550',
+       "boxHD;":                           '\U00002566',
+       "boxHU;":                           '\U00002569',
+       "boxHd;":                           '\U00002564',
+       "boxHu;":                           '\U00002567',
+       "boxUL;":                           '\U0000255D',
+       "boxUR;":                           '\U0000255A',
+       "boxUl;":                           '\U0000255C',
+       "boxUr;":                           '\U00002559',
+       "boxV;":                            '\U00002551',
+       "boxVH;":                           '\U0000256C',
+       "boxVL;":                           '\U00002563',
+       "boxVR;":                           '\U00002560',
+       "boxVh;":                           '\U0000256B',
+       "boxVl;":                           '\U00002562',
+       "boxVr;":                           '\U0000255F',
+       "boxbox;":                          '\U000029C9',
+       "boxdL;":                           '\U00002555',
+       "boxdR;":                           '\U00002552',
+       "boxdl;":                           '\U00002510',
+       "boxdr;":                           '\U0000250C',
+       "boxh;":                            '\U00002500',
+       "boxhD;":                           '\U00002565',
+       "boxhU;":                           '\U00002568',
+       "boxhd;":                           '\U0000252C',
+       "boxhu;":                           '\U00002534',
+       "boxminus;":                        '\U0000229F',
+       "boxplus;":                         '\U0000229E',
+       "boxtimes;":                        '\U000022A0',
+       "boxuL;":                           '\U0000255B',
+       "boxuR;":                           '\U00002558',
+       "boxul;":                           '\U00002518',
+       "boxur;":                           '\U00002514',
+       "boxv;":                            '\U00002502',
+       "boxvH;":                           '\U0000256A',
+       "boxvL;":                           '\U00002561',
+       "boxvR;":                           '\U0000255E',
+       "boxvh;":                           '\U0000253C',
+       "boxvl;":                           '\U00002524',
+       "boxvr;":                           '\U0000251C',
+       "bprime;":                          '\U00002035',
+       "breve;":                           '\U000002D8',
+       "brvbar;":                          '\U000000A6',
+       "bscr;":                            '\U0001D4B7',
+       "bsemi;":                           '\U0000204F',
+       "bsim;":                            '\U0000223D',
+       "bsime;":                           '\U000022CD',
+       "bsol;":                            '\U0000005C',
+       "bsolb;":                           '\U000029C5',
+       "bsolhsub;":                        '\U000027C8',
+       "bull;":                            '\U00002022',
+       "bullet;":                          '\U00002022',
+       "bump;":                            '\U0000224E',
+       "bumpE;":                           '\U00002AAE',
+       "bumpe;":                           '\U0000224F',
+       "bumpeq;":                          '\U0000224F',
+       "cacute;":                          '\U00000107',
+       "cap;":                             '\U00002229',
+       "capand;":                          '\U00002A44',
+       "capbrcup;":                        '\U00002A49',
+       "capcap;":                          '\U00002A4B',
+       "capcup;":                          '\U00002A47',
+       "capdot;":                          '\U00002A40',
+       "caret;":                           '\U00002041',
+       "caron;":                           '\U000002C7',
+       "ccaps;":                           '\U00002A4D',
+       "ccaron;":                          '\U0000010D',
+       "ccedil;":                          '\U000000E7',
+       "ccirc;":                           '\U00000109',
+       "ccups;":                           '\U00002A4C',
+       "ccupssm;":                         '\U00002A50',
+       "cdot;":                            '\U0000010B',
+       "cedil;":                           '\U000000B8',
+       "cemptyv;":                         '\U000029B2',
+       "cent;":                            '\U000000A2',
+       "centerdot;":                       '\U000000B7',
+       "cfr;":                             '\U0001D520',
+       "chcy;":                            '\U00000447',
+       "check;":                           '\U00002713',
+       "checkmark;":                       '\U00002713',
+       "chi;":                             '\U000003C7',
+       "cir;":                             '\U000025CB',
+       "cirE;":                            '\U000029C3',
+       "circ;":                            '\U000002C6',
+       "circeq;":                          '\U00002257',
+       "circlearrowleft;":                 '\U000021BA',
+       "circlearrowright;":                '\U000021BB',
+       "circledR;":                        '\U000000AE',
+       "circledS;":                        '\U000024C8',
+       "circledast;":                      '\U0000229B',
+       "circledcirc;":                     '\U0000229A',
+       "circleddash;":                     '\U0000229D',
+       "cire;":                            '\U00002257',
+       "cirfnint;":                        '\U00002A10',
+       "cirmid;":                          '\U00002AEF',
+       "cirscir;":                         '\U000029C2',
+       "clubs;":                           '\U00002663',
+       "clubsuit;":                        '\U00002663',
+       "colon;":                           '\U0000003A',
+       "colone;":                          '\U00002254',
+       "coloneq;":                         '\U00002254',
+       "comma;":                           '\U0000002C',
+       "commat;":                          '\U00000040',
+       "comp;":                            '\U00002201',
+       "compfn;":                          '\U00002218',
+       "complement;":                      '\U00002201',
+       "complexes;":                       '\U00002102',
+       "cong;":                            '\U00002245',
+       "congdot;":                         '\U00002A6D',
+       "conint;":                          '\U0000222E',
+       "copf;":                            '\U0001D554',
+       "coprod;":                          '\U00002210',
+       "copy;":                            '\U000000A9',
+       "copysr;":                          '\U00002117',
+       "crarr;":                           '\U000021B5',
+       "cross;":                           '\U00002717',
+       "cscr;":                            '\U0001D4B8',
+       "csub;":                            '\U00002ACF',
+       "csube;":                           '\U00002AD1',
+       "csup;":                            '\U00002AD0',
+       "csupe;":                           '\U00002AD2',
+       "ctdot;":                           '\U000022EF',
+       "cudarrl;":                         '\U00002938',
+       "cudarrr;":                         '\U00002935',
+       "cuepr;":                           '\U000022DE',
+       "cuesc;":                           '\U000022DF',
+       "cularr;":                          '\U000021B6',
+       "cularrp;":                         '\U0000293D',
+       "cup;":                             '\U0000222A',
+       "cupbrcap;":                        '\U00002A48',
+       "cupcap;":                          '\U00002A46',
+       "cupcup;":                          '\U00002A4A',
+       "cupdot;":                          '\U0000228D',
+       "cupor;":                           '\U00002A45',
+       "curarr;":                          '\U000021B7',
+       "curarrm;":                         '\U0000293C',
+       "curlyeqprec;":                     '\U000022DE',
+       "curlyeqsucc;":                     '\U000022DF',
+       "curlyvee;":                        '\U000022CE',
+       "curlywedge;":                      '\U000022CF',
+       "curren;":                          '\U000000A4',
+       "curvearrowleft;":                  '\U000021B6',
+       "curvearrowright;":                 '\U000021B7',
+       "cuvee;":                           '\U000022CE',
+       "cuwed;":                           '\U000022CF',
+       "cwconint;":                        '\U00002232',
+       "cwint;":                           '\U00002231',
+       "cylcty;":                          '\U0000232D',
+       "dArr;":                            '\U000021D3',
+       "dHar;":                            '\U00002965',
+       "dagger;":                          '\U00002020',
+       "daleth;":                          '\U00002138',
+       "darr;":                            '\U00002193',
+       "dash;":                            '\U00002010',
+       "dashv;":                           '\U000022A3',
+       "dbkarow;":                         '\U0000290F',
+       "dblac;":                           '\U000002DD',
+       "dcaron;":                          '\U0000010F',
+       "dcy;":                             '\U00000434',
+       "dd;":                              '\U00002146',
+       "ddagger;":                         '\U00002021',
+       "ddarr;":                           '\U000021CA',
+       "ddotseq;":                         '\U00002A77',
+       "deg;":                             '\U000000B0',
+       "delta;":                           '\U000003B4',
+       "demptyv;":                         '\U000029B1',
+       "dfisht;":                          '\U0000297F',
+       "dfr;":                             '\U0001D521',
+       "dharl;":                           '\U000021C3',
+       "dharr;":                           '\U000021C2',
+       "diam;":                            '\U000022C4',
+       "diamond;":                         '\U000022C4',
+       "diamondsuit;":                     '\U00002666',
+       "diams;":                           '\U00002666',
+       "die;":                             '\U000000A8',
+       "digamma;":                         '\U000003DD',
+       "disin;":                           '\U000022F2',
+       "div;":                             '\U000000F7',
+       "divide;":                          '\U000000F7',
+       "divideontimes;":                   '\U000022C7',
+       "divonx;":                          '\U000022C7',
+       "djcy;":                            '\U00000452',
+       "dlcorn;":                          '\U0000231E',
+       "dlcrop;":                          '\U0000230D',
+       "dollar;":                          '\U00000024',
+       "dopf;":                            '\U0001D555',
+       "dot;":                             '\U000002D9',
+       "doteq;":                           '\U00002250',
+       "doteqdot;":                        '\U00002251',
+       "dotminus;":                        '\U00002238',
+       "dotplus;":                         '\U00002214',
+       "dotsquare;":                       '\U000022A1',
+       "doublebarwedge;":                  '\U00002306',
+       "downarrow;":                       '\U00002193',
+       "downdownarrows;":                  '\U000021CA',
+       "downharpoonleft;":                 '\U000021C3',
+       "downharpoonright;":                '\U000021C2',
+       "drbkarow;":                        '\U00002910',
+       "drcorn;":                          '\U0000231F',
+       "drcrop;":                          '\U0000230C',
+       "dscr;":                            '\U0001D4B9',
+       "dscy;":                            '\U00000455',
+       "dsol;":                            '\U000029F6',
+       "dstrok;":                          '\U00000111',
+       "dtdot;":                           '\U000022F1',
+       "dtri;":                            '\U000025BF',
+       "dtrif;":                           '\U000025BE',
+       "duarr;":                           '\U000021F5',
+       "duhar;":                           '\U0000296F',
+       "dwangle;":                         '\U000029A6',
+       "dzcy;":                            '\U0000045F',
+       "dzigrarr;":                        '\U000027FF',
+       "eDDot;":                           '\U00002A77',
+       "eDot;":                            '\U00002251',
+       "eacute;":                          '\U000000E9',
+       "easter;":                          '\U00002A6E',
+       "ecaron;":                          '\U0000011B',
+       "ecir;":                            '\U00002256',
+       "ecirc;":                           '\U000000EA',
+       "ecolon;":                          '\U00002255',
+       "ecy;":                             '\U0000044D',
+       "edot;":                            '\U00000117',
+       "ee;":                              '\U00002147',
+       "efDot;":                           '\U00002252',
+       "efr;":                             '\U0001D522',
+       "eg;":                              '\U00002A9A',
+       "egrave;":                          '\U000000E8',
+       "egs;":                             '\U00002A96',
+       "egsdot;":                          '\U00002A98',
+       "el;":                              '\U00002A99',
+       "elinters;":                        '\U000023E7',
+       "ell;":                             '\U00002113',
+       "els;":                             '\U00002A95',
+       "elsdot;":                          '\U00002A97',
+       "emacr;":                           '\U00000113',
+       "empty;":                           '\U00002205',
+       "emptyset;":                        '\U00002205',
+       "emptyv;":                          '\U00002205',
+       "emsp;":                            '\U00002003',
+       "emsp13;":                          '\U00002004',
+       "emsp14;":                          '\U00002005',
+       "eng;":                             '\U0000014B',
+       "ensp;":                            '\U00002002',
+       "eogon;":                           '\U00000119',
+       "eopf;":                            '\U0001D556',
+       "epar;":                            '\U000022D5',
+       "eparsl;":                          '\U000029E3',
+       "eplus;":                           '\U00002A71',
+       "epsi;":                            '\U000003B5',
+       "epsilon;":                         '\U000003B5',
+       "epsiv;":                           '\U000003F5',
+       "eqcirc;":                          '\U00002256',
+       "eqcolon;":                         '\U00002255',
+       "eqsim;":                           '\U00002242',
+       "eqslantgtr;":                      '\U00002A96',
+       "eqslantless;":                     '\U00002A95',
+       "equals;":                          '\U0000003D',
+       "equest;":                          '\U0000225F',
+       "equiv;":                           '\U00002261',
+       "equivDD;":                         '\U00002A78',
+       "eqvparsl;":                        '\U000029E5',
+       "erDot;":                           '\U00002253',
+       "erarr;":                           '\U00002971',
+       "escr;":                            '\U0000212F',
+       "esdot;":                           '\U00002250',
+       "esim;":                            '\U00002242',
+       "eta;":                             '\U000003B7',
+       "eth;":                             '\U000000F0',
+       "euml;":                            '\U000000EB',
+       "euro;":                            '\U000020AC',
+       "excl;":                            '\U00000021',
+       "exist;":                           '\U00002203',
+       "expectation;":                     '\U00002130',
+       "exponentiale;":                    '\U00002147',
+       "fallingdotseq;":                   '\U00002252',
+       "fcy;":                             '\U00000444',
+       "female;":                          '\U00002640',
+       "ffilig;":                          '\U0000FB03',
+       "fflig;":                           '\U0000FB00',
+       "ffllig;":                          '\U0000FB04',
+       "ffr;":                             '\U0001D523',
+       "filig;":                           '\U0000FB01',
+       "flat;":                            '\U0000266D',
+       "fllig;":                           '\U0000FB02',
+       "fltns;":                           '\U000025B1',
+       "fnof;":                            '\U00000192',
+       "fopf;":                            '\U0001D557',
+       "forall;":                          '\U00002200',
+       "fork;":                            '\U000022D4',
+       "forkv;":                           '\U00002AD9',
+       "fpartint;":                        '\U00002A0D',
+       "frac12;":                          '\U000000BD',
+       "frac13;":                          '\U00002153',
+       "frac14;":                          '\U000000BC',
+       "frac15;":                          '\U00002155',
+       "frac16;":                          '\U00002159',
+       "frac18;":                          '\U0000215B',
+       "frac23;":                          '\U00002154',
+       "frac25;":                          '\U00002156',
+       "frac34;":                          '\U000000BE',
+       "frac35;":                          '\U00002157',
+       "frac38;":                          '\U0000215C',
+       "frac45;":                          '\U00002158',
+       "frac56;":                          '\U0000215A',
+       "frac58;":                          '\U0000215D',
+       "frac78;":                          '\U0000215E',
+       "frasl;":                           '\U00002044',
+       "frown;":                           '\U00002322',
+       "fscr;":                            '\U0001D4BB',
+       "gE;":                              '\U00002267',
+       "gEl;":                             '\U00002A8C',
+       "gacute;":                          '\U000001F5',
+       "gamma;":                           '\U000003B3',
+       "gammad;":                          '\U000003DD',
+       "gap;":                             '\U00002A86',
+       "gbreve;":                          '\U0000011F',
+       "gcirc;":                           '\U0000011D',
+       "gcy;":                             '\U00000433',
+       "gdot;":                            '\U00000121',
+       "ge;":                              '\U00002265',
+       "gel;":                             '\U000022DB',
+       "geq;":                             '\U00002265',
+       "geqq;":                            '\U00002267',
+       "geqslant;":                        '\U00002A7E',
+       "ges;":                             '\U00002A7E',
+       "gescc;":                           '\U00002AA9',
+       "gesdot;":                          '\U00002A80',
+       "gesdoto;":                         '\U00002A82',
+       "gesdotol;":                        '\U00002A84',
+       "gesles;":                          '\U00002A94',
+       "gfr;":                             '\U0001D524',
+       "gg;":                              '\U0000226B',
+       "ggg;":                             '\U000022D9',
+       "gimel;":                           '\U00002137',
+       "gjcy;":                            '\U00000453',
+       "gl;":                              '\U00002277',
+       "glE;":                             '\U00002A92',
+       "gla;":                             '\U00002AA5',
+       "glj;":                             '\U00002AA4',
+       "gnE;":                             '\U00002269',
+       "gnap;":                            '\U00002A8A',
+       "gnapprox;":                        '\U00002A8A',
+       "gne;":                             '\U00002A88',
+       "gneq;":                            '\U00002A88',
+       "gneqq;":                           '\U00002269',
+       "gnsim;":                           '\U000022E7',
+       "gopf;":                            '\U0001D558',
+       "grave;":                           '\U00000060',
+       "gscr;":                            '\U0000210A',
+       "gsim;":                            '\U00002273',
+       "gsime;":                           '\U00002A8E',
+       "gsiml;":                           '\U00002A90',
+       "gt;":                              '\U0000003E',
+       "gtcc;":                            '\U00002AA7',
+       "gtcir;":                           '\U00002A7A',
+       "gtdot;":                           '\U000022D7',
+       "gtlPar;":                          '\U00002995',
+       "gtquest;":                         '\U00002A7C',
+       "gtrapprox;":                       '\U00002A86',
+       "gtrarr;":                          '\U00002978',
+       "gtrdot;":                          '\U000022D7',
+       "gtreqless;":                       '\U000022DB',
+       "gtreqqless;":                      '\U00002A8C',
+       "gtrless;":                         '\U00002277',
+       "gtrsim;":                          '\U00002273',
+       "hArr;":                            '\U000021D4',
+       "hairsp;":                          '\U0000200A',
+       "half;":                            '\U000000BD',
+       "hamilt;":                          '\U0000210B',
+       "hardcy;":                          '\U0000044A',
+       "harr;":                            '\U00002194',
+       "harrcir;":                         '\U00002948',
+       "harrw;":                           '\U000021AD',
+       "hbar;":                            '\U0000210F',
+       "hcirc;":                           '\U00000125',
+       "hearts;":                          '\U00002665',
+       "heartsuit;":                       '\U00002665',
+       "hellip;":                          '\U00002026',
+       "hercon;":                          '\U000022B9',
+       "hfr;":                             '\U0001D525',
+       "hksearow;":                        '\U00002925',
+       "hkswarow;":                        '\U00002926',
+       "hoarr;":                           '\U000021FF',
+       "homtht;":                          '\U0000223B',
+       "hookleftarrow;":                   '\U000021A9',
+       "hookrightarrow;":                  '\U000021AA',
+       "hopf;":                            '\U0001D559',
+       "horbar;":                          '\U00002015',
+       "hscr;":                            '\U0001D4BD',
+       "hslash;":                          '\U0000210F',
+       "hstrok;":                          '\U00000127',
+       "hybull;":                          '\U00002043',
+       "hyphen;":                          '\U00002010',
+       "iacute;":                          '\U000000ED',
+       "ic;":                              '\U00002063',
+       "icirc;":                           '\U000000EE',
+       "icy;":                             '\U00000438',
+       "iecy;":                            '\U00000435',
+       "iexcl;":                           '\U000000A1',
+       "iff;":                             '\U000021D4',
+       "ifr;":                             '\U0001D526',
+       "igrave;":                          '\U000000EC',
+       "ii;":                              '\U00002148',
+       "iiiint;":                          '\U00002A0C',
+       "iiint;":                           '\U0000222D',
+       "iinfin;":                          '\U000029DC',
+       "iiota;":                           '\U00002129',
+       "ijlig;":                           '\U00000133',
+       "imacr;":                           '\U0000012B',
+       "image;":                           '\U00002111',
+       "imagline;":                        '\U00002110',
+       "imagpart;":                        '\U00002111',
+       "imath;":                           '\U00000131',
+       "imof;":                            '\U000022B7',
+       "imped;":                           '\U000001B5',
+       "in;":                              '\U00002208',
+       "incare;":                          '\U00002105',
+       "infin;":                           '\U0000221E',
+       "infintie;":                        '\U000029DD',
+       "inodot;":                          '\U00000131',
+       "int;":                             '\U0000222B',
+       "intcal;":                          '\U000022BA',
+       "integers;":                        '\U00002124',
+       "intercal;":                        '\U000022BA',
+       "intlarhk;":                        '\U00002A17',
+       "intprod;":                         '\U00002A3C',
+       "iocy;":                            '\U00000451',
+       "iogon;":                           '\U0000012F',
+       "iopf;":                            '\U0001D55A',
+       "iota;":                            '\U000003B9',
+       "iprod;":                           '\U00002A3C',
+       "iquest;":                          '\U000000BF',
+       "iscr;":                            '\U0001D4BE',
+       "isin;":                            '\U00002208',
+       "isinE;":                           '\U000022F9',
+       "isindot;":                         '\U000022F5',
+       "isins;":                           '\U000022F4',
+       "isinsv;":                          '\U000022F3',
+       "isinv;":                           '\U00002208',
+       "it;":                              '\U00002062',
+       "itilde;":                          '\U00000129',
+       "iukcy;":                           '\U00000456',
+       "iuml;":                            '\U000000EF',
+       "jcirc;":                           '\U00000135',
+       "jcy;":                             '\U00000439',
+       "jfr;":                             '\U0001D527',
+       "jmath;":                           '\U00000237',
+       "jopf;":                            '\U0001D55B',
+       "jscr;":                            '\U0001D4BF',
+       "jsercy;":                          '\U00000458',
+       "jukcy;":                           '\U00000454',
+       "kappa;":                           '\U000003BA',
+       "kappav;":                          '\U000003F0',
+       "kcedil;":                          '\U00000137',
+       "kcy;":                             '\U0000043A',
+       "kfr;":                             '\U0001D528',
+       "kgreen;":                          '\U00000138',
+       "khcy;":                            '\U00000445',
+       "kjcy;":                            '\U0000045C',
+       "kopf;":                            '\U0001D55C',
+       "kscr;":                            '\U0001D4C0',
+       "lAarr;":                           '\U000021DA',
+       "lArr;":                            '\U000021D0',
+       "lAtail;":                          '\U0000291B',
+       "lBarr;":                           '\U0000290E',
+       "lE;":                              '\U00002266',
+       "lEg;":                             '\U00002A8B',
+       "lHar;":                            '\U00002962',
+       "lacute;":                          '\U0000013A',
+       "laemptyv;":                        '\U000029B4',
+       "lagran;":                          '\U00002112',
+       "lambda;":                          '\U000003BB',
+       "lang;":                            '\U000027E8',
+       "langd;":                           '\U00002991',
+       "langle;":                          '\U000027E8',
+       "lap;":                             '\U00002A85',
+       "laquo;":                           '\U000000AB',
+       "larr;":                            '\U00002190',
+       "larrb;":                           '\U000021E4',
+       "larrbfs;":                         '\U0000291F',
+       "larrfs;":                          '\U0000291D',
+       "larrhk;":                          '\U000021A9',
+       "larrlp;":                          '\U000021AB',
+       "larrpl;":                          '\U00002939',
+       "larrsim;":                         '\U00002973',
+       "larrtl;":                          '\U000021A2',
+       "lat;":                             '\U00002AAB',
+       "latail;":                          '\U00002919',
+       "late;":                            '\U00002AAD',
+       "lbarr;":                           '\U0000290C',
+       "lbbrk;":                           '\U00002772',
+       "lbrace;":                          '\U0000007B',
+       "lbrack;":                          '\U0000005B',
+       "lbrke;":                           '\U0000298B',
+       "lbrksld;":                         '\U0000298F',
+       "lbrkslu;":                         '\U0000298D',
+       "lcaron;":                          '\U0000013E',
+       "lcedil;":                          '\U0000013C',
+       "lceil;":                           '\U00002308',
+       "lcub;":                            '\U0000007B',
+       "lcy;":                             '\U0000043B',
+       "ldca;":                            '\U00002936',
+       "ldquo;":                           '\U0000201C',
+       "ldquor;":                          '\U0000201E',
+       "ldrdhar;":                         '\U00002967',
+       "ldrushar;":                        '\U0000294B',
+       "ldsh;":                            '\U000021B2',
+       "le;":                              '\U00002264',
+       "leftarrow;":                       '\U00002190',
+       "leftarrowtail;":                   '\U000021A2',
+       "leftharpoondown;":                 '\U000021BD',
+       "leftharpoonup;":                   '\U000021BC',
+       "leftleftarrows;":                  '\U000021C7',
+       "leftrightarrow;":                  '\U00002194',
+       "leftrightarrows;":                 '\U000021C6',
+       "leftrightharpoons;":               '\U000021CB',
+       "leftrightsquigarrow;":             '\U000021AD',
+       "leftthreetimes;":                  '\U000022CB',
+       "leg;":                             '\U000022DA',
+       "leq;":                             '\U00002264',
+       "leqq;":                            '\U00002266',
+       "leqslant;":                        '\U00002A7D',
+       "les;":                             '\U00002A7D',
+       "lescc;":                           '\U00002AA8',
+       "lesdot;":                          '\U00002A7F',
+       "lesdoto;":                         '\U00002A81',
+       "lesdotor;":                        '\U00002A83',
+       "lesges;":                          '\U00002A93',
+       "lessapprox;":                      '\U00002A85',
+       "lessdot;":                         '\U000022D6',
+       "lesseqgtr;":                       '\U000022DA',
+       "lesseqqgtr;":                      '\U00002A8B',
+       "lessgtr;":                         '\U00002276',
+       "lesssim;":                         '\U00002272',
+       "lfisht;":                          '\U0000297C',
+       "lfloor;":                          '\U0000230A',
+       "lfr;":                             '\U0001D529',
+       "lg;":                              '\U00002276',
+       "lgE;":                             '\U00002A91',
+       "lhard;":                           '\U000021BD',
+       "lharu;":                           '\U000021BC',
+       "lharul;":                          '\U0000296A',
+       "lhblk;":                           '\U00002584',
+       "ljcy;":                            '\U00000459',
+       "ll;":                              '\U0000226A',
+       "llarr;":                           '\U000021C7',
+       "llcorner;":                        '\U0000231E',
+       "llhard;":                          '\U0000296B',
+       "lltri;":                           '\U000025FA',
+       "lmidot;":                          '\U00000140',
+       "lmoust;":                          '\U000023B0',
+       "lmoustache;":                      '\U000023B0',
+       "lnE;":                             '\U00002268',
+       "lnap;":                            '\U00002A89',
+       "lnapprox;":                        '\U00002A89',
+       "lne;":                             '\U00002A87',
+       "lneq;":                            '\U00002A87',
+       "lneqq;":                           '\U00002268',
+       "lnsim;":                           '\U000022E6',
+       "loang;":                           '\U000027EC',
+       "loarr;":                           '\U000021FD',
+       "lobrk;":                           '\U000027E6',
+       "longleftarrow;":                   '\U000027F5',
+       "longleftrightarrow;":              '\U000027F7',
+       "longmapsto;":                      '\U000027FC',
+       "longrightarrow;":                  '\U000027F6',
+       "looparrowleft;":                   '\U000021AB',
+       "looparrowright;":                  '\U000021AC',
+       "lopar;":                           '\U00002985',
+       "lopf;":                            '\U0001D55D',
+       "loplus;":                          '\U00002A2D',
+       "lotimes;":                         '\U00002A34',
+       "lowast;":                          '\U00002217',
+       "lowbar;":                          '\U0000005F',
+       "loz;":                             '\U000025CA',
+       "lozenge;":                         '\U000025CA',
+       "lozf;":                            '\U000029EB',
+       "lpar;":                            '\U00000028',
+       "lparlt;":                          '\U00002993',
+       "lrarr;":                           '\U000021C6',
+       "lrcorner;":                        '\U0000231F',
+       "lrhar;":                           '\U000021CB',
+       "lrhard;":                          '\U0000296D',
+       "lrm;":                             '\U0000200E',
+       "lrtri;":                           '\U000022BF',
+       "lsaquo;":                          '\U00002039',
+       "lscr;":                            '\U0001D4C1',
+       "lsh;":                             '\U000021B0',
+       "lsim;":                            '\U00002272',
+       "lsime;":                           '\U00002A8D',
+       "lsimg;":                           '\U00002A8F',
+       "lsqb;":                            '\U0000005B',
+       "lsquo;":                           '\U00002018',
+       "lsquor;":                          '\U0000201A',
+       "lstrok;":                          '\U00000142',
+       "lt;":                              '\U0000003C',
+       "ltcc;":                            '\U00002AA6',
+       "ltcir;":                           '\U00002A79',
+       "ltdot;":                           '\U000022D6',
+       "lthree;":                          '\U000022CB',
+       "ltimes;":                          '\U000022C9',
+       "ltlarr;":                          '\U00002976',
+       "ltquest;":                         '\U00002A7B',
+       "ltrPar;":                          '\U00002996',
+       "ltri;":                            '\U000025C3',
+       "ltrie;":                           '\U000022B4',
+       "ltrif;":                           '\U000025C2',
+       "lurdshar;":                        '\U0000294A',
+       "luruhar;":                         '\U00002966',
+       "mDDot;":                           '\U0000223A',
+       "macr;":                            '\U000000AF',
+       "male;":                            '\U00002642',
+       "malt;":                            '\U00002720',
+       "maltese;":                         '\U00002720',
+       "map;":                             '\U000021A6',
+       "mapsto;":                          '\U000021A6',
+       "mapstodown;":                      '\U000021A7',
+       "mapstoleft;":                      '\U000021A4',
+       "mapstoup;":                        '\U000021A5',
+       "marker;":                          '\U000025AE',
+       "mcomma;":                          '\U00002A29',
+       "mcy;":                             '\U0000043C',
+       "mdash;":                           '\U00002014',
+       "measuredangle;":                   '\U00002221',
+       "mfr;":                             '\U0001D52A',
+       "mho;":                             '\U00002127',
+       "micro;":                           '\U000000B5',
+       "mid;":                             '\U00002223',
+       "midast;":                          '\U0000002A',
+       "midcir;":                          '\U00002AF0',
+       "middot;":                          '\U000000B7',
+       "minus;":                           '\U00002212',
+       "minusb;":                          '\U0000229F',
+       "minusd;":                          '\U00002238',
+       "minusdu;":                         '\U00002A2A',
+       "mlcp;":                            '\U00002ADB',
+       "mldr;":                            '\U00002026',
+       "mnplus;":                          '\U00002213',
+       "models;":                          '\U000022A7',
+       "mopf;":                            '\U0001D55E',
+       "mp;":                              '\U00002213',
+       "mscr;":                            '\U0001D4C2',
+       "mstpos;":                          '\U0000223E',
+       "mu;":                              '\U000003BC',
+       "multimap;":                        '\U000022B8',
+       "mumap;":                           '\U000022B8',
+       "nLeftarrow;":                      '\U000021CD',
+       "nLeftrightarrow;":                 '\U000021CE',
+       "nRightarrow;":                     '\U000021CF',
+       "nVDash;":                          '\U000022AF',
+       "nVdash;":                          '\U000022AE',
+       "nabla;":                           '\U00002207',
+       "nacute;":                          '\U00000144',
+       "nap;":                             '\U00002249',
+       "napos;":                           '\U00000149',
+       "napprox;":                         '\U00002249',
+       "natur;":                           '\U0000266E',
+       "natural;":                         '\U0000266E',
+       "naturals;":                        '\U00002115',
+       "nbsp;":                            '\U000000A0',
+       "ncap;":                            '\U00002A43',
+       "ncaron;":                          '\U00000148',
+       "ncedil;":                          '\U00000146',
+       "ncong;":                           '\U00002247',
+       "ncup;":                            '\U00002A42',
+       "ncy;":                             '\U0000043D',
+       "ndash;":                           '\U00002013',
+       "ne;":                              '\U00002260',
+       "neArr;":                           '\U000021D7',
+       "nearhk;":                          '\U00002924',
+       "nearr;":                           '\U00002197',
+       "nearrow;":                         '\U00002197',
+       "nequiv;":                          '\U00002262',
+       "nesear;":                          '\U00002928',
+       "nexist;":                          '\U00002204',
+       "nexists;":                         '\U00002204',
+       "nfr;":                             '\U0001D52B',
+       "nge;":                             '\U00002271',
+       "ngeq;":                            '\U00002271',
+       "ngsim;":                           '\U00002275',
+       "ngt;":                             '\U0000226F',
+       "ngtr;":                            '\U0000226F',
+       "nhArr;":                           '\U000021CE',
+       "nharr;":                           '\U000021AE',
+       "nhpar;":                           '\U00002AF2',
+       "ni;":                              '\U0000220B',
+       "nis;":                             '\U000022FC',
+       "nisd;":                            '\U000022FA',
+       "niv;":                             '\U0000220B',
+       "njcy;":                            '\U0000045A',
+       "nlArr;":                           '\U000021CD',
+       "nlarr;":                           '\U0000219A',
+       "nldr;":                            '\U00002025',
+       "nle;":                             '\U00002270',
+       "nleftarrow;":                      '\U0000219A',
+       "nleftrightarrow;":                 '\U000021AE',
+       "nleq;":                            '\U00002270',
+       "nless;":                           '\U0000226E',
+       "nlsim;":                           '\U00002274',
+       "nlt;":                             '\U0000226E',
+       "nltri;":                           '\U000022EA',
+       "nltrie;":                          '\U000022EC',
+       "nmid;":                            '\U00002224',
+       "nopf;":                            '\U0001D55F',
+       "not;":                             '\U000000AC',
+       "notin;":                           '\U00002209',
+       "notinva;":                         '\U00002209',
+       "notinvb;":                         '\U000022F7',
+       "notinvc;":                         '\U000022F6',
+       "notni;":                           '\U0000220C',
+       "notniva;":                         '\U0000220C',
+       "notnivb;":                         '\U000022FE',
+       "notnivc;":                         '\U000022FD',
+       "npar;":                            '\U00002226',
+       "nparallel;":                       '\U00002226',
+       "npolint;":                         '\U00002A14',
+       "npr;":                             '\U00002280',
+       "nprcue;":                          '\U000022E0',
+       "nprec;":                           '\U00002280',
+       "nrArr;":                           '\U000021CF',
+       "nrarr;":                           '\U0000219B',
+       "nrightarrow;":                     '\U0000219B',
+       "nrtri;":                           '\U000022EB',
+       "nrtrie;":                          '\U000022ED',
+       "nsc;":                             '\U00002281',
+       "nsccue;":                          '\U000022E1',
+       "nscr;":                            '\U0001D4C3',
+       "nshortmid;":                       '\U00002224',
+       "nshortparallel;":                  '\U00002226',
+       "nsim;":                            '\U00002241',
+       "nsime;":                           '\U00002244',
+       "nsimeq;":                          '\U00002244',
+       "nsmid;":                           '\U00002224',
+       "nspar;":                           '\U00002226',
+       "nsqsube;":                         '\U000022E2',
+       "nsqsupe;":                         '\U000022E3',
+       "nsub;":                            '\U00002284',
+       "nsube;":                           '\U00002288',
+       "nsubseteq;":                       '\U00002288',
+       "nsucc;":                           '\U00002281',
+       "nsup;":                            '\U00002285',
+       "nsupe;":                           '\U00002289',
+       "nsupseteq;":                       '\U00002289',
+       "ntgl;":                            '\U00002279',
+       "ntilde;":                          '\U000000F1',
+       "ntlg;":                            '\U00002278',
+       "ntriangleleft;":                   '\U000022EA',
+       "ntrianglelefteq;":                 '\U000022EC',
+       "ntriangleright;":                  '\U000022EB',
+       "ntrianglerighteq;":                '\U000022ED',
+       "nu;":                              '\U000003BD',
+       "num;":                             '\U00000023',
+       "numero;":                          '\U00002116',
+       "numsp;":                           '\U00002007',
+       "nvDash;":                          '\U000022AD',
+       "nvHarr;":                          '\U00002904',
+       "nvdash;":                          '\U000022AC',
+       "nvinfin;":                         '\U000029DE',
+       "nvlArr;":                          '\U00002902',
+       "nvrArr;":                          '\U00002903',
+       "nwArr;":                           '\U000021D6',
+       "nwarhk;":                          '\U00002923',
+       "nwarr;":                           '\U00002196',
+       "nwarrow;":                         '\U00002196',
+       "nwnear;":                          '\U00002927',
+       "oS;":                              '\U000024C8',
+       "oacute;":                          '\U000000F3',
+       "oast;":                            '\U0000229B',
+       "ocir;":                            '\U0000229A',
+       "ocirc;":                           '\U000000F4',
+       "ocy;":                             '\U0000043E',
+       "odash;":                           '\U0000229D',
+       "odblac;":                          '\U00000151',
+       "odiv;":                            '\U00002A38',
+       "odot;":                            '\U00002299',
+       "odsold;":                          '\U000029BC',
+       "oelig;":                           '\U00000153',
+       "ofcir;":                           '\U000029BF',
+       "ofr;":                             '\U0001D52C',
+       "ogon;":                            '\U000002DB',
+       "ograve;":                          '\U000000F2',
+       "ogt;":                             '\U000029C1',
+       "ohbar;":                           '\U000029B5',
+       "ohm;":                             '\U000003A9',
+       "oint;":                            '\U0000222E',
+       "olarr;":                           '\U000021BA',
+       "olcir;":                           '\U000029BE',
+       "olcross;":                         '\U000029BB',
+       "oline;":                           '\U0000203E',
+       "olt;":                             '\U000029C0',
+       "omacr;":                           '\U0000014D',
+       "omega;":                           '\U000003C9',
+       "omicron;":                         '\U000003BF',
+       "omid;":                            '\U000029B6',
+       "ominus;":                          '\U00002296',
+       "oopf;":                            '\U0001D560',
+       "opar;":                            '\U000029B7',
+       "operp;":                           '\U000029B9',
+       "oplus;":                           '\U00002295',
+       "or;":                              '\U00002228',
+       "orarr;":                           '\U000021BB',
+       "ord;":                             '\U00002A5D',
+       "order;":                           '\U00002134',
+       "orderof;":                         '\U00002134',
+       "ordf;":                            '\U000000AA',
+       "ordm;":                            '\U000000BA',
+       "origof;":                          '\U000022B6',
+       "oror;":                            '\U00002A56',
+       "orslope;":                         '\U00002A57',
+       "orv;":                             '\U00002A5B',
+       "oscr;":                            '\U00002134',
+       "oslash;":                          '\U000000F8',
+       "osol;":                            '\U00002298',
+       "otilde;":                          '\U000000F5',
+       "otimes;":                          '\U00002297',
+       "otimesas;":                        '\U00002A36',
+       "ouml;":                            '\U000000F6',
+       "ovbar;":                           '\U0000233D',
+       "par;":                             '\U00002225',
+       "para;":                            '\U000000B6',
+       "parallel;":                        '\U00002225',
+       "parsim;":                          '\U00002AF3',
+       "parsl;":                           '\U00002AFD',
+       "part;":                            '\U00002202',
+       "pcy;":                             '\U0000043F',
+       "percnt;":                          '\U00000025',
+       "period;":                          '\U0000002E',
+       "permil;":                          '\U00002030',
+       "perp;":                            '\U000022A5',
+       "pertenk;":                         '\U00002031',
+       "pfr;":                             '\U0001D52D',
+       "phi;":                             '\U000003C6',
+       "phiv;":                            '\U000003D5',
+       "phmmat;":                          '\U00002133',
+       "phone;":                           '\U0000260E',
+       "pi;":                              '\U000003C0',
+       "pitchfork;":                       '\U000022D4',
+       "piv;":                             '\U000003D6',
+       "planck;":                          '\U0000210F',
+       "planckh;":                         '\U0000210E',
+       "plankv;":                          '\U0000210F',
+       "plus;":                            '\U0000002B',
+       "plusacir;":                        '\U00002A23',
+       "plusb;":                           '\U0000229E',
+       "pluscir;":                         '\U00002A22',
+       "plusdo;":                          '\U00002214',
+       "plusdu;":                          '\U00002A25',
+       "pluse;":                           '\U00002A72',
+       "plusmn;":                          '\U000000B1',
+       "plussim;":                         '\U00002A26',
+       "plustwo;":                         '\U00002A27',
+       "pm;":                              '\U000000B1',
+       "pointint;":                        '\U00002A15',
+       "popf;":                            '\U0001D561',
+       "pound;":                           '\U000000A3',
+       "pr;":                              '\U0000227A',
+       "prE;":                             '\U00002AB3',
+       "prap;":                            '\U00002AB7',
+       "prcue;":                           '\U0000227C',
+       "pre;":                             '\U00002AAF',
+       "prec;":                            '\U0000227A',
+       "precapprox;":                      '\U00002AB7',
+       "preccurlyeq;":                     '\U0000227C',
+       "preceq;":                          '\U00002AAF',
+       "precnapprox;":                     '\U00002AB9',
+       "precneqq;":                        '\U00002AB5',
+       "precnsim;":                        '\U000022E8',
+       "precsim;":                         '\U0000227E',
+       "prime;":                           '\U00002032',
+       "primes;":                          '\U00002119',
+       "prnE;":                            '\U00002AB5',
+       "prnap;":                           '\U00002AB9',
+       "prnsim;":                          '\U000022E8',
+       "prod;":                            '\U0000220F',
+       "profalar;":                        '\U0000232E',
+       "profline;":                        '\U00002312',
+       "profsurf;":                        '\U00002313',
+       "prop;":                            '\U0000221D',
+       "propto;":                          '\U0000221D',
+       "prsim;":                           '\U0000227E',
+       "prurel;":                          '\U000022B0',
+       "pscr;":                            '\U0001D4C5',
+       "psi;":                             '\U000003C8',
+       "puncsp;":                          '\U00002008',
+       "qfr;":                             '\U0001D52E',
+       "qint;":                            '\U00002A0C',
+       "qopf;":                            '\U0001D562',
+       "qprime;":                          '\U00002057',
+       "qscr;":                            '\U0001D4C6',
+       "quaternions;":                     '\U0000210D',
+       "quatint;":                         '\U00002A16',
+       "quest;":                           '\U0000003F',
+       "questeq;":                         '\U0000225F',
+       "quot;":                            '\U00000022',
+       "rAarr;":                           '\U000021DB',
+       "rArr;":                            '\U000021D2',
+       "rAtail;":                          '\U0000291C',
+       "rBarr;":                           '\U0000290F',
+       "rHar;":                            '\U00002964',
+       "racute;":                          '\U00000155',
+       "radic;":                           '\U0000221A',
+       "raemptyv;":                        '\U000029B3',
+       "rang;":                            '\U000027E9',
+       "rangd;":                           '\U00002992',
+       "range;":                           '\U000029A5',
+       "rangle;":                          '\U000027E9',
+       "raquo;":                           '\U000000BB',
+       "rarr;":                            '\U00002192',
+       "rarrap;":                          '\U00002975',
+       "rarrb;":                           '\U000021E5',
+       "rarrbfs;":                         '\U00002920',
+       "rarrc;":                           '\U00002933',
+       "rarrfs;":                          '\U0000291E',
+       "rarrhk;":                          '\U000021AA',
+       "rarrlp;":                          '\U000021AC',
+       "rarrpl;":                          '\U00002945',
+       "rarrsim;":                         '\U00002974',
+       "rarrtl;":                          '\U000021A3',
+       "rarrw;":                           '\U0000219D',
+       "ratail;":                          '\U0000291A',
+       "ratio;":                           '\U00002236',
+       "rationals;":                       '\U0000211A',
+       "rbarr;":                           '\U0000290D',
+       "rbbrk;":                           '\U00002773',
+       "rbrace;":                          '\U0000007D',
+       "rbrack;":                          '\U0000005D',
+       "rbrke;":                           '\U0000298C',
+       "rbrksld;":                         '\U0000298E',
+       "rbrkslu;":                         '\U00002990',
+       "rcaron;":                          '\U00000159',
+       "rcedil;":                          '\U00000157',
+       "rceil;":                           '\U00002309',
+       "rcub;":                            '\U0000007D',
+       "rcy;":                             '\U00000440',
+       "rdca;":                            '\U00002937',
+       "rdldhar;":                         '\U00002969',
+       "rdquo;":                           '\U0000201D',
+       "rdquor;":                          '\U0000201D',
+       "rdsh;":                            '\U000021B3',
+       "real;":                            '\U0000211C',
+       "realine;":                         '\U0000211B',
+       "realpart;":                        '\U0000211C',
+       "reals;":                           '\U0000211D',
+       "rect;":                            '\U000025AD',
+       "reg;":                             '\U000000AE',
+       "rfisht;":                          '\U0000297D',
+       "rfloor;":                          '\U0000230B',
+       "rfr;":                             '\U0001D52F',
+       "rhard;":                           '\U000021C1',
+       "rharu;":                           '\U000021C0',
+       "rharul;":                          '\U0000296C',
+       "rho;":                             '\U000003C1',
+       "rhov;":                            '\U000003F1',
+       "rightarrow;":                      '\U00002192',
+       "rightarrowtail;":                  '\U000021A3',
+       "rightharpoondown;":                '\U000021C1',
+       "rightharpoonup;":                  '\U000021C0',
+       "rightleftarrows;":                 '\U000021C4',
+       "rightleftharpoons;":               '\U000021CC',
+       "rightrightarrows;":                '\U000021C9',
+       "rightsquigarrow;":                 '\U0000219D',
+       "rightthreetimes;":                 '\U000022CC',
+       "ring;":                            '\U000002DA',
+       "risingdotseq;":                    '\U00002253',
+       "rlarr;":                           '\U000021C4',
+       "rlhar;":                           '\U000021CC',
+       "rlm;":                             '\U0000200F',
+       "rmoust;":                          '\U000023B1',
+       "rmoustache;":                      '\U000023B1',
+       "rnmid;":                           '\U00002AEE',
+       "roang;":                           '\U000027ED',
+       "roarr;":                           '\U000021FE',
+       "robrk;":                           '\U000027E7',
+       "ropar;":                           '\U00002986',
+       "ropf;":                            '\U0001D563',
+       "roplus;":                          '\U00002A2E',
+       "rotimes;":                         '\U00002A35',
+       "rpar;":                            '\U00000029',
+       "rpargt;":                          '\U00002994',
+       "rppolint;":                        '\U00002A12',
+       "rrarr;":                           '\U000021C9',
+       "rsaquo;":                          '\U0000203A',
+       "rscr;":                            '\U0001D4C7',
+       "rsh;":                             '\U000021B1',
+       "rsqb;":                            '\U0000005D',
+       "rsquo;":                           '\U00002019',
+       "rsquor;":                          '\U00002019',
+       "rthree;":                          '\U000022CC',
+       "rtimes;":                          '\U000022CA',
+       "rtri;":                            '\U000025B9',
+       "rtrie;":                           '\U000022B5',
+       "rtrif;":                           '\U000025B8',
+       "rtriltri;":                        '\U000029CE',
+       "ruluhar;":                         '\U00002968',
+       "rx;":                              '\U0000211E',
+       "sacute;":                          '\U0000015B',
+       "sbquo;":                           '\U0000201A',
+       "sc;":                              '\U0000227B',
+       "scE;":                             '\U00002AB4',
+       "scap;":                            '\U00002AB8',
+       "scaron;":                          '\U00000161',
+       "sccue;":                           '\U0000227D',
+       "sce;":                             '\U00002AB0',
+       "scedil;":                          '\U0000015F',
+       "scirc;":                           '\U0000015D',
+       "scnE;":                            '\U00002AB6',
+       "scnap;":                           '\U00002ABA',
+       "scnsim;":                          '\U000022E9',
+       "scpolint;":                        '\U00002A13',
+       "scsim;":                           '\U0000227F',
+       "scy;":                             '\U00000441',
+       "sdot;":                            '\U000022C5',
+       "sdotb;":                           '\U000022A1',
+       "sdote;":                           '\U00002A66',
+       "seArr;":                           '\U000021D8',
+       "searhk;":                          '\U00002925',
+       "searr;":                           '\U00002198',
+       "searrow;":                         '\U00002198',
+       "sect;":                            '\U000000A7',
+       "semi;":                            '\U0000003B',
+       "seswar;":                          '\U00002929',
+       "setminus;":                        '\U00002216',
+       "setmn;":                           '\U00002216',
+       "sext;":                            '\U00002736',
+       "sfr;":                             '\U0001D530',
+       "sfrown;":                          '\U00002322',
+       "sharp;":                           '\U0000266F',
+       "shchcy;":                          '\U00000449',
+       "shcy;":                            '\U00000448',
+       "shortmid;":                        '\U00002223',
+       "shortparallel;":                   '\U00002225',
+       "shy;":                             '\U000000AD',
+       "sigma;":                           '\U000003C3',
+       "sigmaf;":                          '\U000003C2',
+       "sigmav;":                          '\U000003C2',
+       "sim;":                             '\U0000223C',
+       "simdot;":                          '\U00002A6A',
+       "sime;":                            '\U00002243',
+       "simeq;":                           '\U00002243',
+       "simg;":                            '\U00002A9E',
+       "simgE;":                           '\U00002AA0',
+       "siml;":                            '\U00002A9D',
+       "simlE;":                           '\U00002A9F',
+       "simne;":                           '\U00002246',
+       "simplus;":                         '\U00002A24',
+       "simrarr;":                         '\U00002972',
+       "slarr;":                           '\U00002190',
+       "smallsetminus;":                   '\U00002216',
+       "smashp;":                          '\U00002A33',
+       "smeparsl;":                        '\U000029E4',
+       "smid;":                            '\U00002223',
+       "smile;":                           '\U00002323',
+       "smt;":                             '\U00002AAA',
+       "smte;":                            '\U00002AAC',
+       "softcy;":                          '\U0000044C',
+       "sol;":                             '\U0000002F',
+       "solb;":                            '\U000029C4',
+       "solbar;":                          '\U0000233F',
+       "sopf;":                            '\U0001D564',
+       "spades;":                          '\U00002660',
+       "spadesuit;":                       '\U00002660',
+       "spar;":                            '\U00002225',
+       "sqcap;":                           '\U00002293',
+       "sqcup;":                           '\U00002294',
+       "sqsub;":                           '\U0000228F',
+       "sqsube;":                          '\U00002291',
+       "sqsubset;":                        '\U0000228F',
+       "sqsubseteq;":                      '\U00002291',
+       "sqsup;":                           '\U00002290',
+       "sqsupe;":                          '\U00002292',
+       "sqsupset;":                        '\U00002290',
+       "sqsupseteq;":                      '\U00002292',
+       "squ;":                             '\U000025A1',
+       "square;":                          '\U000025A1',
+       "squarf;":                          '\U000025AA',
+       "squf;":                            '\U000025AA',
+       "srarr;":                           '\U00002192',
+       "sscr;":                            '\U0001D4C8',
+       "ssetmn;":                          '\U00002216',
+       "ssmile;":                          '\U00002323',
+       "sstarf;":                          '\U000022C6',
+       "star;":                            '\U00002606',
+       "starf;":                           '\U00002605',
+       "straightepsilon;":                 '\U000003F5',
+       "straightphi;":                     '\U000003D5',
+       "strns;":                           '\U000000AF',
+       "sub;":                             '\U00002282',
+       "subE;":                            '\U00002AC5',
+       "subdot;":                          '\U00002ABD',
+       "sube;":                            '\U00002286',
+       "subedot;":                         '\U00002AC3',
+       "submult;":                         '\U00002AC1',
+       "subnE;":                           '\U00002ACB',
+       "subne;":                           '\U0000228A',
+       "subplus;":                         '\U00002ABF',
+       "subrarr;":                         '\U00002979',
+       "subset;":                          '\U00002282',
+       "subseteq;":                        '\U00002286',
+       "subseteqq;":                       '\U00002AC5',
+       "subsetneq;":                       '\U0000228A',
+       "subsetneqq;":                      '\U00002ACB',
+       "subsim;":                          '\U00002AC7',
+       "subsub;":                          '\U00002AD5',
+       "subsup;":                          '\U00002AD3',
+       "succ;":                            '\U0000227B',
+       "succapprox;":                      '\U00002AB8',
+       "succcurlyeq;":                     '\U0000227D',
+       "succeq;":                          '\U00002AB0',
+       "succnapprox;":                     '\U00002ABA',
+       "succneqq;":                        '\U00002AB6',
+       "succnsim;":                        '\U000022E9',
+       "succsim;":                         '\U0000227F',
+       "sum;":                             '\U00002211',
+       "sung;":                            '\U0000266A',
+       "sup;":                             '\U00002283',
+       "sup1;":                            '\U000000B9',
+       "sup2;":                            '\U000000B2',
+       "sup3;":                            '\U000000B3',
+       "supE;":                            '\U00002AC6',
+       "supdot;":                          '\U00002ABE',
+       "supdsub;":                         '\U00002AD8',
+       "supe;":                            '\U00002287',
+       "supedot;":                         '\U00002AC4',
+       "suphsol;":                         '\U000027C9',
+       "suphsub;":                         '\U00002AD7',
+       "suplarr;":                         '\U0000297B',
+       "supmult;":                         '\U00002AC2',
+       "supnE;":                           '\U00002ACC',
+       "supne;":                           '\U0000228B',
+       "supplus;":                         '\U00002AC0',
+       "supset;":                          '\U00002283',
+       "supseteq;":                        '\U00002287',
+       "supseteqq;":                       '\U00002AC6',
+       "supsetneq;":                       '\U0000228B',
+       "supsetneqq;":                      '\U00002ACC',
+       "supsim;":                          '\U00002AC8',
+       "supsub;":                          '\U00002AD4',
+       "supsup;":                          '\U00002AD6',
+       "swArr;":                           '\U000021D9',
+       "swarhk;":                          '\U00002926',
+       "swarr;":                           '\U00002199',
+       "swarrow;":                         '\U00002199',
+       "swnwar;":                          '\U0000292A',
+       "szlig;":                           '\U000000DF',
+       "target;":                          '\U00002316',
+       "tau;":                             '\U000003C4',
+       "tbrk;":                            '\U000023B4',
+       "tcaron;":                          '\U00000165',
+       "tcedil;":                          '\U00000163',
+       "tcy;":                             '\U00000442',
+       "tdot;":                            '\U000020DB',
+       "telrec;":                          '\U00002315',
+       "tfr;":                             '\U0001D531',
+       "there4;":                          '\U00002234',
+       "therefore;":                       '\U00002234',
+       "theta;":                           '\U000003B8',
+       "thetasym;":                        '\U000003D1',
+       "thetav;":                          '\U000003D1',
+       "thickapprox;":                     '\U00002248',
+       "thicksim;":                        '\U0000223C',
+       "thinsp;":                          '\U00002009',
+       "thkap;":                           '\U00002248',
+       "thksim;":                          '\U0000223C',
+       "thorn;":                           '\U000000FE',
+       "tilde;":                           '\U000002DC',
+       "times;":                           '\U000000D7',
+       "timesb;":                          '\U000022A0',
+       "timesbar;":                        '\U00002A31',
+       "timesd;":                          '\U00002A30',
+       "tint;":                            '\U0000222D',
+       "toea;":                            '\U00002928',
+       "top;":                             '\U000022A4',
+       "topbot;":                          '\U00002336',
+       "topcir;":                          '\U00002AF1',
+       "topf;":                            '\U0001D565',
+       "topfork;":                         '\U00002ADA',
+       "tosa;":                            '\U00002929',
+       "tprime;":                          '\U00002034',
+       "trade;":                           '\U00002122',
+       "triangle;":                        '\U000025B5',
+       "triangledown;":                    '\U000025BF',
+       "triangleleft;":                    '\U000025C3',
+       "trianglelefteq;":                  '\U000022B4',
+       "triangleq;":                       '\U0000225C',
+       "triangleright;":                   '\U000025B9',
+       "trianglerighteq;":                 '\U000022B5',
+       "tridot;":                          '\U000025EC',
+       "trie;":                            '\U0000225C',
+       "triminus;":                        '\U00002A3A',
+       "triplus;":                         '\U00002A39',
+       "trisb;":                           '\U000029CD',
+       "tritime;":                         '\U00002A3B',
+       "trpezium;":                        '\U000023E2',
+       "tscr;":                            '\U0001D4C9',
+       "tscy;":                            '\U00000446',
+       "tshcy;":                           '\U0000045B',
+       "tstrok;":                          '\U00000167',
+       "twixt;":                           '\U0000226C',
+       "twoheadleftarrow;":                '\U0000219E',
+       "twoheadrightarrow;":               '\U000021A0',
+       "uArr;":                            '\U000021D1',
+       "uHar;":                            '\U00002963',
+       "uacute;":                          '\U000000FA',
+       "uarr;":                            '\U00002191',
+       "ubrcy;":                           '\U0000045E',
+       "ubreve;":                          '\U0000016D',
+       "ucirc;":                           '\U000000FB',
+       "ucy;":                             '\U00000443',
+       "udarr;":                           '\U000021C5',
+       "udblac;":                          '\U00000171',
+       "udhar;":                           '\U0000296E',
+       "ufisht;":                          '\U0000297E',
+       "ufr;":                             '\U0001D532',
+       "ugrave;":                          '\U000000F9',
+       "uharl;":                           '\U000021BF',
+       "uharr;":                           '\U000021BE',
+       "uhblk;":                           '\U00002580',
+       "ulcorn;":                          '\U0000231C',
+       "ulcorner;":                        '\U0000231C',
+       "ulcrop;":                          '\U0000230F',
+       "ultri;":                           '\U000025F8',
+       "umacr;":                           '\U0000016B',
+       "uml;":                             '\U000000A8',
+       "uogon;":                           '\U00000173',
+       "uopf;":                            '\U0001D566',
+       "uparrow;":                         '\U00002191',
+       "updownarrow;":                     '\U00002195',
+       "upharpoonleft;":                   '\U000021BF',
+       "upharpoonright;":                  '\U000021BE',
+       "uplus;":                           '\U0000228E',
+       "upsi;":                            '\U000003C5',
+       "upsih;":                           '\U000003D2',
+       "upsilon;":                         '\U000003C5',
+       "upuparrows;":                      '\U000021C8',
+       "urcorn;":                          '\U0000231D',
+       "urcorner;":                        '\U0000231D',
+       "urcrop;":                          '\U0000230E',
+       "uring;":                           '\U0000016F',
+       "urtri;":                           '\U000025F9',
+       "uscr;":                            '\U0001D4CA',
+       "utdot;":                           '\U000022F0',
+       "utilde;":                          '\U00000169',
+       "utri;":                            '\U000025B5',
+       "utrif;":                           '\U000025B4',
+       "uuarr;":                           '\U000021C8',
+       "uuml;":                            '\U000000FC',
+       "uwangle;":                         '\U000029A7',
+       "vArr;":                            '\U000021D5',
+       "vBar;":                            '\U00002AE8',
+       "vBarv;":                           '\U00002AE9',
+       "vDash;":                           '\U000022A8',
+       "vangrt;":                          '\U0000299C',
+       "varepsilon;":                      '\U000003F5',
+       "varkappa;":                        '\U000003F0',
+       "varnothing;":                      '\U00002205',
+       "varphi;":                          '\U000003D5',
+       "varpi;":                           '\U000003D6',
+       "varpropto;":                       '\U0000221D',
+       "varr;":                            '\U00002195',
+       "varrho;":                          '\U000003F1',
+       "varsigma;":                        '\U000003C2',
+       "vartheta;":                        '\U000003D1',
+       "vartriangleleft;":                 '\U000022B2',
+       "vartriangleright;":                '\U000022B3',
+       "vcy;":                             '\U00000432',
+       "vdash;":                           '\U000022A2',
+       "vee;":                             '\U00002228',
+       "veebar;":                          '\U000022BB',
+       "veeeq;":                           '\U0000225A',
+       "vellip;":                          '\U000022EE',
+       "verbar;":                          '\U0000007C',
+       "vert;":                            '\U0000007C',
+       "vfr;":                             '\U0001D533',
+       "vltri;":                           '\U000022B2',
+       "vopf;":                            '\U0001D567',
+       "vprop;":                           '\U0000221D',
+       "vrtri;":                           '\U000022B3',
+       "vscr;":                            '\U0001D4CB',
+       "vzigzag;":                         '\U0000299A',
+       "wcirc;":                           '\U00000175',
+       "wedbar;":                          '\U00002A5F',
+       "wedge;":                           '\U00002227',
+       "wedgeq;":                          '\U00002259',
+       "weierp;":                          '\U00002118',
+       "wfr;":                             '\U0001D534',
+       "wopf;":                            '\U0001D568',
+       "wp;":                              '\U00002118',
+       "wr;":                              '\U00002240',
+       "wreath;":                          '\U00002240',
+       "wscr;":                            '\U0001D4CC',
+       "xcap;":                            '\U000022C2',
+       "xcirc;":                           '\U000025EF',
+       "xcup;":                            '\U000022C3',
+       "xdtri;":                           '\U000025BD',
+       "xfr;":                             '\U0001D535',
+       "xhArr;":                           '\U000027FA',
+       "xharr;":                           '\U000027F7',
+       "xi;":                              '\U000003BE',
+       "xlArr;":                           '\U000027F8',
+       "xlarr;":                           '\U000027F5',
+       "xmap;":                            '\U000027FC',
+       "xnis;":                            '\U000022FB',
+       "xodot;":                           '\U00002A00',
+       "xopf;":                            '\U0001D569',
+       "xoplus;":                          '\U00002A01',
+       "xotime;":                          '\U00002A02',
+       "xrArr;":                           '\U000027F9',
+       "xrarr;":                           '\U000027F6',
+       "xscr;":                            '\U0001D4CD',
+       "xsqcup;":                          '\U00002A06',
+       "xuplus;":                          '\U00002A04',
+       "xutri;":                           '\U000025B3',
+       "xvee;":                            '\U000022C1',
+       "xwedge;":                          '\U000022C0',
+       "yacute;":                          '\U000000FD',
+       "yacy;":                            '\U0000044F',
+       "ycirc;":                           '\U00000177',
+       "ycy;":                             '\U0000044B',
+       "yen;":                             '\U000000A5',
+       "yfr;":                             '\U0001D536',
+       "yicy;":                            '\U00000457',
+       "yopf;":                            '\U0001D56A',
+       "yscr;":                            '\U0001D4CE',
+       "yucy;":                            '\U0000044E',
+       "yuml;":                            '\U000000FF',
+       "zacute;":                          '\U0000017A',
+       "zcaron;":                          '\U0000017E',
+       "zcy;":                             '\U00000437',
+       "zdot;":                            '\U0000017C',
+       "zeetrf;":                          '\U00002128',
+       "zeta;":                            '\U000003B6',
+       "zfr;":                             '\U0001D537',
+       "zhcy;":                            '\U00000436',
+       "zigrarr;":                         '\U000021DD',
+       "zopf;":                            '\U0001D56B',
+       "zscr;":                            '\U0001D4CF',
+       "zwj;":                             '\U0000200D',
+       "zwnj;":                            '\U0000200C',
+       "AElig":                            '\U000000C6',
+       "AMP":                              '\U00000026',
+       "Aacute":                           '\U000000C1',
+       "Acirc":                            '\U000000C2',
+       "Agrave":                           '\U000000C0',
+       "Aring":                            '\U000000C5',
+       "Atilde":                           '\U000000C3',
+       "Auml":                             '\U000000C4',
+       "COPY":                             '\U000000A9',
+       "Ccedil":                           '\U000000C7',
+       "ETH":                              '\U000000D0',
+       "Eacute":                           '\U000000C9',
+       "Ecirc":                            '\U000000CA',
+       "Egrave":                           '\U000000C8',
+       "Euml":                             '\U000000CB',
+       "GT":                               '\U0000003E',
+       "Iacute":                           '\U000000CD',
+       "Icirc":                            '\U000000CE',
+       "Igrave":                           '\U000000CC',
+       "Iuml":                             '\U000000CF',
+       "LT":                               '\U0000003C',
+       "Ntilde":                           '\U000000D1',
+       "Oacute":                           '\U000000D3',
+       "Ocirc":                            '\U000000D4',
+       "Ograve":                           '\U000000D2',
+       "Oslash":                           '\U000000D8',
+       "Otilde":                           '\U000000D5',
+       "Ouml":                             '\U000000D6',
+       "QUOT":                             '\U00000022',
+       "REG":                              '\U000000AE',
+       "THORN":                            '\U000000DE',
+       "Uacute":                           '\U000000DA',
+       "Ucirc":                            '\U000000DB',
+       "Ugrave":                           '\U000000D9',
+       "Uuml":                             '\U000000DC',
+       "Yacute":                           '\U000000DD',
+       "aacute":                           '\U000000E1',
+       "acirc":                            '\U000000E2',
+       "acute":                            '\U000000B4',
+       "aelig":                            '\U000000E6',
+       "agrave":                           '\U000000E0',
+       "amp":                              '\U00000026',
+       "aring":                            '\U000000E5',
+       "atilde":                           '\U000000E3',
+       "auml":                             '\U000000E4',
+       "brvbar":                           '\U000000A6',
+       "ccedil":                           '\U000000E7',
+       "cedil":                            '\U000000B8',
+       "cent":                             '\U000000A2',
+       "copy":                             '\U000000A9',
+       "curren":                           '\U000000A4',
+       "deg":                              '\U000000B0',
+       "divide":                           '\U000000F7',
+       "eacute":                           '\U000000E9',
+       "ecirc":                            '\U000000EA',
+       "egrave":                           '\U000000E8',
+       "eth":                              '\U000000F0',
+       "euml":                             '\U000000EB',
+       "frac12":                           '\U000000BD',
+       "frac14":                           '\U000000BC',
+       "frac34":                           '\U000000BE',
+       "gt":                               '\U0000003E',
+       "iacute":                           '\U000000ED',
+       "icirc":                            '\U000000EE',
+       "iexcl":                            '\U000000A1',
+       "igrave":                           '\U000000EC',
+       "iquest":                           '\U000000BF',
+       "iuml":                             '\U000000EF',
+       "laquo":                            '\U000000AB',
+       "lt":                               '\U0000003C',
+       "macr":                             '\U000000AF',
+       "micro":                            '\U000000B5',
+       "middot":                           '\U000000B7',
+       "nbsp":                             '\U000000A0',
+       "not":                              '\U000000AC',
+       "ntilde":                           '\U000000F1',
+       "oacute":                           '\U000000F3',
+       "ocirc":                            '\U000000F4',
+       "ograve":                           '\U000000F2',
+       "ordf":                             '\U000000AA',
+       "ordm":                             '\U000000BA',
+       "oslash":                           '\U000000F8',
+       "otilde":                           '\U000000F5',
+       "ouml":                             '\U000000F6',
+       "para":                             '\U000000B6',
+       "plusmn":                           '\U000000B1',
+       "pound":                            '\U000000A3',
+       "quot":                             '\U00000022',
+       "raquo":                            '\U000000BB',
+       "reg":                              '\U000000AE',
+       "sect":                             '\U000000A7',
+       "shy":                              '\U000000AD',
+       "sup1":                             '\U000000B9',
+       "sup2":                             '\U000000B2',
+       "sup3":                             '\U000000B3',
+       "szlig":                            '\U000000DF',
+       "thorn":                            '\U000000FE',
+       "times":                            '\U000000D7',
+       "uacute":                           '\U000000FA',
+       "ucirc":                            '\U000000FB',
+       "ugrave":                           '\U000000F9',
+       "uml":                              '\U000000A8',
+       "uuml":                             '\U000000FC',
+       "yacute":                           '\U000000FD',
+       "yen":                              '\U000000A5',
+       "yuml":                             '\U000000FF',
+}
+
+// entity2 maps entity names (with their trailing ";") to two-code-point Unicode replacements.
+var entity2 = map[string][2]rune{
+       // TODO(nigeltao): Handle replacements that are wider than their names ("&nLt;" is 5 bytes, its UTF-8 replacement is 6).
+       // "nLt;":                     {'\u226A', '\u20D2'},
+       // "nGt;":                     {'\u226B', '\u20D2'},
+       "NotEqualTilde;":           {'\u2242', '\u0338'},
+       "NotGreaterFullEqual;":     {'\u2267', '\u0338'},
+       "NotGreaterGreater;":       {'\u226B', '\u0338'},
+       "NotGreaterSlantEqual;":    {'\u2A7E', '\u0338'},
+       "NotHumpDownHump;":         {'\u224E', '\u0338'},
+       "NotHumpEqual;":            {'\u224F', '\u0338'},
+       "NotLeftTriangleBar;":      {'\u29CF', '\u0338'},
+       "NotLessLess;":             {'\u226A', '\u0338'},
+       "NotLessSlantEqual;":       {'\u2A7D', '\u0338'},
+       "NotNestedGreaterGreater;": {'\u2AA2', '\u0338'},
+       "NotNestedLessLess;":       {'\u2AA1', '\u0338'},
+       "NotPrecedesEqual;":        {'\u2AAF', '\u0338'},
+       "NotRightTriangleBar;":     {'\u29D0', '\u0338'},
+       "NotSquareSubset;":         {'\u228F', '\u0338'},
+       "NotSquareSuperset;":       {'\u2290', '\u0338'},
+       "NotSubset;":               {'\u2282', '\u20D2'},
+       "NotSucceedsEqual;":        {'\u2AB0', '\u0338'},
+       "NotSucceedsTilde;":        {'\u227F', '\u0338'},
+       "NotSuperset;":             {'\u2283', '\u20D2'},
+       "ThickSpace;":              {'\u205F', '\u200A'},
+       "acE;":                     {'\u223E', '\u0333'},
+       "bne;":                     {'\u003D', '\u20E5'},
+       "bnequiv;":                 {'\u2261', '\u20E5'},
+       "caps;":                    {'\u2229', '\uFE00'},
+       "cups;":                    {'\u222A', '\uFE00'},
+       "fjlig;":                   {'\u0066', '\u006A'},
+       "gesl;":                    {'\u22DB', '\uFE00'},
+       "gvertneqq;":               {'\u2269', '\uFE00'},
+       "gvnE;":                    {'\u2269', '\uFE00'},
+       "lates;":                   {'\u2AAD', '\uFE00'},
+       "lesg;":                    {'\u22DA', '\uFE00'},
+       "lvertneqq;":               {'\u2268', '\uFE00'},
+       "lvnE;":                    {'\u2268', '\uFE00'},
+       "nGg;":                     {'\u22D9', '\u0338'},
+       "nGtv;":                    {'\u226B', '\u0338'},
+       "nLl;":                     {'\u22D8', '\u0338'},
+       "nLtv;":                    {'\u226A', '\u0338'},
+       "nang;":                    {'\u2220', '\u20D2'},
+       "napE;":                    {'\u2A70', '\u0338'},
+       "napid;":                   {'\u224B', '\u0338'},
+       "nbump;":                   {'\u224E', '\u0338'},
+       "nbumpe;":                  {'\u224F', '\u0338'},
+       "ncongdot;":                {'\u2A6D', '\u0338'},
+       "nedot;":                   {'\u2250', '\u0338'},
+       "nesim;":                   {'\u2242', '\u0338'},
+       "ngE;":                     {'\u2267', '\u0338'},
+       "ngeqq;":                   {'\u2267', '\u0338'},
+       "ngeqslant;":               {'\u2A7E', '\u0338'},
+       "nges;":                    {'\u2A7E', '\u0338'},
+       "nlE;":                     {'\u2266', '\u0338'},
+       "nleqq;":                   {'\u2266', '\u0338'},
+       "nleqslant;":               {'\u2A7D', '\u0338'},
+       "nles;":                    {'\u2A7D', '\u0338'},
+       "notinE;":                  {'\u22F9', '\u0338'},
+       "notindot;":                {'\u22F5', '\u0338'},
+       "nparsl;":                  {'\u2AFD', '\u20E5'},
+       "npart;":                   {'\u2202', '\u0338'},
+       "npre;":                    {'\u2AAF', '\u0338'},
+       "npreceq;":                 {'\u2AAF', '\u0338'},
+       "nrarrc;":                  {'\u2933', '\u0338'},
+       "nrarrw;":                  {'\u219D', '\u0338'},
+       "nsce;":                    {'\u2AB0', '\u0338'},
+       "nsubE;":                   {'\u2AC5', '\u0338'},
+       "nsubset;":                 {'\u2282', '\u20D2'},
+       "nsubseteqq;":              {'\u2AC5', '\u0338'},
+       "nsucceq;":                 {'\u2AB0', '\u0338'},
+       "nsupE;":                   {'\u2AC6', '\u0338'},
+       "nsupset;":                 {'\u2283', '\u20D2'},
+       "nsupseteqq;":              {'\u2AC6', '\u0338'},
+       "nvap;":                    {'\u224D', '\u20D2'},
+       "nvge;":                    {'\u2265', '\u20D2'},
+       "nvgt;":                    {'\u003E', '\u20D2'},
+       "nvle;":                    {'\u2264', '\u20D2'},
+       "nvlt;":                    {'\u003C', '\u20D2'},
+       "nvltrie;":                 {'\u22B4', '\u20D2'},
+       "nvrtrie;":                 {'\u22B5', '\u20D2'},
+       "nvsim;":                   {'\u223C', '\u20D2'},
+       "race;":                    {'\u223D', '\u0331'},
+       "smtes;":                   {'\u2AAC', '\uFE00'},
+       "sqcaps;":                  {'\u2293', '\uFE00'},
+       "sqcups;":                  {'\u2294', '\uFE00'},
+       "varsubsetneq;":            {'\u228A', '\uFE00'},
+       "varsubsetneqq;":           {'\u2ACB', '\uFE00'},
+       "varsupsetneq;":            {'\u228B', '\uFE00'},
+       "varsupsetneqq;":           {'\u2ACC', '\uFE00'},
+       "vnsub;":                   {'\u2282', '\u20D2'},
+       "vnsup;":                   {'\u2283', '\u20D2'},
+       "vsubnE;":                  {'\u2ACB', '\uFE00'},
+       "vsubne;":                  {'\u228A', '\uFE00'},
+       "vsupnE;":                  {'\u2ACC', '\uFE00'},
+       "vsupne;":                  {'\u228B', '\uFE00'},
+}
diff --git a/internal/html/entity_test.go b/internal/html/entity_test.go
new file mode 100644 (file)
index 0000000..b53f866
--- /dev/null
@@ -0,0 +1,29 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "testing"
+       "unicode/utf8"
+)
+
+func TestEntityLength(t *testing.T) {
+       // We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
+       // The +1 comes from the leading "&". This property implies that the length of
+       // unescaped text is <= the length of escaped text.
+       for k, v := range entity {
+               if 1+len(k) < utf8.RuneLen(v) {
+                       t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v))
+               }
+               if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
+                       t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
+               }
+       }
+       for k, v := range entity2 {
+               if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) {
+                       t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1]))
+               }
+       }
+}
diff --git a/internal/html/escape.go b/internal/html/escape.go
new file mode 100644 (file)
index 0000000..04c6bec
--- /dev/null
@@ -0,0 +1,339 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "strings"
+       "unicode/utf8"
+)
+
+// These replacements permit compatibility with old numeric entities that
+// assumed Windows-1252 encoding.
+// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
+var replacementTable = [...]rune{
+       '\u20AC', // First entry is what 0x80 should be replaced with.
+       '\u0081',
+       '\u201A',
+       '\u0192',
+       '\u201E',
+       '\u2026',
+       '\u2020',
+       '\u2021',
+       '\u02C6',
+       '\u2030',
+       '\u0160',
+       '\u2039',
+       '\u0152',
+       '\u008D',
+       '\u017D',
+       '\u008F',
+       '\u0090',
+       '\u2018',
+       '\u2019',
+       '\u201C',
+       '\u201D',
+       '\u2022',
+       '\u2013',
+       '\u2014',
+       '\u02DC',
+       '\u2122',
+       '\u0161',
+       '\u203A',
+       '\u0153',
+       '\u009D',
+       '\u017E',
+       '\u0178', // Last entry is 0x9F.
+       // 0x00->'\uFFFD' is handled programmatically.
+       // 0x0D->'\u000D' is a no-op.
+}
+
+// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
+// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
+// Precondition: b[src] == '&' && dst <= src.
+// attribute should be true if parsing an attribute value.
+func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
+       // https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
+
+       // i starts at 1 because we already know that s[0] == '&'.
+       i, s := 1, b[src:]
+
+       if len(s) <= 1 {
+               b[dst] = b[src]
+               return dst + 1, src + 1
+       }
+
+       if s[i] == '#' {
+               if len(s) <= 3 { // We need to have at least "&#.".
+                       b[dst] = b[src]
+                       return dst + 1, src + 1
+               }
+               i++
+               c := s[i]
+               hex := false
+               if c == 'x' || c == 'X' {
+                       hex = true
+                       i++
+               }
+
+               x := '\x00'
+               for i < len(s) {
+                       c = s[i]
+                       i++
+                       if hex {
+                               if '0' <= c && c <= '9' {
+                                       x = 16*x + rune(c) - '0'
+                                       continue
+                               } else if 'a' <= c && c <= 'f' {
+                                       x = 16*x + rune(c) - 'a' + 10
+                                       continue
+                               } else if 'A' <= c && c <= 'F' {
+                                       x = 16*x + rune(c) - 'A' + 10
+                                       continue
+                               }
+                       } else if '0' <= c && c <= '9' {
+                               x = 10*x + rune(c) - '0'
+                               continue
+                       }
+                       if c != ';' {
+                               i--
+                       }
+                       break
+               }
+
+               if i <= 3 { // No characters matched.
+                       b[dst] = b[src]
+                       return dst + 1, src + 1
+               }
+
+               if 0x80 <= x && x <= 0x9F {
+                       // Replace characters from Windows-1252 with UTF-8 equivalents.
+                       x = replacementTable[x-0x80]
+               } else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
+                       // Replace invalid characters with the replacement character.
+                       x = '\uFFFD'
+               }
+
+               return dst + utf8.EncodeRune(b[dst:], x), src + i
+       }
+
+       // Consume the maximum number of characters possible, with the
+       // consumed characters matching one of the named references.
+
+       for i < len(s) {
+               c := s[i]
+               i++
+               // Lower-cased characters are more common in entities, so we check for them first.
+               if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
+                       continue
+               }
+               if c != ';' {
+                       i--
+               }
+               break
+       }
+
+       entityName := string(s[1:i])
+       if entityName == "" {
+               // No-op.
+       } else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
+               // No-op.
+       } else if x := entity[entityName]; x != 0 {
+               return dst + utf8.EncodeRune(b[dst:], x), src + i
+       } else if x := entity2[entityName]; x[0] != 0 {
+               dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
+               return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
+       } else if !attribute {
+               maxLen := len(entityName) - 1
+               if maxLen > longestEntityWithoutSemicolon {
+                       maxLen = longestEntityWithoutSemicolon
+               }
+               for j := maxLen; j > 1; j-- {
+                       if x := entity[entityName[:j]]; x != 0 {
+                               return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
+                       }
+               }
+       }
+
+       dst1, src1 = dst+i, src+i
+       copy(b[dst:dst1], b[src:src1])
+       return dst1, src1
+}
+
+// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
+// attribute should be true if parsing an attribute value.
+func unescape(b []byte, attribute bool) []byte {
+       for i, c := range b {
+               if c == '&' {
+                       dst, src := unescapeEntity(b, i, i, attribute)
+                       for src < len(b) {
+                               c := b[src]
+                               if c == '&' {
+                                       dst, src = unescapeEntity(b, dst, src, attribute)
+                               } else {
+                                       b[dst] = c
+                                       dst, src = dst+1, src+1
+                               }
+                       }
+                       return b[0:dst]
+               }
+       }
+       return b
+}
+
+// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
+func lower(b []byte) []byte {
+       for i, c := range b {
+               if 'A' <= c && c <= 'Z' {
+                       b[i] = c + 'a' - 'A'
+               }
+       }
+       return b
+}
+
+// escapeComment is like func escape but escapes its input bytes less often.
+// Per https://github.com/golang/go/issues/58246 some HTML comments are (1)
+// meaningful and (2) contain angle brackets that we'd like to avoid escaping
+// unless we have to.
+//
+// "We have to" includes the '&' byte, since that introduces other escapes.
+//
+// It also includes those bytes (not including EOF) that would otherwise end
+// the comment. Per the summary table at the bottom of comment_test.go, this is
+// the '>' byte that, per above, we'd like to avoid escaping unless we have to.
+//
+// Studying the summary table (and T actions in its '>' column) closely, we
+// only need to escape in states 43, 44, 49, 51 and 52. State 43 is at the
+// start of the comment data. State 52 is after a '!'. The other three states
+// are after a '-'.
+//
+// Our algorithm is thus to escape every '&' and to escape '>' if and only if:
+//   - The '>' is after a '!' or '-' (in the unescaped data) or
+//   - The '>' is at the start of the comment data (after the opening "<!--").
+func escapeComment(w writer, s string) error {
+       // When modifying this function, consider manually increasing the
+       // maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
+       // That increase should only be temporary, not committed, as it
+       // exponentially affects the test running time.
+
+       if len(s) == 0 {
+               return nil
+       }
+
+       // Loop:
+       //   - Grow j such that s[i:j] does not need escaping.
+       //   - If s[j] does need escaping, output s[i:j] and an escaped s[j],
+       //     resetting i and j to point past that s[j] byte.
+       i := 0
+       for j := 0; j < len(s); j++ {
+               escaped := ""
+               switch s[j] {
+               case '&':
+                       escaped = "&amp;"
+
+               case '>':
+                       if j > 0 {
+                               if prev := s[j-1]; (prev != '!') && (prev != '-') {
+                                       continue
+                               }
+                       }
+                       escaped = "&gt;"
+
+               default:
+                       continue
+               }
+
+               if i < j {
+                       if _, err := w.WriteString(s[i:j]); err != nil {
+                               return err
+                       }
+               }
+               if _, err := w.WriteString(escaped); err != nil {
+                       return err
+               }
+               i = j + 1
+       }
+
+       if i < len(s) {
+               if _, err := w.WriteString(s[i:]); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+// escapeCommentString is to EscapeString as escapeComment is to escape.
+func escapeCommentString(s string) string {
+       if strings.IndexAny(s, "&>") == -1 {
+               return s
+       }
+       var buf bytes.Buffer
+       escapeComment(&buf, s)
+       return buf.String()
+}
+
+const escapedChars = "&'<>\"\r"
+
+func escape(w writer, s string) error {
+       i := strings.IndexAny(s, escapedChars)
+       for i != -1 {
+               if _, err := w.WriteString(s[:i]); err != nil {
+                       return err
+               }
+               var esc string
+               switch s[i] {
+               case '&':
+                       esc = "&amp;"
+               case '\'':
+                       // "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
+                       esc = "&#39;"
+               case '<':
+                       esc = "&lt;"
+               case '>':
+                       esc = "&gt;"
+               case '"':
+                       // "&#34;" is shorter than "&quot;".
+                       esc = "&#34;"
+               case '\r':
+                       esc = "&#13;"
+               default:
+                       panic("unrecognized escape character")
+               }
+               s = s[i+1:]
+               if _, err := w.WriteString(esc); err != nil {
+                       return err
+               }
+               i = strings.IndexAny(s, escapedChars)
+       }
+       _, err := w.WriteString(s)
+       return err
+}
+
+// EscapeString escapes special characters like "<" to become "&lt;". It
+// escapes only six such characters: <, >, &, ', " and carriage return.
+// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
+// always true.
+func EscapeString(s string) string {
+       if strings.IndexAny(s, escapedChars) == -1 {
+               return s
+       }
+       var buf bytes.Buffer
+       escape(&buf, s)
+       return buf.String()
+}
+
+// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
+// larger range of entities than EscapeString escapes. For example, "&aacute;"
+// unescapes to "á", as do "&#225;" and "&#xE1;".
+// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
+// always true.
+func UnescapeString(s string) string {
+       for _, c := range s {
+               if c == '&' {
+                       return string(unescape([]byte(s), false))
+               }
+       }
+       return s
+}
diff --git a/internal/html/escape_test.go b/internal/html/escape_test.go
new file mode 100644 (file)
index 0000000..b405d4b
--- /dev/null
@@ -0,0 +1,97 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import "testing"
+
+type unescapeTest struct {
+       // A short description of the test case.
+       desc string
+       // The HTML text.
+       html string
+       // The unescaped text.
+       unescaped string
+}
+
+var unescapeTests = []unescapeTest{
+       // Handle no entities.
+       {
+               "copy",
+               "A\ttext\nstring",
+               "A\ttext\nstring",
+       },
+       // Handle simple named entities.
+       {
+               "simple",
+               "&amp; &gt; &lt;",
+               "& > <",
+       },
+       // Handle hitting the end of the string.
+       {
+               "stringEnd",
+               "&amp &amp",
+               "& &",
+       },
+       // Handle entities with two codepoints.
+       {
+               "multiCodepoint",
+               "text &gesl; blah",
+               "text \u22db\ufe00 blah",
+       },
+       // Handle decimal numeric entities.
+       {
+               "decimalEntity",
+               "Delta = &#916; ",
+               "Delta = Δ ",
+       },
+       // Handle hexadecimal numeric entities.
+       {
+               "hexadecimalEntity",
+               "Lambda = &#x3bb; = &#X3Bb ",
+               "Lambda = λ = λ ",
+       },
+       // Handle numeric early termination.
+       {
+               "numericEnds",
+               "&# &#x &#128;43 &copy = &#169f = &#xa9",
+               "&# &#x €43 © = ©f = ©",
+       },
+       // Handle numeric Windows-1252 entity replacements.
+       {
+               "numericReplacements",
+               "Footnote&#x87;",
+               "Footnote‡",
+       },
+}
+
+func TestUnescape(t *testing.T) {
+       for _, tt := range unescapeTests {
+               unescaped := UnescapeString(tt.html)
+               if unescaped != tt.unescaped {
+                       t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
+               }
+       }
+}
+
+func TestUnescapeEscape(t *testing.T) {
+       ss := []string{
+               ``,
+               `abc def`,
+               `a & b`,
+               `a&amp;b`,
+               `a &amp b`,
+               `&quot;`,
+               `"`,
+               `"<&>"`,
+               `&quot;&lt;&amp;&gt;&quot;`,
+               `3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
+               `The special characters are: <, >, &, ' and "`,
+       }
+       for _, s := range ss {
+               if got := UnescapeString(EscapeString(s)); got != s {
+                       t.Errorf("got %q want %q", got, s)
+               }
+       }
+}
diff --git a/internal/html/example_test.go b/internal/html/example_test.go
new file mode 100644 (file)
index 0000000..ebdd168
--- /dev/null
@@ -0,0 +1,40 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This example demonstrates parsing HTML data and walking the resulting tree.
+package html_test
+
+import (
+       "fmt"
+       "log"
+       "strings"
+
+       "git.earlybird.gay/today-engine/internal/html"
+)
+
+func ExampleParse() {
+       s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`
+       doc, err := html.Parse(strings.NewReader(s))
+       if err != nil {
+               log.Fatal(err)
+       }
+       var f func(*html.Node)
+       f = func(n *html.Node) {
+               if n.Type == html.ElementNode && n.Data == "a" {
+                       for _, a := range n.Attr {
+                               if a.Key == "href" {
+                                       fmt.Println(a.Val)
+                                       break
+                               }
+                       }
+               }
+               for c := n.FirstChild; c != nil; c = c.NextSibling {
+                       f(c)
+               }
+       }
+       f(doc)
+       // Output:
+       // foo
+       // /bar/baz
+}
diff --git a/internal/html/foreign.go b/internal/html/foreign.go
new file mode 100644 (file)
index 0000000..9da9e9d
--- /dev/null
@@ -0,0 +1,222 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "strings"
+)
+
+func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
+       for i := range aa {
+               if newName, ok := nameMap[aa[i].Key]; ok {
+                       aa[i].Key = newName
+               }
+       }
+}
+
+func adjustForeignAttributes(aa []Attribute) {
+       for i, a := range aa {
+               if a.Key == "" || a.Key[0] != 'x' {
+                       continue
+               }
+               switch a.Key {
+               case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
+                       "xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
+                       j := strings.Index(a.Key, ":")
+                       aa[i].Namespace = a.Key[:j]
+                       aa[i].Key = a.Key[j+1:]
+               }
+       }
+}
+
+func htmlIntegrationPoint(n *Node) bool {
+       if n.Type != ElementNode {
+               return false
+       }
+       switch n.Namespace {
+       case "math":
+               if n.Data == "annotation-xml" {
+                       for _, a := range n.Attr {
+                               if a.Key == "encoding" {
+                                       val := strings.ToLower(a.Val)
+                                       if val == "text/html" || val == "application/xhtml+xml" {
+                                               return true
+                                       }
+                               }
+                       }
+               }
+       case "svg":
+               switch n.Data {
+               case "desc", "foreignObject", "title":
+                       return true
+               }
+       }
+       return false
+}
+
+func mathMLTextIntegrationPoint(n *Node) bool {
+       if n.Namespace != "math" {
+               return false
+       }
+       switch n.Data {
+       case "mi", "mo", "mn", "ms", "mtext":
+               return true
+       }
+       return false
+}
+
+// Section 12.2.6.5.
+var breakout = map[string]bool{
+       "b":          true,
+       "big":        true,
+       "blockquote": true,
+       "body":       true,
+       "br":         true,
+       "center":     true,
+       "code":       true,
+       "dd":         true,
+       "div":        true,
+       "dl":         true,
+       "dt":         true,
+       "em":         true,
+       "embed":      true,
+       "h1":         true,
+       "h2":         true,
+       "h3":         true,
+       "h4":         true,
+       "h5":         true,
+       "h6":         true,
+       "head":       true,
+       "hr":         true,
+       "i":          true,
+       "img":        true,
+       "li":         true,
+       "listing":    true,
+       "menu":       true,
+       "meta":       true,
+       "nobr":       true,
+       "ol":         true,
+       "p":          true,
+       "pre":        true,
+       "ruby":       true,
+       "s":          true,
+       "small":      true,
+       "span":       true,
+       "strong":     true,
+       "strike":     true,
+       "sub":        true,
+       "sup":        true,
+       "table":      true,
+       "tt":         true,
+       "u":          true,
+       "ul":         true,
+       "var":        true,
+}
+
+// Section 12.2.6.5.
+var svgTagNameAdjustments = map[string]string{
+       "altglyph":            "altGlyph",
+       "altglyphdef":         "altGlyphDef",
+       "altglyphitem":        "altGlyphItem",
+       "animatecolor":        "animateColor",
+       "animatemotion":       "animateMotion",
+       "animatetransform":    "animateTransform",
+       "clippath":            "clipPath",
+       "feblend":             "feBlend",
+       "fecolormatrix":       "feColorMatrix",
+       "fecomponenttransfer": "feComponentTransfer",
+       "fecomposite":         "feComposite",
+       "feconvolvematrix":    "feConvolveMatrix",
+       "fediffuselighting":   "feDiffuseLighting",
+       "fedisplacementmap":   "feDisplacementMap",
+       "fedistantlight":      "feDistantLight",
+       "feflood":             "feFlood",
+       "fefunca":             "feFuncA",
+       "fefuncb":             "feFuncB",
+       "fefuncg":             "feFuncG",
+       "fefuncr":             "feFuncR",
+       "fegaussianblur":      "feGaussianBlur",
+       "feimage":             "feImage",
+       "femerge":             "feMerge",
+       "femergenode":         "feMergeNode",
+       "femorphology":        "feMorphology",
+       "feoffset":            "feOffset",
+       "fepointlight":        "fePointLight",
+       "fespecularlighting":  "feSpecularLighting",
+       "fespotlight":         "feSpotLight",
+       "fetile":              "feTile",
+       "feturbulence":        "feTurbulence",
+       "foreignobject":       "foreignObject",
+       "glyphref":            "glyphRef",
+       "lineargradient":      "linearGradient",
+       "radialgradient":      "radialGradient",
+       "textpath":            "textPath",
+}
+
+// Section 12.2.6.1
+var mathMLAttributeAdjustments = map[string]string{
+       "definitionurl": "definitionURL",
+}
+
+var svgAttributeAdjustments = map[string]string{
+       "attributename":       "attributeName",
+       "attributetype":       "attributeType",
+       "basefrequency":       "baseFrequency",
+       "baseprofile":         "baseProfile",
+       "calcmode":            "calcMode",
+       "clippathunits":       "clipPathUnits",
+       "diffuseconstant":     "diffuseConstant",
+       "edgemode":            "edgeMode",
+       "filterunits":         "filterUnits",
+       "glyphref":            "glyphRef",
+       "gradienttransform":   "gradientTransform",
+       "gradientunits":       "gradientUnits",
+       "kernelmatrix":        "kernelMatrix",
+       "kernelunitlength":    "kernelUnitLength",
+       "keypoints":           "keyPoints",
+       "keysplines":          "keySplines",
+       "keytimes":            "keyTimes",
+       "lengthadjust":        "lengthAdjust",
+       "limitingconeangle":   "limitingConeAngle",
+       "markerheight":        "markerHeight",
+       "markerunits":         "markerUnits",
+       "markerwidth":         "markerWidth",
+       "maskcontentunits":    "maskContentUnits",
+       "maskunits":           "maskUnits",
+       "numoctaves":          "numOctaves",
+       "pathlength":          "pathLength",
+       "patterncontentunits": "patternContentUnits",
+       "patterntransform":    "patternTransform",
+       "patternunits":        "patternUnits",
+       "pointsatx":           "pointsAtX",
+       "pointsaty":           "pointsAtY",
+       "pointsatz":           "pointsAtZ",
+       "preservealpha":       "preserveAlpha",
+       "preserveaspectratio": "preserveAspectRatio",
+       "primitiveunits":      "primitiveUnits",
+       "refx":                "refX",
+       "refy":                "refY",
+       "repeatcount":         "repeatCount",
+       "repeatdur":           "repeatDur",
+       "requiredextensions":  "requiredExtensions",
+       "requiredfeatures":    "requiredFeatures",
+       "specularconstant":    "specularConstant",
+       "specularexponent":    "specularExponent",
+       "spreadmethod":        "spreadMethod",
+       "startoffset":         "startOffset",
+       "stddeviation":        "stdDeviation",
+       "stitchtiles":         "stitchTiles",
+       "surfacescale":        "surfaceScale",
+       "systemlanguage":      "systemLanguage",
+       "tablevalues":         "tableValues",
+       "targetx":             "targetX",
+       "targety":             "targetY",
+       "textlength":          "textLength",
+       "viewbox":             "viewBox",
+       "viewtarget":          "viewTarget",
+       "xchannelselector":    "xChannelSelector",
+       "ychannelselector":    "yChannelSelector",
+       "zoomandpan":          "zoomAndPan",
+}
diff --git a/internal/html/node.go b/internal/html/node.go
new file mode 100644 (file)
index 0000000..bb1624b
--- /dev/null
@@ -0,0 +1,225 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "git.earlybird.gay/today-engine/internal/html/atom"
+)
+
+// A NodeType is the type of a Node.
+type NodeType uint32
+
+const (
+       ErrorNode NodeType = iota
+       TextNode
+       DocumentNode
+       ElementNode
+       CommentNode
+       DoctypeNode
+       // RawNode nodes are not returned by the parser, but can be part of the
+       // Node tree passed to func Render to insert raw HTML (without escaping).
+       // If so, this package makes no guarantee that the rendered HTML is secure
+       // (from e.g. Cross Site Scripting attacks) or well-formed.
+       RawNode
+       scopeMarkerNode
+)
+
+// Section 12.2.4.3 says "The markers are inserted when entering applet,
+// object, marquee, template, td, th, and caption elements, and are used
+// to prevent formatting from "leaking" into applet, object, marquee,
+// template, td, th, and caption elements".
+var scopeMarker = Node{Type: scopeMarkerNode}
+
+// A Node consists of a NodeType and some Data (tag name for element nodes,
+// content for text) and is part of a tree of Nodes. Element nodes may also
+// have a Namespace and contain a slice of Attributes. Data is unescaped, so
+// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
+// is the atom for Data, or zero if Data is not a known tag name.
+//
+// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
+// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
+// "svg" is short for "http://www.w3.org/2000/svg".
+type Node struct {
+       Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
+
+       Type      NodeType
+       DataAtom  atom.Atom
+       Data      string
+       Namespace string
+       Attr      []Attribute
+}
+
+// InsertBefore inserts newChild as a child of n, immediately before oldChild
+// in the sequence of n's children. oldChild may be nil, in which case newChild
+// is appended to the end of n's children.
+//
+// It will panic if newChild already has a parent or siblings.
+func (n *Node) InsertBefore(newChild, oldChild *Node) {
+       if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
+               panic("html: InsertBefore called for an attached child Node")
+       }
+       var prev, next *Node
+       if oldChild != nil {
+               prev, next = oldChild.PrevSibling, oldChild
+       } else {
+               prev = n.LastChild
+       }
+       if prev != nil {
+               prev.NextSibling = newChild
+       } else {
+               n.FirstChild = newChild
+       }
+       if next != nil {
+               next.PrevSibling = newChild
+       } else {
+               n.LastChild = newChild
+       }
+       newChild.Parent = n
+       newChild.PrevSibling = prev
+       newChild.NextSibling = next
+}
+
+// AppendChild adds a node c as a child of n.
+//
+// It will panic if c already has a parent or siblings.
+func (n *Node) AppendChild(c *Node) {
+       if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
+               panic("html: AppendChild called for an attached child Node")
+       }
+       last := n.LastChild
+       if last != nil {
+               last.NextSibling = c
+       } else {
+               n.FirstChild = c
+       }
+       n.LastChild = c
+       c.Parent = n
+       c.PrevSibling = last
+}
+
+// RemoveChild removes a node c that is a child of n. Afterwards, c will have
+// no parent and no siblings.
+//
+// It will panic if c's parent is not n.
+func (n *Node) RemoveChild(c *Node) {
+       if c.Parent != n {
+               panic("html: RemoveChild called for a non-child Node")
+       }
+       if n.FirstChild == c {
+               n.FirstChild = c.NextSibling
+       }
+       if c.NextSibling != nil {
+               c.NextSibling.PrevSibling = c.PrevSibling
+       }
+       if n.LastChild == c {
+               n.LastChild = c.PrevSibling
+       }
+       if c.PrevSibling != nil {
+               c.PrevSibling.NextSibling = c.NextSibling
+       }
+       c.Parent = nil
+       c.PrevSibling = nil
+       c.NextSibling = nil
+}
+
+// reparentChildren reparents all of src's child nodes to dst.
+func reparentChildren(dst, src *Node) {
+       for {
+               child := src.FirstChild
+               if child == nil {
+                       break
+               }
+               src.RemoveChild(child)
+               dst.AppendChild(child)
+       }
+}
+
+// clone returns a new node with the same type, data and attributes.
+// The clone has no parent, no siblings and no children.
+func (n *Node) clone() *Node {
+       m := &Node{
+               Type:     n.Type,
+               DataAtom: n.DataAtom,
+               Data:     n.Data,
+               Attr:     make([]Attribute, len(n.Attr)),
+       }
+       copy(m.Attr, n.Attr)
+       return m
+}
+
+// nodeStack is a stack of nodes.
+type nodeStack []*Node
+
+// pop pops the stack. It will panic if s is empty.
+func (s *nodeStack) pop() *Node {
+       i := len(*s)
+       n := (*s)[i-1]
+       *s = (*s)[:i-1]
+       return n
+}
+
+// top returns the most recently pushed node, or nil if s is empty.
+func (s *nodeStack) top() *Node {
+       if i := len(*s); i > 0 {
+               return (*s)[i-1]
+       }
+       return nil
+}
+
+// index returns the index of the top-most occurrence of n in the stack, or -1
+// if n is not present.
+func (s *nodeStack) index(n *Node) int {
+       for i := len(*s) - 1; i >= 0; i-- {
+               if (*s)[i] == n {
+                       return i
+               }
+       }
+       return -1
+}
+
+// contains reports whether s holds a node with DataAtom a and an empty Namespace.
+func (s *nodeStack) contains(a atom.Atom) bool {
+       for _, n := range *s {
+               if n.DataAtom == a && n.Namespace == "" {
+                       return true
+               }
+       }
+       return false
+}
+
+// insert inserts a node at the given index.
+func (s *nodeStack) insert(i int, n *Node) {
+       (*s) = append(*s, nil)
+       copy((*s)[i+1:], (*s)[i:])
+       (*s)[i] = n
+}
+
+// remove removes a node from the stack. It is a no-op if n is not present.
+func (s *nodeStack) remove(n *Node) {
+       i := s.index(n)
+       if i == -1 {
+               return
+       }
+       copy((*s)[i:], (*s)[i+1:])
+       j := len(*s) - 1
+       (*s)[j] = nil
+       *s = (*s)[:j]
+}
+
+type insertionModeStack []insertionMode
+
+func (s *insertionModeStack) pop() (im insertionMode) {
+       i := len(*s)
+       im = (*s)[i-1]
+       *s = (*s)[:i-1]
+       return im
+}
+
+func (s *insertionModeStack) top() insertionMode {
+       if i := len(*s); i > 0 {
+               return (*s)[i-1]
+       }
+       return nil
+}
diff --git a/internal/html/node_test.go b/internal/html/node_test.go
new file mode 100644 (file)
index 0000000..471102f
--- /dev/null
@@ -0,0 +1,146 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "fmt"
+)
+
+// checkTreeConsistency checks that a node and its descendants are all
+// consistent in their parent/child/sibling relationships. It returns the
+// first inconsistency found as an error, or nil if there is none.
+func checkTreeConsistency(n *Node) error {
+       return checkTreeConsistency1(n, 0)
+}
+
+// checkTreeConsistency1 validates n with checkNodeConsistency and then
+// recurses into each of n's children. depth is n's distance from the node the
+// walk started at; reaching a depth of 1e4 is reported as a probable cycle.
+func checkTreeConsistency1(n *Node, depth int) error {
+       if depth == 1e4 {
+               return fmt.Errorf("html: tree looks like it contains a cycle")
+       }
+       err := checkNodeConsistency(n)
+       if err != nil {
+               return err
+       }
+       child := n.FirstChild
+       for child != nil {
+               if err = checkTreeConsistency1(child, depth+1); err != nil {
+                       return err
+               }
+               child = child.NextSibling
+       }
+       return nil
+}
+
+// checkNodeConsistency checks that a node's parent/child/sibling relationships
+// are consistent. It returns nil for a nil node; otherwise it returns an error
+// describing the first inconsistency found, or nil if there is none. Only n's
+// own links are examined; descendants are not visited.
+func checkNodeConsistency(n *Node) error {
+       if n == nil {
+               return nil
+       }
+
+       // Walk up the ancestor chain; a chain of 1e4 ancestors is treated as a
+       // loop.
+       nParent := 0
+       for p := n.Parent; p != nil; p = p.Parent {
+               nParent++
+               if nParent == 1e4 {
+                       return fmt.Errorf("html: parent list looks like an infinite loop")
+               }
+       }
+
+       // Walk the children first-to-last: every child must name n as its
+       // parent, and 1e6 children is treated as a loop.
+       nForward := 0
+       for c := n.FirstChild; c != nil; c = c.NextSibling {
+               nForward++
+               if nForward == 1e6 {
+                       return fmt.Errorf("html: forward list of children looks like an infinite loop")
+               }
+               if c.Parent != n {
+                       return fmt.Errorf("html: inconsistent child/parent relationship")
+               }
+       }
+
+       // The same check, walking the children last-to-first.
+       nBackward := 0
+       for c := n.LastChild; c != nil; c = c.PrevSibling {
+               nBackward++
+               if nBackward == 1e6 {
+                       return fmt.Errorf("html: backward list of children looks like an infinite loop")
+               }
+               if c.Parent != n {
+                       return fmt.Errorf("html: inconsistent child/parent relationship")
+               }
+       }
+
+       if n.Parent != nil {
+               // The parent must be distinct from n and from each of n's other
+               // links.
+               if n.Parent == n {
+                       return fmt.Errorf("html: inconsistent parent relationship")
+               }
+               if n.Parent == n.FirstChild {
+                       return fmt.Errorf("html: inconsistent parent/first relationship")
+               }
+               if n.Parent == n.LastChild {
+                       return fmt.Errorf("html: inconsistent parent/last relationship")
+               }
+               if n.Parent == n.PrevSibling {
+                       return fmt.Errorf("html: inconsistent parent/prev relationship")
+               }
+               if n.Parent == n.NextSibling {
+                       return fmt.Errorf("html: inconsistent parent/next relationship")
+               }
+
+               // n must appear in its parent's list of children.
+               parentHasNAsAChild := false
+               for c := n.Parent.FirstChild; c != nil; c = c.NextSibling {
+                       if c == n {
+                               parentHasNAsAChild = true
+                               break
+                       }
+               }
+               if !parentHasNAsAChild {
+                       return fmt.Errorf("html: inconsistent parent/child relationship")
+               }
+       }
+
+       // Sibling links must be mutual.
+       if n.PrevSibling != nil && n.PrevSibling.NextSibling != n {
+               return fmt.Errorf("html: inconsistent prev/next relationship")
+       }
+       if n.NextSibling != nil && n.NextSibling.PrevSibling != n {
+               return fmt.Errorf("html: inconsistent next/prev relationship")
+       }
+
+       // FirstChild and LastChild must be both nil or both non-nil.
+       if (n.FirstChild == nil) != (n.LastChild == nil) {
+               return fmt.Errorf("html: inconsistent first/last relationship")
+       }
+       if n.FirstChild != nil && n.FirstChild == n.LastChild {
+               // We have a sole child.
+               if n.FirstChild.PrevSibling != nil || n.FirstChild.NextSibling != nil {
+                       return fmt.Errorf("html: inconsistent sole child's sibling relationship")
+               }
+       }
+
+       // Cross-check the forward and backward child lists: record every child
+       // seen going forward, then remove every child seen going backward. Both
+       // walks must visit exactly the same set of nodes, ending at LastChild
+       // and FirstChild respectively.
+       seen := map[*Node]bool{}
+
+       var last *Node
+       for c := n.FirstChild; c != nil; c = c.NextSibling {
+               if seen[c] {
+                       return fmt.Errorf("html: inconsistent repeated child")
+               }
+               seen[c] = true
+               last = c
+       }
+       if last != n.LastChild {
+               return fmt.Errorf("html: inconsistent last relationship")
+       }
+
+       var first *Node
+       for c := n.LastChild; c != nil; c = c.PrevSibling {
+               if !seen[c] {
+                       return fmt.Errorf("html: inconsistent missing child")
+               }
+               delete(seen, c)
+               first = c
+       }
+       if first != n.FirstChild {
+               return fmt.Errorf("html: inconsistent first relationship")
+       }
+
+       // Anything left in seen was reachable forward but not backward.
+       if len(seen) != 0 {
+               return fmt.Errorf("html: inconsistent forwards/backwards child list")
+       }
+
+       return nil
+}
diff --git a/internal/html/parse.go b/internal/html/parse.go
new file mode 100644 (file)
index 0000000..8c1131b
--- /dev/null
@@ -0,0 +1,2454 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "errors"
+       "fmt"
+       "io"
+       "strings"
+
+       a "git.earlybird.gay/today-engine/internal/html/atom"
+)
+
+// A parser implements the HTML5 parsing algorithm:
+// https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
+//
+// It holds the token source, the document being built and the
+// tree-construction state that the insertion mode functions read and update.
+type parser struct {
+       // tokenizer provides the tokens for the parser.
+       tokenizer *Tokenizer
+       // tok is the most recently read token.
+       tok Token
+       // Self-closing tags like <hr/> are treated as start tags, except that
+       // hasSelfClosingToken is set while they are being processed.
+       hasSelfClosingToken bool
+       // doc is the document root element.
+       doc *Node
+       // The stack of open elements (section 12.2.4.2) and active formatting
+       // elements (section 12.2.4.3).
+       oe, afe nodeStack
+       // Element pointers (section 12.2.4.4).
+       head, form *Node
+       // Other parsing state flags (section 12.2.4.5).
+       scripting, framesetOK bool
+       // The stack of template insertion modes
+       templateStack insertionModeStack
+       // im is the current insertion mode.
+       im insertionMode
+       // originalIM is the insertion mode to go back to after completing a text
+       // or inTableText insertion mode.
+       originalIM insertionMode
+       // fosterParenting is whether new elements should be inserted according to
+       // the foster parenting rules (section 12.2.6.1).
+       fosterParenting bool
+       // quirks is whether the parser is operating in "quirks mode."
+       quirks bool
+       // fragment is whether the parser is parsing an HTML fragment.
+       fragment bool
+       // context is the context element when parsing an HTML fragment
+       // (section 12.4). It is nil when there is no context element.
+       context *Node
+}
+
+// top returns the current node: the top of the stack of open elements, or the
+// document node when that stack is empty.
+func (p *parser) top() *Node {
+       n := p.oe.top()
+       if n == nil {
+               return p.doc
+       }
+       return n
+}
+
+// Stop tags for use in popUntil. These come from section 12.2.4.2.
+//
+// The map is keyed by element namespace ("" for elements with an empty
+// Namespace). In the default, list item and button scopes, reaching an element
+// whose tag is listed under its own namespace ends the search.
+var (
+       defaultScopeStopTags = map[string][]a.Atom{
+               "":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
+               "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
+               "svg":  {a.Desc, a.ForeignObject, a.Title},
+       }
+)
+
+// scope selects the set of stop tags that bounds a search of, or a clearing
+// of, the stack of open elements.
+type scope int
+
+// indexOfElementInScope accepts defaultScope, listItemScope, buttonScope,
+// tableScope and selectScope. clearStackToContext accepts tableScope,
+// tableRowScope and tableBodyScope.
+const (
+       defaultScope scope = iota
+       listItemScope
+       buttonScope
+       tableScope
+       tableRowScope
+       tableBodyScope
+       selectScope
+)
+
+// popUntil looks, from the top of the stack of open elements downwards, for
+// the first element whose tag is one of matchTags and that is still within
+// scope s (see section 12.2.4.2 for the stop tags of each scope). If one is
+// found, that element and everything above it are popped and popUntil returns
+// true. Otherwise the stack is left untouched and popUntil returns false.
+//
+// Table scope, for instance, stops at "html" and "table". Given the stack
+// ["html", "body", "font", "table", "b", "i", "u"]
+// popUntil(tableScope, "font") returns false because "table" is reached first,
+// whereas popUntil(tableScope, "i") returns true and leaves
+// ["html", "body", "font", "table", "b"]
+//
+// A tag that is both a match tag and a stop tag counts as a match, as long as
+// no other stop tag sits above it: popUntil(tableScope, "table") returns true
+// and leaves
+// ["html", "body", "font"]
+func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
+       i := p.indexOfElementInScope(s, matchTags...)
+       if i == -1 {
+               return false
+       }
+       p.oe = p.oe[:i]
+       return true
+}
+
+// indexOfElementInScope returns the index in p.oe of the highest element whose
+// tag is in matchTags that is in scope. If no matching element is in scope, it
+// returns -1.
+//
+// The stack is searched from the top down. Only elements with an empty
+// Namespace can match or trigger the scope-specific stop tags; elements in any
+// namespace can trigger the stop tags in defaultScopeStopTags. It panics if s
+// is a scope other than defaultScope, listItemScope, buttonScope, tableScope
+// or selectScope and an element with an empty Namespace is examined.
+func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
+       for i := len(p.oe) - 1; i >= 0; i-- {
+               tagAtom := p.oe[i].DataAtom
+               if p.oe[i].Namespace == "" {
+                       // A match is checked before the stop tags, so a tag that is
+                       // both a match tag and a stop tag counts as a match.
+                       for _, t := range matchTags {
+                               if t == tagAtom {
+                                       return i
+                               }
+                       }
+                       // Stop tags specific to the requested scope.
+                       switch s {
+                       case defaultScope:
+                               // No-op.
+                       case listItemScope:
+                               if tagAtom == a.Ol || tagAtom == a.Ul {
+                                       return -1
+                               }
+                       case buttonScope:
+                               if tagAtom == a.Button {
+                                       return -1
+                               }
+                       case tableScope:
+                               if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
+                                       return -1
+                               }
+                       case selectScope:
+                               // Select scope is inverted: every tag other than
+                               // optgroup and option stops the search.
+                               if tagAtom != a.Optgroup && tagAtom != a.Option {
+                                       return -1
+                               }
+                       default:
+                               panic("unreachable")
+                       }
+               }
+               // The default, list item and button scopes additionally stop at
+               // the shared stop tags listed for the element's namespace.
+               switch s {
+               case defaultScope, listItemScope, buttonScope:
+                       for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
+                               if t == tagAtom {
+                                       return -1
+                               }
+                       }
+               }
+       }
+       return -1
+}
+
+// elementInScope reports whether an element with one of matchTags is in scope
+// s. It performs the same search as popUntil but leaves the stack of open
+// elements unmodified.
+func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
+       i := p.indexOfElementInScope(s, matchTags...)
+       return i != -1
+}
+
+// clearStackToContext pops the stack of open elements until its top is one of
+// the elements that delimit scope s; that delimiting element stays on the
+// stack. The delimiters are html and template plus, per scope: table for
+// tableScope, tr for tableRowScope, and tbody, tfoot and thead for
+// tableBodyScope. Any other scope panics as soon as an element is examined.
+// If no delimiter is found the stack is left unchanged.
+func (p *parser) clearStackToContext(s scope) {
+       for i := len(p.oe) - 1; i >= 0; i-- {
+               tag := p.oe[i].DataAtom
+               var delimiter bool
+               switch s {
+               case tableScope:
+                       delimiter = tag == a.Html || tag == a.Table || tag == a.Template
+               case tableRowScope:
+                       delimiter = tag == a.Html || tag == a.Tr || tag == a.Template
+               case tableBodyScope:
+                       delimiter = tag == a.Html || tag == a.Tbody || tag == a.Tfoot || tag == a.Thead || tag == a.Template
+               default:
+                       panic("unreachable")
+               }
+               if delimiter {
+                       p.oe = p.oe[:i+1]
+                       return
+               }
+       }
+}
+
+// parseGenericRawTextElement implements the generic raw text element parsing
+// algorithm defined in 12.2.6.2.
+// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
+// It adds an element for the current token, remembers the current insertion
+// mode in originalIM and switches to the text insertion mode.
+// TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
+// officially, need to make tokenizer consider both states.
+func (p *parser) parseGenericRawTextElement() {
+       p.addElement()
+       // NOTE(review): originalIM is assigned directly here rather than through
+       // setOriginalIM, so the "set twice" panic check is bypassed — confirm
+       // that this is intended.
+       p.originalIM = p.im
+       p.im = textIM
+}
+
+// generateImpliedEndTags pops the stack of open elements for as long as its
+// top is an element node tagged dd, dt, li, optgroup, option, p, rb, rp, rt or
+// rtc. Popping stops early at an element whose Data equals one of exceptions;
+// that element is kept.
+func (p *parser) generateImpliedEndTags(exceptions ...string) {
+       // end is the length the stack will be truncated to.
+       end := len(p.oe)
+scan:
+       for ; end > 0; end-- {
+               n := p.oe[end-1]
+               if n.Type != ElementNode {
+                       break
+               }
+               switch n.DataAtom {
+               case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
+                       // Candidate for an implied end tag; check the exceptions.
+               default:
+                       break scan
+               }
+               for _, except := range exceptions {
+                       if n.Data == except {
+                               break scan
+                       }
+               }
+       }
+
+       p.oe = p.oe[:end]
+}
+
+// addChild attaches n to the tree, either through foster parenting when
+// shouldFosterParent says so or as the last child of the current node, and
+// then pushes n onto the stack of open elements if n is an element node.
+func (p *parser) addChild(n *Node) {
+       switch {
+       case p.shouldFosterParent():
+               p.fosterParent(n)
+       default:
+               p.top().AppendChild(n)
+       }
+
+       if n.Type != ElementNode {
+               return
+       }
+       p.oe = append(p.oe, n)
+}
+
+// shouldFosterParent returns whether the next node to be added should be
+// foster parented.
+//
+// NOTE(review): this always returns false, so addChild and addText never take
+// their foster-parenting branches and the parser's fosterParenting field is
+// not consulted here — confirm that disabling foster parenting is intended.
+func (p *parser) shouldFosterParent() bool {
+       return false
+}
+
+// fosterParent adds a child node according to the foster parenting rules.
+// Section 12.2.6.1, "foster parenting".
+//
+// n is not pushed onto the stack of open elements here. A text node may be
+// merged into an adjacent text node instead of being inserted.
+func (p *parser) fosterParent(n *Node) {
+       var table, parent, prev, template *Node
+       // Find the top-most table element on the stack of open elements; i is
+       // its index (or -1 if there is none).
+       var i int
+       for i = len(p.oe) - 1; i >= 0; i-- {
+               if p.oe[i].DataAtom == a.Table {
+                       table = p.oe[i]
+                       break
+               }
+       }
+
+       // Likewise for the top-most template element; j is its index.
+       var j int
+       for j = len(p.oe) - 1; j >= 0; j-- {
+               if p.oe[j].DataAtom == a.Template {
+                       template = p.oe[j]
+                       break
+               }
+       }
+
+       // A template that sits above the table (or with no table at all) becomes
+       // the parent.
+       if template != nil && (table == nil || j > i) {
+               template.AppendChild(n)
+               return
+       }
+
+       if table == nil {
+               // The foster parent is the html element.
+               parent = p.oe[0]
+       } else {
+               parent = table.Parent
+       }
+       // The table has no parent: fall back to the element just below it on the
+       // stack of open elements.
+       if parent == nil {
+               parent = p.oe[i-1]
+       }
+
+       // prev is the node that n would end up immediately after.
+       if table != nil {
+               prev = table.PrevSibling
+       } else {
+               prev = parent.LastChild
+       }
+       // Merge adjacent text instead of creating a second text node.
+       if prev != nil && prev.Type == TextNode && n.Type == TextNode {
+               prev.Data += n.Data
+               return
+       }
+
+       // NOTE(review): table is nil when no table is open; this presumably
+       // makes InsertBefore append n to parent — confirm against InsertBefore.
+       parent.InsertBefore(n, table)
+}
+
+// addText adds text to the tree. Empty text is ignored. When foster parenting
+// applies, a new text node is handed to fosterParent. Otherwise the text is
+// appended to the current node's last child if that is a text node, and a new
+// text node is added with addChild if it is not.
+func (p *parser) addText(text string) {
+       if text == "" {
+               return
+       }
+
+       if p.shouldFosterParent() {
+               p.fosterParent(&Node{Type: TextNode, Data: text})
+               return
+       }
+
+       if last := p.top().LastChild; last != nil && last.Type == TextNode {
+               last.Data += text
+               return
+       }
+       p.addChild(&Node{Type: TextNode, Data: text})
+}
+
+// addElement builds an element node from the current token's DataAtom, Data
+// and Attr and adds it with addChild.
+func (p *parser) addElement() {
+       elem := &Node{Type: ElementNode}
+       elem.DataAtom = p.tok.DataAtom
+       elem.Data = p.tok.Data
+       elem.Attr = p.tok.Attr
+       p.addChild(elem)
+}
+
+// addFormattingElement adds an element for the current token and appends it
+// to the list of active formatting elements, first pruning older identical
+// entries so that duplicates do not accumulate without bound.
+// Section 12.2.4.3.
+func (p *parser) addFormattingElement() {
+       tagAtom, attr := p.tok.DataAtom, p.tok.Attr
+       p.addElement()
+
+       // Implement the Noah's Ark clause, but with three per family instead of two.
+       identicalElements := 0
+findIdenticalElements:
+       // Scan from the most recent entry back to the last scope marker.
+       for i := len(p.afe) - 1; i >= 0; i-- {
+               n := p.afe[i]
+               if n.Type == scopeMarkerNode {
+                       break
+               }
+               if n.Type != ElementNode {
+                       continue
+               }
+               if n.Namespace != "" {
+                       continue
+               }
+               if n.DataAtom != tagAtom {
+                       continue
+               }
+               if len(n.Attr) != len(attr) {
+                       continue
+               }
+       compareAttributes:
+               for _, t0 := range n.Attr {
+                       for _, t1 := range attr {
+                               if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
+                                       // Found a match for this attribute, continue with the next attribute.
+                                       continue compareAttributes
+                               }
+                       }
+                       // If we get here, there is no attribute that matches t0.
+                       // Therefore the element is not identical to the new one.
+                       continue findIdenticalElements
+               }
+
+               // n has the same tag and attributes as the new element. From the
+               // third such entry onwards, drop it from the list.
+               identicalElements++
+               if identicalElements >= 3 {
+                       p.afe.remove(n)
+               }
+       }
+
+       // p.top() is the element that addElement just pushed.
+       p.afe = append(p.afe, p.top())
+}
+
+// clearActiveFormattingElements pops entries off the list of active
+// formatting elements until it has popped a scope marker or the list has
+// become empty. It panics if the list is already empty when called.
+// Section 12.2.4.3.
+func (p *parser) clearActiveFormattingElements() {
+       for {
+               n := p.afe.pop()
+               if len(p.afe) == 0 {
+                       return
+               }
+               if n.Type == scopeMarkerNode {
+                       return
+               }
+       }
+}
+
+// reconstructActiveFormattingElements re-opens the trailing entries of the
+// list of active formatting elements that are no longer on the stack of open
+// elements: each such entry is cloned, the clone is added as a child of the
+// current node, and the clone replaces the entry in the list.
+// Section 12.2.4.3.
+func (p *parser) reconstructActiveFormattingElements() {
+       n := p.afe.top()
+       if n == nil {
+               return
+       }
+       // Nothing to do if the last entry is a marker or is still open.
+       if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
+               return
+       }
+       // Rewind i to the nearest entry that is a marker or is still open, or to
+       // -1 if the whole list needs reconstructing.
+       i := len(p.afe) - 1
+       for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
+               if i == 0 {
+                       i = -1
+                       break
+               }
+               i--
+               n = p.afe[i]
+       }
+       // Advance from the entry after i to the end of the list, replacing each
+       // entry with a freshly inserted clone.
+       for {
+               i++
+               clone := p.afe[i].clone()
+               p.addChild(clone)
+               p.afe[i] = clone
+               if i == len(p.afe)-1 {
+                       break
+               }
+       }
+}
+
+// acknowledgeSelfClosingTag clears hasSelfClosingToken, recording that the
+// self-closing flag of the token being processed has been acknowledged.
+// Section 12.2.5.
+func (p *parser) acknowledgeSelfClosingTag() {
+       p.hasSelfClosingToken = false
+}
+
+// An insertion mode (section 12.2.4.1) is the state transition function from
+// a particular state in the HTML5 parser's state machine. It updates the
+// parser's fields depending on parser.tok (where ErrorToken means EOF).
+// It returns whether the token was consumed; a false result means the token
+// still has to be processed, typically by the insertion mode that was just
+// selected.
+type insertionMode func(*parser) bool
+
+// setOriginalIM records the current insertion mode as the one to return to
+// once a text or inTableText insertion mode has finished. It panics if an
+// original insertion mode is already recorded.
+// Section 12.2.4.1, "using the rules for".
+func (p *parser) setOriginalIM() {
+       if p.originalIM == nil {
+               p.originalIM = p.im
+               return
+       }
+       panic("html: bad parser state: originalIM was set twice")
+}
+
+// resetInsertionMode selects the insertion mode appropriate for the current
+// stack of open elements. It walks the stack from the top down and sets p.im
+// from the first element with a recognized tag. If the stack is empty, p.im is
+// left unchanged.
+// Section 12.2.4.1, "reset the insertion mode".
+func (p *parser) resetInsertionMode() {
+       for i := len(p.oe) - 1; i >= 0; i-- {
+               n := p.oe[i]
+               // For the bottom-most element, the fragment context element (if
+               // any) is examined in its place.
+               last := i == 0
+               if last && p.context != nil {
+                       n = p.context
+               }
+
+               switch n.DataAtom {
+               case a.Select:
+                       if !last {
+                               // Look further down the stack: a template below the
+                               // select gives "in select", a table gives "in select
+                               // in table".
+                               for ancestor, first := n, p.oe[0]; ancestor != first; {
+                                       ancestor = p.oe[p.oe.index(ancestor)-1]
+                                       switch ancestor.DataAtom {
+                                       case a.Template:
+                                               p.im = inSelectIM
+                                               return
+                                       case a.Table:
+                                               p.im = inSelectInTableIM
+                                               return
+                                       }
+                               }
+                       }
+                       p.im = inSelectIM
+               case a.Td, a.Th:
+                       // TODO: remove this divergence from the HTML5 spec.
+                       //
+                       // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
+                       p.im = inCellIM
+               case a.Tr:
+                       p.im = inRowIM
+               case a.Tbody, a.Thead, a.Tfoot:
+                       p.im = inTableBodyIM
+               case a.Caption:
+                       p.im = inCaptionIM
+               case a.Colgroup:
+                       p.im = inColumnGroupIM
+               case a.Table:
+                       p.im = inTableIM
+               case a.Template:
+                       // TODO: remove this divergence from the HTML5 spec.
+                       if n.Namespace != "" {
+                               continue
+                       }
+                       // Use the current template insertion mode.
+                       p.im = p.templateStack.top()
+               case a.Head:
+                       // TODO: remove this divergence from the HTML5 spec.
+                       //
+                       // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
+                       p.im = inHeadIM
+               case a.Body:
+                       p.im = inBodyIM
+               case a.Frameset:
+                       p.im = inFramesetIM
+               case a.Html:
+                       if p.head == nil {
+                               p.im = beforeHeadIM
+                       } else {
+                               p.im = afterHeadIM
+                       }
+               default:
+                       // Unrecognized tag: fall back to "in body" at the bottom of
+                       // the stack, otherwise keep looking further down.
+                       if last {
+                               p.im = inBodyIM
+                               return
+                       }
+                       continue
+               }
+               return
+       }
+}
+
+// whitespace is the cutset (space, tab, carriage return, line feed and form
+// feed) that the insertion modes trim from the start of text tokens.
+const whitespace = " \t\r\n\f"
+
+// initialIM handles a token in the "initial" insertion mode. Comments and the
+// doctype are appended to the document and consumed; text that is entirely
+// whitespace is consumed and dropped. Any other token puts the parser into
+// quirks mode, switches to beforeHTMLIM and is left unconsumed.
+// Section 12.2.6.4.1.
+func initialIM(p *parser) bool {
+       if p.tok.Type == CommentToken {
+               p.doc.AppendChild(&Node{Type: CommentNode, Data: p.tok.Data})
+               return true
+       }
+       if p.tok.Type == DoctypeToken {
+               doctype, quirks := parseDoctype(p.tok.Data)
+               p.doc.AppendChild(doctype)
+               p.quirks = quirks
+               p.im = beforeHTMLIM
+               return true
+       }
+       if p.tok.Type == TextToken {
+               // Leading whitespace is discarded; the remainder, if any, is
+               // left in the token.
+               p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
+               if p.tok.Data == "" {
+                       return true
+               }
+       }
+       p.quirks = true
+       p.im = beforeHTMLIM
+       return false
+}
+
+// beforeHTMLIM handles a token in the "before html" insertion mode. A doctype
+// is ignored, a comment is appended to the document, whitespace-only text is
+// dropped and an <html> start tag creates the root element and switches to
+// beforeHeadIM. End tags other than head, body, html and br are ignored.
+// Anything else causes an implied <html> start tag to be parsed, and the token
+// is left unconsumed.
+// Section 12.2.6.4.2.
+func beforeHTMLIM(p *parser) bool {
+       switch p.tok.Type {
+       case DoctypeToken:
+               // Ignore the token.
+               return true
+       case CommentToken:
+               p.doc.AppendChild(&Node{Type: CommentNode, Data: p.tok.Data})
+               return true
+       case TextToken:
+               p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
+               if p.tok.Data == "" {
+                       // It was all whitespace, so ignore it.
+                       return true
+               }
+       case StartTagToken:
+               if p.tok.DataAtom == a.Html {
+                       p.addElement()
+                       p.im = beforeHeadIM
+                       return true
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Head, a.Body, a.Html, a.Br:
+                       // Handled by the implied <html> start tag below.
+               default:
+                       // Ignore the token.
+                       return true
+               }
+       }
+       p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
+       return false
+}
+
+// beforeHeadIM handles a token in the "before head" insertion mode. A doctype
+// is ignored, a comment is added to the current node, whitespace-only text is
+// dropped, a <head> start tag creates the head element and switches to
+// inHeadIM, and an <html> start tag is delegated to inBodyIM. End tags other
+// than head, body, html and br are ignored. Anything else causes an implied
+// <head> start tag to be parsed, and the token is left unconsumed.
+// Section 12.2.6.4.3.
+func beforeHeadIM(p *parser) bool {
+       switch p.tok.Type {
+       case DoctypeToken:
+               // Ignore the token.
+               return true
+       case CommentToken:
+               p.addChild(&Node{Type: CommentNode, Data: p.tok.Data})
+               return true
+       case TextToken:
+               p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
+               if p.tok.Data == "" {
+                       // It was all whitespace, so ignore it.
+                       return true
+               }
+       case StartTagToken:
+               if p.tok.DataAtom == a.Html {
+                       return inBodyIM(p)
+               }
+               if p.tok.DataAtom == a.Head {
+                       p.addElement()
+                       p.head = p.top()
+                       p.im = inHeadIM
+                       return true
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Head, a.Body, a.Html, a.Br:
+                       // Handled by the implied <head> start tag below.
+               default:
+                       // Ignore the token.
+                       return true
+               }
+       }
+
+       p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
+       return false
+}
+
+// inHeadIM handles a token in the "in head" insertion mode. It returns true
+// when the token was consumed. For tokens it does not handle itself, it parses
+// an implied </head> end tag and returns false so that the token is not
+// treated as consumed.
+// Section 12.2.6.4.4.
+func inHeadIM(p *parser) bool {
+       switch p.tok.Type {
+       case TextToken:
+               s := strings.TrimLeft(p.tok.Data, whitespace)
+               if len(s) < len(p.tok.Data) {
+                       // Add the initial whitespace to the current node.
+                       p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
+                       if s == "" {
+                               return true
+                       }
+                       // Keep only the non-whitespace remainder in the token.
+                       p.tok.Data = s
+               }
+       case StartTagToken:
+               switch p.tok.DataAtom {
+               case a.Html:
+                       return inBodyIM(p)
+               case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
+                       // These elements have no content: add the element and pop
+                       // it straight back off the stack of open elements.
+                       p.addElement()
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+                       return true
+               case a.Noscript:
+                       if p.scripting {
+                               p.parseGenericRawTextElement()
+                               return true
+                       }
+                       p.addElement()
+                       p.im = inHeadNoscriptIM
+                       // Don't let the tokenizer go into raw text mode when scripting is disabled.
+                       p.tokenizer.NextIsNotRawText()
+                       return true
+               case a.Script, a.Title:
+                       p.addElement()
+                       p.setOriginalIM()
+                       p.im = textIM
+                       return true
+               case a.Noframes, a.Style:
+                       p.parseGenericRawTextElement()
+                       return true
+               case a.Head:
+                       // Ignore the token.
+                       return true
+               case a.Template:
+                       // TODO: remove this divergence from the HTML5 spec.
+                       //
+                       // We don't handle all of the corner cases when mixing foreign
+                       // content (i.e. <math> or <svg>) with <template>. Without this
+                       // early return, we can get into an infinite loop, possibly because
+                       // of the "TODO... further divergence" a little below.
+                       //
+                       // As a workaround, if we are mixing foreign content and templates,
+                       // just ignore the rest of the HTML. Foreign content is rare and a
+                       // relatively old HTML feature. Templates are also rare and a
+                       // relatively new HTML feature. Their combination is very rare.
+                       for _, e := range p.oe {
+                               if e.Namespace != "" {
+                                       p.im = ignoreTheRemainingTokens
+                                       return true
+                               }
+                       }
+
+                       // Open the template: push a scope marker onto the active
+                       // formatting elements and enter "in template", recording
+                       // that mode on the template insertion mode stack.
+                       p.addElement()
+                       p.afe = append(p.afe, &scopeMarker)
+                       p.framesetOK = false
+                       p.im = inTemplateIM
+                       p.templateStack = append(p.templateStack, inTemplateIM)
+                       return true
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Head:
+                       p.oe.pop()
+                       p.im = afterHeadIM
+                       return true
+               case a.Body, a.Html, a.Br:
+                       p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
+                       return false
+               case a.Template:
+                       // A stray </template> with no open template is ignored.
+                       if !p.oe.contains(a.Template) {
+                               return true
+                       }
+                       // TODO: remove this further divergence from the HTML5 spec.
+                       //
+                       // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
+                       p.generateImpliedEndTags()
+                       // Pop up to and including the top-most template element
+                       // that has an empty Namespace.
+                       for i := len(p.oe) - 1; i >= 0; i-- {
+                               if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
+                                       p.oe = p.oe[:i]
+                                       break
+                               }
+                       }
+                       p.clearActiveFormattingElements()
+                       p.templateStack.pop()
+                       p.resetInsertionMode()
+                       return true
+               default:
+                       // Ignore the token.
+                       return true
+               }
+       case CommentToken:
+               p.addChild(&Node{
+                       Type: CommentNode,
+                       Data: p.tok.Data,
+               })
+               return true
+       case DoctypeToken:
+               // Ignore the token.
+               return true
+       }
+
+       p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
+       return false
+}
+
+// Section 12.2.6.4.5.
+func inHeadNoscriptIM(p *parser) bool {
+       switch p.tok.Type {
+       case DoctypeToken:
+               // Ignore the token.
+               return true
+       case StartTagToken:
+               switch p.tok.DataAtom {
+               case a.Html:
+                       return inBodyIM(p)
+               case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
+                       return inHeadIM(p)
+               case a.Head:
+                       // Ignore the token.
+                       return true
+               case a.Noscript:
+                       // Don't let the tokenizer go into raw text mode even when a <noscript>
+                       // tag is in "in head noscript" insertion mode.
+                       p.tokenizer.NextIsNotRawText()
+                       // Ignore the token.
+                       return true
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Noscript, a.Br:
+               default:
+                       // Ignore the token.
+                       return true
+               }
+       case TextToken:
+               s := strings.TrimLeft(p.tok.Data, whitespace)
+               if len(s) == 0 {
+                       // It was all whitespace.
+                       return inHeadIM(p)
+               }
+       case CommentToken:
+               return inHeadIM(p)
+       }
+       p.oe.pop()
+       if p.top().DataAtom != a.Head {
+               panic("html: the new current node will be a head element.")
+       }
+       p.im = inHeadIM
+       if p.tok.DataAtom == a.Noscript {
+               return true
+       }
+       return false
+}
+
+// Section 12.2.6.4.6.
+func afterHeadIM(p *parser) bool {
+       switch p.tok.Type {
+       case TextToken:
+               s := strings.TrimLeft(p.tok.Data, whitespace)
+               if len(s) < len(p.tok.Data) {
+                       // Add the initial whitespace to the current node.
+                       p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
+                       if s == "" {
+                               return true
+                       }
+                       p.tok.Data = s
+               }
+       case StartTagToken:
+               switch p.tok.DataAtom {
+               case a.Html:
+                       return inBodyIM(p)
+               case a.Body:
+                       p.addElement()
+                       p.framesetOK = false
+                       p.im = inBodyIM
+                       return true
+               case a.Frameset:
+                       p.addElement()
+                       p.im = inFramesetIM
+                       return true
+               case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
+                       p.oe = append(p.oe, p.head)
+                       defer p.oe.remove(p.head)
+                       return inHeadIM(p)
+               case a.Head:
+                       // Ignore the token.
+                       return true
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Body, a.Html, a.Br:
+                       // Drop down to creating an implied <body> tag.
+               case a.Template:
+                       return inHeadIM(p)
+               default:
+                       // Ignore the token.
+                       return true
+               }
+       case CommentToken:
+               p.addChild(&Node{
+                       Type: CommentNode,
+                       Data: p.tok.Data,
+               })
+               return true
+       case DoctypeToken:
+               // Ignore the token.
+               return true
+       }
+
+       p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
+       p.framesetOK = true
+       return false
+}
+
+// copyAttributes copies attributes of src not found on dst to dst.
+func copyAttributes(dst *Node, src Token) {
+       if len(src.Attr) == 0 {
+               return
+       }
+       attr := map[string]string{}
+       for _, t := range dst.Attr {
+               attr[t.Key] = t.Val
+       }
+       for _, t := range src.Attr {
+               if _, ok := attr[t.Key]; !ok {
+                       dst.Attr = append(dst.Attr, t)
+                       attr[t.Key] = t.Val
+               }
+       }
+}
+
+// Section 12.2.6.4.7.
+//
+// inBodyIM handles the current token (p.tok) for the "in body" insertion mode.
+// It dispatches first on the token type and then, for start and end tags, on
+// the tag's DataAtom.
+//
+// Within this function, false is returned only after the token has been
+// rewritten (<image>, </br>), after an implied </body> has been parsed for
+// </html>, or after switching to inTemplateIM on an ErrorToken; calls delegated
+// to inHeadIM return whatever that function returns. Every other path returns
+// true.
+// NOTE(review): false presumably tells the caller to reprocess the token in the
+// (possibly changed) insertion mode — confirm against the parse loop.
+func inBodyIM(p *parser) bool {
+       switch p.tok.Type {
+       case TextToken:
+               d := p.tok.Data
+               switch n := p.oe.top(); n.DataAtom {
+               case a.Pre, a.Listing:
+                       if n.FirstChild == nil {
+                               // Ignore a newline at the start of a <pre> block.
+                               if d != "" && d[0] == '\r' {
+                                       d = d[1:]
+                               }
+                               if d != "" && d[0] == '\n' {
+                                       d = d[1:]
+                               }
+                       }
+               }
+               // Drop NUL bytes; if nothing is left there is no text to add.
+               d = strings.Replace(d, "\x00", "", -1)
+               if d == "" {
+                       return true
+               }
+               p.reconstructActiveFormattingElements()
+               p.addText(d)
+               if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
+                       // There were non-whitespace characters inserted.
+                       p.framesetOK = false
+               }
+       case StartTagToken:
+               switch p.tok.DataAtom {
+               case a.Html:
+                       // Ignored while a template is open; otherwise attributes that
+                       // are new are merged into p.oe[0], the bottom of the stack.
+                       if p.oe.contains(a.Template) {
+                               return true
+                       }
+                       copyAttributes(p.oe[0], p.tok)
+               case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
+                       return inHeadIM(p)
+               case a.Body:
+                       // Ignored while a template is open; otherwise attributes that
+                       // are new are merged into the existing body element, if the
+                       // second stack entry is one.
+                       if p.oe.contains(a.Template) {
+                               return true
+                       }
+                       if len(p.oe) >= 2 {
+                               body := p.oe[1]
+                               if body.Type == ElementNode && body.DataAtom == a.Body {
+                                       p.framesetOK = false
+                                       copyAttributes(body, p.tok)
+                               }
+                       }
+               case a.Frameset:
+                       if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
+                               // Ignore the token.
+                               return true
+                       }
+                       // Replace the body: detach it from its parent, cut the stack
+                       // back to its first entry and insert the frameset instead.
+                       body := p.oe[1]
+                       if body.Parent != nil {
+                               body.Parent.RemoveChild(body)
+                       }
+                       p.oe = p.oe[:1]
+                       p.addElement()
+                       p.im = inFramesetIM
+                       return true
+               case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+               case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+                       p.popUntil(buttonScope, a.P)
+                       // A heading directly inside another heading closes the outer one.
+                       switch n := p.top(); n.DataAtom {
+                       case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+                               p.oe.pop()
+                       }
+                       p.addElement()
+               case a.Pre, a.Listing:
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+                       // The newline, if any, will be dealt with by the TextToken case.
+                       p.framesetOK = false
+               case a.Form:
+                       // Outside a template only one form may be tracked in p.form;
+                       // inside a template p.form is neither consulted nor set.
+                       if p.form != nil && !p.oe.contains(a.Template) {
+                               // Ignore the token
+                               return true
+                       }
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+                       if !p.oe.contains(a.Template) {
+                               p.form = p.top()
+                       }
+               case a.Li:
+                       p.framesetOK = false
+                       // Scan the stack from the top. An open <li> is closed by
+                       // truncating the stack at it. The scan stops at that <li> or
+                       // at the first special element other than address, div or p.
+                       for i := len(p.oe) - 1; i >= 0; i-- {
+                               node := p.oe[i]
+                               switch node.DataAtom {
+                               case a.Li:
+                                       p.oe = p.oe[:i]
+                               case a.Address, a.Div, a.P:
+                                       continue
+                               default:
+                                       if !isSpecialElement(node) {
+                                               continue
+                                       }
+                               }
+                               break
+                       }
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+               case a.Dd, a.Dt:
+                       p.framesetOK = false
+                       // Same scan as for <li>, but closing an open <dd> or <dt>.
+                       for i := len(p.oe) - 1; i >= 0; i-- {
+                               node := p.oe[i]
+                               switch node.DataAtom {
+                               case a.Dd, a.Dt:
+                                       p.oe = p.oe[:i]
+                               case a.Address, a.Div, a.P:
+                                       continue
+                               default:
+                                       if !isSpecialElement(node) {
+                                               continue
+                                       }
+                               }
+                               break
+                       }
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+               case a.Plaintext:
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+               case a.Button:
+                       p.popUntil(defaultScope, a.Button)
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+                       p.framesetOK = false
+               case a.A:
+                       // If an <a> is still listed in p.afe after the last scope
+                       // marker, close it first and drop it from both lists.
+                       for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
+                               if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
+                                       p.inBodyEndTagFormatting(a.A, "a")
+                                       p.oe.remove(n)
+                                       p.afe.remove(n)
+                                       break
+                               }
+                       }
+                       p.reconstructActiveFormattingElements()
+                       p.addFormattingElement()
+               case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
+                       p.reconstructActiveFormattingElements()
+                       p.addFormattingElement()
+               case a.Nobr:
+                       p.reconstructActiveFormattingElements()
+                       if p.elementInScope(defaultScope, a.Nobr) {
+                               p.inBodyEndTagFormatting(a.Nobr, "nobr")
+                               p.reconstructActiveFormattingElements()
+                       }
+                       p.addFormattingElement()
+               case a.Applet, a.Marquee, a.Object:
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+                       p.afe = append(p.afe, &scopeMarker)
+                       p.framesetOK = false
+               case a.Table:
+                       // An open <p> is only closed when the parser is not in quirks mode.
+                       if !p.quirks {
+                               p.popUntil(buttonScope, a.P)
+                       }
+                       p.addElement()
+                       p.framesetOK = false
+                       p.im = inTableIM
+                       return true
+               case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
+                       // These elements are added and immediately popped again.
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+                       if p.tok.DataAtom == a.Input {
+                               for _, t := range p.tok.Attr {
+                                       if t.Key == "type" {
+                                               if strings.ToLower(t.Val) == "hidden" {
+                                                       // Skip setting framesetOK = false
+                                                       return true
+                                               }
+                                       }
+                               }
+                       }
+                       p.framesetOK = false
+               case a.Param, a.Source, a.Track:
+                       p.addElement()
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+               case a.Hr:
+                       p.popUntil(buttonScope, a.P)
+                       p.addElement()
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+                       p.framesetOK = false
+               case a.Image:
+                       // Treat <image> as <img>: rewrite the token and return false.
+                       p.tok.DataAtom = a.Img
+                       p.tok.Data = a.Img.String()
+                       return false
+               case a.Textarea:
+                       // Content is handled by textIM; setOriginalIM is called first so
+                       // that the mode to return to is recorded.
+                       p.addElement()
+                       p.setOriginalIM()
+                       p.framesetOK = false
+                       p.im = textIM
+               case a.Xmp:
+                       p.popUntil(buttonScope, a.P)
+                       p.reconstructActiveFormattingElements()
+                       p.framesetOK = false
+                       p.parseGenericRawTextElement()
+               case a.Iframe:
+                       p.framesetOK = false
+                       p.parseGenericRawTextElement()
+               case a.Noembed:
+                       p.parseGenericRawTextElement()
+               case a.Noscript:
+                       if p.scripting {
+                               p.parseGenericRawTextElement()
+                               return true
+                       }
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+                       // Don't let the tokenizer go into raw text mode when scripting is disabled.
+                       p.tokenizer.NextIsNotRawText()
+               case a.Select:
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+                       p.framesetOK = false
+                       p.im = inSelectIM
+                       return true
+               case a.Optgroup, a.Option:
+                       // An <option> that is the current node is closed first.
+                       if p.top().DataAtom == a.Option {
+                               p.oe.pop()
+                       }
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+               case a.Rb, a.Rtc:
+                       if p.elementInScope(defaultScope, a.Ruby) {
+                               p.generateImpliedEndTags()
+                       }
+                       p.addElement()
+               case a.Rp, a.Rt:
+                       if p.elementInScope(defaultScope, a.Ruby) {
+                               p.generateImpliedEndTags("rtc")
+                       }
+                       p.addElement()
+               case a.Math, a.Svg:
+                       // Foreign content: adjust attribute names for the target
+                       // vocabulary, then record the tag name as the node's Namespace.
+                       p.reconstructActiveFormattingElements()
+                       if p.tok.DataAtom == a.Math {
+                               adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
+                       } else {
+                               adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
+                       }
+                       adjustForeignAttributes(p.tok.Attr)
+                       p.addElement()
+                       p.top().Namespace = p.tok.Data
+                       if p.hasSelfClosingToken {
+                               p.oe.pop()
+                               p.acknowledgeSelfClosingTag()
+                       }
+                       return true
+               case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+                       // Ignore the token.
+               default:
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Body:
+                       if p.elementInScope(defaultScope, a.Body) {
+                               p.im = afterBodyIM
+                       }
+               case a.Html:
+                       // With a body in scope, parse an implied </body> and return
+                       // false; without one the token is ignored.
+                       if p.elementInScope(defaultScope, a.Body) {
+                               p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
+                               return false
+                       }
+                       return true
+               case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
+                       p.popUntil(defaultScope, p.tok.DataAtom)
+               case a.Form:
+                       if p.oe.contains(a.Template) {
+                               // Inside a template p.form is not used; the form is
+                               // located on the stack instead.
+                               i := p.indexOfElementInScope(defaultScope, a.Form)
+                               if i == -1 {
+                                       // Ignore the token.
+                                       return true
+                               }
+                               p.generateImpliedEndTags()
+                               if p.oe[i].DataAtom != a.Form {
+                                       // Ignore the token.
+                                       return true
+                               }
+                               p.popUntil(defaultScope, a.Form)
+                       } else {
+                               // p.form is cleared even when the token ends up ignored.
+                               node := p.form
+                               p.form = nil
+                               i := p.indexOfElementInScope(defaultScope, a.Form)
+                               if node == nil || i == -1 || p.oe[i] != node {
+                                       // Ignore the token.
+                                       return true
+                               }
+                               p.generateImpliedEndTags()
+                               p.oe.remove(node)
+                       }
+               case a.P:
+                       // A </p> with no <p> in button scope first parses an implied
+                       // <p> start tag, which the popUntil below then closes.
+                       if !p.elementInScope(buttonScope, a.P) {
+                               p.parseImpliedToken(StartTagToken, a.P, a.P.String())
+                       }
+                       p.popUntil(buttonScope, a.P)
+               case a.Li:
+                       p.popUntil(listItemScope, a.Li)
+               case a.Dd, a.Dt:
+                       p.popUntil(defaultScope, p.tok.DataAtom)
+               case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
+                       p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
+               case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
+                       p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
+               case a.Applet, a.Marquee, a.Object:
+                       if p.popUntil(defaultScope, p.tok.DataAtom) {
+                               p.clearActiveFormattingElements()
+                       }
+               case a.Br:
+                       // Treat </br> as <br>: change the token type and return false.
+                       p.tok.Type = StartTagToken
+                       return false
+               case a.Template:
+                       return inHeadIM(p)
+               default:
+                       p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
+               }
+       case CommentToken:
+               p.addChild(&Node{
+                       Type: CommentNode,
+                       Data: p.tok.Data,
+               })
+       case ErrorToken:
+               // TODO: remove this divergence from the HTML5 spec.
+               if len(p.templateStack) > 0 {
+                       p.im = inTemplateIM
+                       return false
+               }
+               // Both outcomes of this scan return true: it only detects whether
+               // the stack holds an element outside the listed set, and no
+               // separate handling is implemented for that case.
+               for _, e := range p.oe {
+                       switch e.DataAtom {
+                       case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
+                               a.Thead, a.Tr, a.Body, a.Html:
+                       default:
+                               return true
+                       }
+               }
+       }
+
+       return true
+}
+
+// inBodyEndTagFormatting closes the formatting element identified by tagAtom
+// and tagName. inBodyIM calls it for end tags of formatting elements, and for
+// <a> and <nobr> start tags that must first close an earlier open element of
+// the same name.
+//
+// tagName is compared against Node.Data for the fast path in steps 1-2;
+// tagAtom is used to find the formatting element in p.afe. When no matching
+// entry is found in p.afe, handling is passed on to inBodyEndTagOther.
+func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
+       // This is the "adoption agency" algorithm, described at
+       // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
+
+       // TODO: this is a fairly literal line-by-line translation of that algorithm.
+       // Once the code successfully parses the comprehensive test suite, we should
+       // refactor this code to be more idiomatic.
+
+       // Steps 1-2
+       // Fast path: the current node has the tag name and is not in p.afe.
+       if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
+               p.oe.pop()
+               return
+       }
+
+       // Steps 3-5. The outer loop.
+       // It runs at most 8 times; every early exit below is a return.
+       for i := 0; i < 8; i++ {
+               // Step 6. Find the formatting element.
+               // Only entries after the last scope marker in p.afe are considered.
+               var formattingElement *Node
+               for j := len(p.afe) - 1; j >= 0; j-- {
+                       if p.afe[j].Type == scopeMarkerNode {
+                               break
+                       }
+                       if p.afe[j].DataAtom == tagAtom {
+                               formattingElement = p.afe[j]
+                               break
+                       }
+               }
+               if formattingElement == nil {
+                       p.inBodyEndTagOther(tagAtom, tagName)
+                       return
+               }
+
+               // Step 7. Ignore the tag if formatting element is not in the stack of open elements.
+               feIndex := p.oe.index(formattingElement)
+               if feIndex == -1 {
+                       p.afe.remove(formattingElement)
+                       return
+               }
+               // Step 8. Ignore the tag if formatting element is not in the scope.
+               if !p.elementInScope(defaultScope, tagAtom) {
+                       // Ignore the tag.
+                       return
+               }
+
+               // Step 9. This step is omitted because it's just a parse error but no need to return.
+
+               // Steps 10-11. Find the furthest block.
+               // This is the first special element at or above feIndex on the stack.
+               var furthestBlock *Node
+               for _, e := range p.oe[feIndex:] {
+                       if isSpecialElement(e) {
+                               furthestBlock = e
+                               break
+                       }
+               }
+               if furthestBlock == nil {
+                       // No furthest block: pop up to and including the formatting
+                       // element and drop it from p.afe.
+                       e := p.oe.pop()
+                       for e != formattingElement {
+                               e = p.oe.pop()
+                       }
+                       p.afe.remove(e)
+                       return
+               }
+
+               // Steps 12-13. Find the common ancestor and bookmark node.
+               // NOTE(review): p.oe[feIndex-1] assumes the formatting element is
+               // never the bottom entry of the stack — confirm.
+               commonAncestor := p.oe[feIndex-1]
+               bookmark := p.afe.index(formattingElement)
+
+               // Step 14. The inner loop. Find the lastNode to reparent.
+               lastNode := furthestBlock
+               node := furthestBlock
+               x := p.oe.index(node)
+               // Step 14.1.
+               j := 0
+               for {
+                       // Step 14.2.
+                       j++
+                       // Step. 14.3.
+                       // x walks down the stack from the furthest block.
+                       x--
+                       node = p.oe[x]
+                       // Step 14.4. Go to the next step if node is formatting element.
+                       if node == formattingElement {
+                               break
+                       }
+                       // Step 14.5. Remove node from the list of active formatting elements if
+                       // inner loop counter is greater than three and node is in the list of
+                       // active formatting elements.
+                       if ni := p.afe.index(node); j > 3 && ni > -1 {
+                               p.afe.remove(node)
+                               // If any element of the list of active formatting elements is removed,
+                               // we need to take care whether bookmark should be decremented or not.
+                               // This is because the value of bookmark may exceed the size of the
+                               // list by removing elements from the list.
+                               if ni <= bookmark {
+                                       bookmark--
+                               }
+                               continue
+                       }
+                       // Step 14.6. Continue the next inner loop if node is not in the list of
+                       // active formatting elements.
+                       if p.afe.index(node) == -1 {
+                               p.oe.remove(node)
+                               continue
+                       }
+                       // Step 14.7.
+                       // Replace node with a clone in both p.afe and p.oe.
+                       clone := node.clone()
+                       p.afe[p.afe.index(node)] = clone
+                       p.oe[p.oe.index(node)] = clone
+                       node = clone
+                       // Step 14.8.
+                       if lastNode == furthestBlock {
+                               bookmark = p.afe.index(node) + 1
+                       }
+                       // Step 14.9.
+                       // Move lastNode under the clone.
+                       if lastNode.Parent != nil {
+                               lastNode.Parent.RemoveChild(lastNode)
+                       }
+                       node.AppendChild(lastNode)
+                       // Step 14.10.
+                       lastNode = node
+               }
+
+               // Step 15. Reparent lastNode to the common ancestor,
+               // or for misnested table nodes, to the foster parent.
+               if lastNode.Parent != nil {
+                       lastNode.Parent.RemoveChild(lastNode)
+               }
+               switch commonAncestor.DataAtom {
+               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+                       p.fosterParent(lastNode)
+               default:
+                       commonAncestor.AppendChild(lastNode)
+               }
+
+               // Steps 16-18. Reparent nodes from the furthest block's children
+               // to a clone of the formatting element.
+               clone := formattingElement.clone()
+               reparentChildren(clone, furthestBlock)
+               furthestBlock.AppendChild(clone)
+
+               // Step 19. Fix up the list of active formatting elements.
+               if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
+                       // Move the bookmark with the rest of the list.
+                       bookmark--
+               }
+               p.afe.remove(formattingElement)
+               p.afe.insert(bookmark, clone)
+
+               // Step 20. Fix up the stack of open elements.
+               // The clone goes directly above the furthest block.
+               p.oe.remove(formattingElement)
+               p.oe.insert(p.oe.index(furthestBlock)+1, clone)
+       }
+}
+
+// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
+// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
+// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
+func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
+       for i := len(p.oe) - 1; i >= 0; i-- {
+               // Two element nodes have the same tag if they have the same Data (a
+               // string-typed field). As an optimization, for common HTML tags, each
+               // Data string is assigned a unique, non-zero DataAtom (a uint32-typed
+               // field), since integer comparison is faster than string comparison.
+               // Uncommon (custom) tags get a zero DataAtom.
+               //
+               // The if condition here is equivalent to (p.oe[i].Data == tagName).
+               if (p.oe[i].DataAtom == tagAtom) &&
+                       ((tagAtom != 0) || (p.oe[i].Data == tagName)) {
+                       p.oe = p.oe[:i]
+                       break
+               }
+               if isSpecialElement(p.oe[i]) {
+                       break
+               }
+       }
+}
+
+// Section 12.2.6.4.8.
+func textIM(p *parser) bool {
+       switch p.tok.Type {
+       case ErrorToken:
+               p.oe.pop()
+       case TextToken:
+               d := p.tok.Data
+               if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
+                       // Ignore a newline at the start of a <textarea> block.
+                       if d != "" && d[0] == '\r' {
+                               d = d[1:]
+                       }
+                       if d != "" && d[0] == '\n' {
+                               d = d[1:]
+                       }
+               }
+               if d == "" {
+                       return true
+               }
+               p.addText(d)
+               return true
+       case EndTagToken:
+               p.oe.pop()
+       }
+       p.im = p.originalIM
+       p.originalIM = nil
+       return p.tok.Type == EndTagToken
+}
+
+// inTableIM handles the current token (p.tok) while the parser is in the
+// "in table" insertion mode. It returns true once the token has been consumed
+// and false when the token has to be run again (see parseCurrentToken),
+// usually after the insertion mode or the stack of open elements was changed.
+//
+// Tokens that are not handled by a specific case are passed to inBodyIM with
+// p.fosterParenting switched on for the duration of that call.
+//
+// Section 12.2.6.4.9.
+func inTableIM(p *parser) bool {
+       switch p.tok.Type {
+       case TextToken:
+               // NUL characters are dropped from the text before anything else.
+               p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
+               switch p.oe.top().DataAtom {
+               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+                       // Whitespace-only text under one of these elements is added as
+                       // is. Any other text leaves the switch and reaches the
+                       // foster-parenting inBodyIM call at the end of the function.
+                       if strings.Trim(p.tok.Data, whitespace) == "" {
+                               p.addText(p.tok.Data)
+                               return true
+                       }
+               }
+       case StartTagToken:
+               switch p.tok.DataAtom {
+               case a.Caption:
+                       p.clearStackToContext(tableScope)
+                       // A scope marker is pushed onto the active formatting elements
+                       // before the caption element is added.
+                       p.afe = append(p.afe, &scopeMarker)
+                       p.addElement()
+                       p.im = inCaptionIM
+                       return true
+               case a.Colgroup:
+                       p.clearStackToContext(tableScope)
+                       p.addElement()
+                       p.im = inColumnGroupIM
+                       return true
+               case a.Col:
+                       // Process an implied <colgroup> start tag, then reprocess <col>.
+                       p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
+                       return false
+               case a.Tbody, a.Tfoot, a.Thead:
+                       p.clearStackToContext(tableScope)
+                       p.addElement()
+                       p.im = inTableBodyIM
+                       return true
+               case a.Td, a.Th, a.Tr:
+                       // Process an implied <tbody> start tag, then reprocess the token.
+                       p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
+                       return false
+               case a.Table:
+                       // A nested <table> start tag closes the open table (if one is
+                       // in table scope) and is then reprocessed.
+                       if p.popUntil(tableScope, a.Table) {
+                               p.resetInsertionMode()
+                               return false
+                       }
+                       // Ignore the token.
+                       return true
+               case a.Style, a.Script, a.Template:
+                       return inHeadIM(p)
+               case a.Input:
+                       // Only <input type=hidden> (value compared case-insensitively)
+                       // is inserted here; it is popped again right after being added.
+                       for _, t := range p.tok.Attr {
+                               if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
+                                       p.addElement()
+                                       p.oe.pop()
+                                       return true
+                               }
+                       }
+                       // Otherwise drop down to the default action.
+               case a.Form:
+                       if p.oe.contains(a.Template) || p.form != nil {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.addElement()
+                       p.form = p.oe.pop()
+                       // NOTE(review): there is no return here, so control continues
+                       // to the foster-parenting inBodyIM call below with the same
+                       // <form> token. Presumably inBodyIM ignores it now that p.form
+                       // is set; confirm against inBodyIM.
+               case a.Select:
+                       p.reconstructActiveFormattingElements()
+                       // Foster parenting is enabled only while the <select> element
+                       // is added, and only when the current node is a table-structure
+                       // element.
+                       switch p.top().DataAtom {
+                       case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+                               p.fosterParenting = true
+                       }
+                       p.addElement()
+                       p.fosterParenting = false
+                       p.framesetOK = false
+                       p.im = inSelectInTableIM
+                       return true
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Table:
+                       // The token is consumed either way; the insertion mode is reset
+                       // only when a <table> in table scope was actually popped.
+                       if p.popUntil(tableScope, a.Table) {
+                               p.resetInsertionMode()
+                               return true
+                       }
+                       // Ignore the token.
+                       return true
+               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+                       // Ignore the token.
+                       return true
+               case a.Template:
+                       return inHeadIM(p)
+               }
+       case CommentToken:
+               p.addChild(&Node{
+                       Type: CommentNode,
+                       Data: p.tok.Data,
+               })
+               return true
+       case DoctypeToken:
+               // Ignore the token.
+               return true
+       case ErrorToken:
+               // Handled by inBodyIM without foster parenting.
+               return inBodyIM(p)
+       }
+
+       // Everything else is processed by inBodyIM with foster parenting enabled;
+       // the deferred function switches it off again when this function returns.
+       p.fosterParenting = true
+       defer func() { p.fosterParenting = false }()
+
+       return inBodyIM(p)
+}
+
+// inCaptionIM handles the current token (p.tok) while the parser is in the
+// "in caption" insertion mode. It returns true once the token has been
+// consumed and false when the token has to be reprocessed (see
+// parseCurrentToken) after the caption was closed and p.im was set to
+// inTableIM.
+//
+// Tokens without a specific case are handled by inBodyIM.
+//
+// Section 12.2.6.4.11.
+func inCaptionIM(p *parser) bool {
+       switch p.tok.Type {
+       case StartTagToken:
+               switch p.tok.DataAtom {
+               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+                       // Any table-structure start tag, including <th>, closes the
+                       // open caption and is then reprocessed in the table mode.
+                       if !p.popUntil(tableScope, a.Caption) {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.clearActiveFormattingElements()
+                       p.im = inTableIM
+                       return false
+               case a.Select:
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+                       p.framesetOK = false
+                       p.im = inSelectInTableIM
+                       return true
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Caption:
+                       // The token is consumed whether or not a caption was open.
+                       if p.popUntil(tableScope, a.Caption) {
+                               p.clearActiveFormattingElements()
+                               p.im = inTableIM
+                       }
+                       return true
+               case a.Table:
+                       // </table> first closes the caption and is then reprocessed.
+                       if !p.popUntil(tableScope, a.Caption) {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.clearActiveFormattingElements()
+                       p.im = inTableIM
+                       return false
+               case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+                       // Ignore the token.
+                       return true
+               }
+       }
+       return inBodyIM(p)
+}
+
+// inColumnGroupIM handles the current token (p.tok) while the parser is in
+// the "in column group" insertion mode. It returns true once the token has
+// been consumed and false when the token has to be reprocessed (see
+// parseCurrentToken) after the <colgroup> was popped and p.im was set to
+// inTableIM.
+//
+// Section 12.2.6.4.12.
+func inColumnGroupIM(p *parser) bool {
+       switch p.tok.Type {
+       case TextToken:
+               s := strings.TrimLeft(p.tok.Data, whitespace)
+               if len(s) < len(p.tok.Data) {
+                       // Add the initial whitespace to the current node.
+                       p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
+                       if s == "" {
+                               return true
+                       }
+                       // Keep only the non-whitespace remainder; it is handled by the
+                       // fallback code after the switch.
+                       p.tok.Data = s
+               }
+       case CommentToken:
+               p.addChild(&Node{
+                       Type: CommentNode,
+                       Data: p.tok.Data,
+               })
+               return true
+       case DoctypeToken:
+               // Ignore the token.
+               return true
+       case StartTagToken:
+               switch p.tok.DataAtom {
+               case a.Html:
+                       return inBodyIM(p)
+               case a.Col:
+                       // <col> is added and immediately popped again.
+                       p.addElement()
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+                       return true
+               case a.Template:
+                       return inHeadIM(p)
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Colgroup:
+                       // The token is consumed either way; the mode only changes when
+                       // the current node really is a <colgroup>.
+                       if p.oe.top().DataAtom == a.Colgroup {
+                               p.oe.pop()
+                               p.im = inTableIM
+                       }
+                       return true
+               case a.Col:
+                       // Ignore the token.
+                       return true
+               case a.Template:
+                       return inHeadIM(p)
+               }
+       case ErrorToken:
+               return inBodyIM(p)
+       }
+       // Fallback for every other token: if the current node is not a <colgroup>
+       // the token is ignored; otherwise the <colgroup> is popped and the token
+       // is reprocessed by inTableIM.
+       if p.oe.top().DataAtom != a.Colgroup {
+               return true
+       }
+       p.oe.pop()
+       p.im = inTableIM
+       return false
+}
+
+// inTableBodyIM handles the current token while the parser is in the
+// "in table body" insertion mode. A true result means the token was consumed;
+// false means it must be reprocessed (see parseCurrentToken). Tokens without
+// a dedicated branch are delegated to inTableIM.
+//
+// Section 12.2.6.4.13.
+func inTableBodyIM(p *parser) bool {
+       atom := p.tok.DataAtom
+       switch p.tok.Type {
+       case CommentToken:
+               comment := &Node{Type: CommentNode, Data: p.tok.Data}
+               p.addChild(comment)
+               return true
+       case StartTagToken:
+               switch atom {
+               case a.Tr:
+                       p.clearStackToContext(tableBodyScope)
+                       p.addElement()
+                       p.im = inRowIM
+                       return true
+               case a.Td, a.Th:
+                       // A cell outside a row: process an implied <tr>, then retry.
+                       p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
+                       return false
+               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
+                       if !p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
+                               // No open section in table scope: ignore the token.
+                               return true
+                       }
+                       p.im = inTableIM
+                       return false
+               }
+       case EndTagToken:
+               switch atom {
+               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
+                       // Ignore the token.
+                       return true
+               case a.Table:
+                       if !p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.im = inTableIM
+                       return false
+               case a.Tbody, a.Tfoot, a.Thead:
+                       // Consumed in either case; the section is only closed when a
+                       // matching element is in table scope.
+                       if p.elementInScope(tableScope, atom) {
+                               p.clearStackToContext(tableBodyScope)
+                               p.oe.pop()
+                               p.im = inTableIM
+                       }
+                       return true
+               }
+       }
+       return inTableIM(p)
+}
+
+// inRowIM handles the current token while the parser is in the "in row"
+// insertion mode. A true result means the token was consumed; false means it
+// must be reprocessed (see parseCurrentToken). Tokens without a dedicated
+// branch are delegated to inTableIM.
+//
+// Section 12.2.6.4.14.
+func inRowIM(p *parser) bool {
+       atom := p.tok.DataAtom
+       if p.tok.Type == StartTagToken {
+               switch atom {
+               case a.Td, a.Th:
+                       p.clearStackToContext(tableRowScope)
+                       p.addElement()
+                       // The new cell starts a fresh run of active formatting elements.
+                       p.afe = append(p.afe, &scopeMarker)
+                       p.im = inCellIM
+                       return true
+               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+                       if !p.popUntil(tableScope, a.Tr) {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.im = inTableBodyIM
+                       return false
+               }
+       } else if p.tok.Type == EndTagToken {
+               switch atom {
+               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
+                       // Ignore the token.
+                       return true
+               case a.Tr:
+                       // Consumed whether or not a row was open.
+                       if p.popUntil(tableScope, a.Tr) {
+                               p.im = inTableBodyIM
+                       }
+                       return true
+               case a.Table:
+                       if !p.popUntil(tableScope, a.Tr) {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.im = inTableBodyIM
+                       return false
+               case a.Tbody, a.Tfoot, a.Thead:
+                       if !p.elementInScope(tableScope, atom) {
+                               // Ignore the token.
+                               return true
+                       }
+                       // Process an implied </tr>, then reprocess the section end tag.
+                       p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
+                       return false
+               }
+       }
+
+       return inTableIM(p)
+}
+
+// inCellIM handles the current token while the parser is in the "in cell"
+// insertion mode. A true result means the token was consumed; false means it
+// must be reprocessed (see parseCurrentToken). Tokens without a dedicated
+// branch are delegated to inBodyIM.
+//
+// Section 12.2.6.4.15.
+func inCellIM(p *parser) bool {
+       atom := p.tok.DataAtom
+       switch p.tok.Type {
+       case EndTagToken:
+               switch atom {
+               case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
+                       // Ignore the token.
+                       return true
+               case a.Td, a.Th:
+                       if p.popUntil(tableScope, atom) {
+                               p.clearActiveFormattingElements()
+                               p.im = inRowIM
+                       }
+                       // Consumed either way; without a matching cell it is ignored.
+                       return true
+               case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
+                       if !p.elementInScope(tableScope, atom) {
+                               // Ignore the token.
+                               return true
+                       }
+                       // Close the cell and reprocess.
+                       if p.popUntil(tableScope, a.Td, a.Th) {
+                               p.clearActiveFormattingElements()
+                       }
+                       p.im = inRowIM
+                       return false
+               }
+       case StartTagToken:
+               switch atom {
+               case a.Select:
+                       p.reconstructActiveFormattingElements()
+                       p.addElement()
+                       p.framesetOK = false
+                       p.im = inSelectInTableIM
+                       return true
+               case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
+                       if !p.popUntil(tableScope, a.Td, a.Th) {
+                               // Ignore the token.
+                               return true
+                       }
+                       // Close the cell and reprocess.
+                       p.clearActiveFormattingElements()
+                       p.im = inRowIM
+                       return false
+               }
+       }
+       return inBodyIM(p)
+}
+
+// inSelectIM handles the current token (p.tok) while the parser is in the
+// "in select" insertion mode. It returns true once the token has been
+// consumed and false when the token has to be reprocessed (see
+// parseCurrentToken). Cases that do not return explicitly fall through to the
+// final "return true".
+//
+// Section 12.2.6.4.16.
+func inSelectIM(p *parser) bool {
+       switch p.tok.Type {
+       case TextToken:
+               // Text is added with NUL characters removed.
+               p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
+       case StartTagToken:
+               switch p.tok.DataAtom {
+               case a.Html:
+                       return inBodyIM(p)
+               case a.Option:
+                       // A new <option> closes an <option> that is the current node.
+                       if p.top().DataAtom == a.Option {
+                               p.oe.pop()
+                       }
+                       p.addElement()
+               case a.Optgroup:
+                       // A new <optgroup> closes a current <option> and then a current
+                       // <optgroup>.
+                       if p.top().DataAtom == a.Option {
+                               p.oe.pop()
+                       }
+                       if p.top().DataAtom == a.Optgroup {
+                               p.oe.pop()
+                       }
+                       p.addElement()
+               case a.Select:
+                       // A nested <select> start tag closes the open select (if one is
+                       // in select scope) instead of opening a new one.
+                       if !p.popUntil(selectScope, a.Select) {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.resetInsertionMode()
+               case a.Input, a.Keygen, a.Textarea:
+                       // With a <select> in select scope, process an implied </select>
+                       // and reprocess this token.
+                       if p.elementInScope(selectScope, a.Select) {
+                               p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
+                               return false
+                       }
+                       // In order to properly ignore <textarea>, we need to change the tokenizer mode.
+                       p.tokenizer.NextIsNotRawText()
+                       // Ignore the token.
+                       return true
+               case a.Script, a.Template:
+                       return inHeadIM(p)
+               case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
+                       // Don't let the tokenizer go into raw text mode when there are raw tags
+                       // to be ignored. These tags should be ignored from the tokenizer
+                       // properly.
+                       p.tokenizer.NextIsNotRawText()
+                       // Ignore the token.
+                       return true
+               }
+       case EndTagToken:
+               switch p.tok.DataAtom {
+               case a.Option:
+                       if p.top().DataAtom == a.Option {
+                               p.oe.pop()
+                       }
+               case a.Optgroup:
+                       // Look at the current node, or at the node below it when the
+                       // current node is an <option>; if that node is an <optgroup>,
+                       // truncate the stack so that it (and the <option>) are removed.
+                       // NOTE(review): assumes there is always an element below an open
+                       // <option>; otherwise p.oe[i] would be indexed with -1. Confirm.
+                       i := len(p.oe) - 1
+                       if p.oe[i].DataAtom == a.Option {
+                               i--
+                       }
+                       if p.oe[i].DataAtom == a.Optgroup {
+                               p.oe = p.oe[:i]
+                       }
+               case a.Select:
+                       if !p.popUntil(selectScope, a.Select) {
+                               // Ignore the token.
+                               return true
+                       }
+                       p.resetInsertionMode()
+               case a.Template:
+                       return inHeadIM(p)
+               }
+       case CommentToken:
+               p.addChild(&Node{
+                       Type: CommentNode,
+                       Data: p.tok.Data,
+               })
+       case DoctypeToken:
+               // Ignore the token.
+               return true
+       case ErrorToken:
+               return inBodyIM(p)
+       }
+
+       // Every remaining path has either handled or ignored the token.
+       return true
+}
+
+// inSelectInTableIM handles the current token while the parser is in the
+// "in select in table" insertion mode. Table-related start and end tags close
+// the open select and are reprocessed (false); everything else is delegated
+// to inSelectIM.
+//
+// Section 12.2.6.4.17.
+func inSelectInTableIM(p *parser) bool {
+       isEnd := p.tok.Type == EndTagToken
+       if !isEnd && p.tok.Type != StartTagToken {
+               return inSelectIM(p)
+       }
+       switch p.tok.DataAtom {
+       case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
+       default:
+               return inSelectIM(p)
+       }
+       if isEnd && !p.elementInScope(tableScope, p.tok.DataAtom) {
+               // Ignore the token.
+               return true
+       }
+       // This is like p.popUntil(selectScope, a.Select), but it also matches
+       // <math select>, not just <select>. Matching the MathML tag is arguably
+       // incorrect (conceptually), but it mimics what Chromium does.
+       idx := len(p.oe) - 1
+       for idx >= 0 && p.oe[idx].DataAtom != a.Select {
+               idx--
+       }
+       if idx >= 0 {
+               p.oe = p.oe[:idx]
+       }
+       p.resetInsertionMode()
+       return false
+}
+
+// inTemplateIM handles the current token while the parser is in the
+// "in template" insertion mode. A true result means the token was consumed;
+// false means it must be reprocessed (see parseCurrentToken), normally under
+// the insertion mode that was just pushed onto p.templateStack.
+//
+// Section 12.2.6.4.18.
+func inTemplateIM(p *parser) bool {
+       switch p.tok.Type {
+       case TextToken, CommentToken, DoctypeToken:
+               return inBodyIM(p)
+       case EndTagToken:
+               if p.tok.DataAtom == a.Template {
+                       return inHeadIM(p)
+               }
+               // Ignore the token.
+               return true
+       case StartTagToken:
+               // Pick the insertion mode that replaces the top of the template
+               // stack; the token is then reprocessed in that mode.
+               next := inBodyIM
+               switch p.tok.DataAtom {
+               case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
+                       return inHeadIM(p)
+               case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
+                       next = inTableIM
+               case a.Col:
+                       next = inColumnGroupIM
+               case a.Tr:
+                       next = inTableBodyIM
+               case a.Td, a.Th:
+                       next = inRowIM
+               }
+               p.templateStack.pop()
+               p.templateStack = append(p.templateStack, next)
+               p.im = next
+               return false
+       case ErrorToken:
+               if !p.oe.contains(a.Template) {
+                       // Ignore the token.
+                       return true
+               }
+               // TODO: remove this divergence from the HTML5 spec.
+               //
+               // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
+               p.generateImpliedEndTags()
+               // Drop the topmost HTML-namespace <template> and everything above it.
+               for i := len(p.oe) - 1; i >= 0; i-- {
+                       n := p.oe[i]
+                       if n.Namespace != "" || n.DataAtom != a.Template {
+                               continue
+                       }
+                       p.oe = p.oe[:i]
+                       break
+               }
+               p.clearActiveFormattingElements()
+               p.templateStack.pop()
+               p.resetInsertionMode()
+               return false
+       }
+       return false
+}
+
+// afterBodyIM handles the current token while the parser is in the
+// "after body" insertion mode. Tokens it does not handle itself switch the
+// parser back to inBodyIM and are reprocessed there (false).
+//
+// Section 12.2.6.4.19.
+func afterBodyIM(p *parser) bool {
+       switch p.tok.Type {
+       case CommentToken:
+               // The comment is attached to the <html> element.
+               if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
+                       panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
+               }
+               comment := &Node{Type: CommentNode, Data: p.tok.Data}
+               p.oe[0].AppendChild(comment)
+               return true
+       case EndTagToken:
+               if p.tok.DataAtom == a.Html {
+                       // Fragment parsing stays in this mode.
+                       if !p.fragment {
+                               p.im = afterAfterBodyIM
+                       }
+                       return true
+               }
+       case StartTagToken:
+               if p.tok.DataAtom == a.Html {
+                       return inBodyIM(p)
+               }
+       case TextToken:
+               if strings.TrimLeft(p.tok.Data, whitespace) == "" {
+                       // It was all whitespace.
+                       return inBodyIM(p)
+               }
+       case ErrorToken:
+               // Stop parsing.
+               return true
+       }
+       p.im = inBodyIM
+       return false
+}
+
+// inFramesetIM handles the current token while the parser is in the
+// "in frameset" insertion mode. Unless it delegates to inBodyIM or inHeadIM,
+// it always reports the token as consumed.
+//
+// Section 12.2.6.4.20.
+func inFramesetIM(p *parser) bool {
+       switch p.tok.Type {
+       case StartTagToken:
+               switch p.tok.DataAtom {
+               case a.Html:
+                       return inBodyIM(p)
+               case a.Noframes:
+                       return inHeadIM(p)
+               case a.Frameset:
+                       p.addElement()
+               case a.Frame:
+                       // <frame> is added and immediately popped again.
+                       p.addElement()
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+               }
+       case EndTagToken:
+               if p.tok.DataAtom == a.Frameset && p.oe.top().DataAtom != a.Html {
+                       p.oe.pop()
+                       // Leaving the outermost frameset ends this insertion mode.
+                       if p.oe.top().DataAtom != a.Frameset {
+                               p.im = afterFramesetIM
+                       }
+               }
+       case TextToken:
+               // Ignore all text but whitespace.
+               keepSpace := func(r rune) rune {
+                       if r == ' ' || r == '\t' || r == '\n' || r == '\f' || r == '\r' {
+                               return r
+                       }
+                       return -1
+               }
+               if ws := strings.Map(keepSpace, p.tok.Data); ws != "" {
+                       p.addText(ws)
+               }
+       case CommentToken:
+               comment := &Node{Type: CommentNode, Data: p.tok.Data}
+               p.addChild(comment)
+       }
+       // Any other token is ignored.
+       return true
+}
+
+// afterFramesetIM handles the current token while the parser is in the
+// "after frameset" insertion mode. Unless it delegates to inBodyIM or
+// inHeadIM, it always reports the token as consumed.
+//
+// Section 12.2.6.4.21.
+func afterFramesetIM(p *parser) bool {
+       switch p.tok.Type {
+       case StartTagToken:
+               if p.tok.DataAtom == a.Html {
+                       return inBodyIM(p)
+               }
+               if p.tok.DataAtom == a.Noframes {
+                       return inHeadIM(p)
+               }
+       case EndTagToken:
+               if p.tok.DataAtom == a.Html {
+                       p.im = afterAfterFramesetIM
+               }
+       case TextToken:
+               // Ignore all text but whitespace.
+               keepSpace := func(r rune) rune {
+                       if r == ' ' || r == '\t' || r == '\n' || r == '\f' || r == '\r' {
+                               return r
+                       }
+                       return -1
+               }
+               if ws := strings.Map(keepSpace, p.tok.Data); ws != "" {
+                       p.addText(ws)
+               }
+       case CommentToken:
+               comment := &Node{Type: CommentNode, Data: p.tok.Data}
+               p.addChild(comment)
+       }
+       // Any other token is ignored.
+       return true
+}
+
+// afterAfterBodyIM handles the current token while the parser is in the
+// "after after body" insertion mode. Tokens it does not handle itself switch
+// the parser back to inBodyIM and are reprocessed there (false).
+//
+// Section 12.2.6.4.22.
+func afterAfterBodyIM(p *parser) bool {
+       switch p.tok.Type {
+       case DoctypeToken:
+               return inBodyIM(p)
+       case CommentToken:
+               // The comment becomes a child of the document node.
+               comment := &Node{Type: CommentNode, Data: p.tok.Data}
+               p.doc.AppendChild(comment)
+               return true
+       case StartTagToken:
+               if p.tok.DataAtom == a.Html {
+                       return inBodyIM(p)
+               }
+       case TextToken:
+               if strings.TrimLeft(p.tok.Data, whitespace) == "" {
+                       // It was all whitespace.
+                       return inBodyIM(p)
+               }
+       case ErrorToken:
+               // Stop parsing.
+               return true
+       }
+       p.im = inBodyIM
+       return false
+}
+
+// afterAfterFramesetIM handles the current token while the parser is in the
+// "after after frameset" insertion mode. Unless it delegates to inBodyIM or
+// inHeadIM, it always reports the token as consumed.
+//
+// Section 12.2.6.4.23.
+func afterAfterFramesetIM(p *parser) bool {
+       switch p.tok.Type {
+       case DoctypeToken:
+               return inBodyIM(p)
+       case StartTagToken:
+               if p.tok.DataAtom == a.Html {
+                       return inBodyIM(p)
+               }
+               if p.tok.DataAtom == a.Noframes {
+                       return inHeadIM(p)
+               }
+       case TextToken:
+               // Ignore all text but whitespace.
+               keepSpace := func(r rune) rune {
+                       if r == ' ' || r == '\t' || r == '\n' || r == '\f' || r == '\r' {
+                               return r
+                       }
+                       return -1
+               }
+               if ws := strings.Map(keepSpace, p.tok.Data); ws != "" {
+                       // Only the whitespace is passed on to inBodyIM.
+                       p.tok.Data = ws
+                       return inBodyIM(p)
+               }
+       case CommentToken:
+               // The comment becomes a child of the document node.
+               comment := &Node{Type: CommentNode, Data: p.tok.Data}
+               p.doc.AppendChild(comment)
+       }
+       // Any other token is ignored.
+       return true
+}
+
+// ignoreTheRemainingTokens reports every token as consumed without acting on
+// it; the parser argument is not used.
+func ignoreTheRemainingTokens(p *parser) bool {
+       return true
+}
+
+// whitespaceOrNUL is the whitespace set extended by the NUL character. It is
+// used by parseForeignContent to decide whether text leaves p.framesetOK set.
+const whitespaceOrNUL = whitespace + "\x00"
+
+// parseForeignContent handles the current token (p.tok) when
+// p.inForeignContent() reports that it belongs to foreign content. It returns
+// true once the token has been consumed and false when the token has to be
+// reprocessed (see parseCurrentToken).
+//
+// Section 12.2.6.5
+func parseForeignContent(p *parser) bool {
+       switch p.tok.Type {
+       case TextToken:
+               // framesetOK only survives text made up of whitespace and NULs.
+               if p.framesetOK {
+                       p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
+               }
+               // NUL characters are replaced by U+FFFD before the text is added.
+               p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
+               p.addText(p.tok.Data)
+       case CommentToken:
+               p.addChild(&Node{
+                       Type: CommentNode,
+                       Data: p.tok.Data,
+               })
+       case StartTagToken:
+               if !p.fragment {
+                       // b reports whether this start tag breaks out of foreign
+                       // content: either its name is in the breakout table, or it is a
+                       // <font> tag with a color, face or size attribute.
+                       b := breakout[p.tok.Data]
+                       if p.tok.DataAtom == a.Font {
+                       loop:
+                               for _, attr := range p.tok.Attr {
+                                       switch attr.Key {
+                                       case "color", "face", "size":
+                                               b = true
+                                               break loop
+                                       }
+                               }
+                       }
+                       if b {
+                               // Truncate the stack so that the topmost element that is in
+                               // no namespace or is an integration point becomes the
+                               // current node, then reprocess the token.
+                               for i := len(p.oe) - 1; i >= 0; i-- {
+                                       n := p.oe[i]
+                                       if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
+                                               p.oe = p.oe[:i+1]
+                                               break
+                                       }
+                               }
+                               return false
+                       }
+               }
+               current := p.adjustedCurrentNode()
+               switch current.Namespace {
+               case "math":
+                       adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
+               case "svg":
+                       // Adjust SVG tag names. The tokenizer lower-cases tag names, but
+                       // SVG wants e.g. "foreignObject" with a capital second "O".
+                       if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
+                               p.tok.DataAtom = a.Lookup([]byte(x))
+                               p.tok.Data = x
+                       }
+                       adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
+               default:
+                       panic("html: bad parser state: unexpected namespace")
+               }
+               adjustForeignAttributes(p.tok.Attr)
+               // The new element gets the namespace of the adjusted current node.
+               namespace := current.Namespace
+               p.addElement()
+               p.top().Namespace = namespace
+               if namespace != "" {
+                       // Don't let the tokenizer go into raw text mode in foreign content
+                       // (e.g. in an SVG <title> tag).
+                       p.tokenizer.NextIsNotRawText()
+               }
+               if p.hasSelfClosingToken {
+                       p.oe.pop()
+                       p.acknowledgeSelfClosingTag()
+               }
+       case EndTagToken:
+               // Walk down the stack: on reaching an element in no namespace, hand
+               // the token to the current insertion mode; on a case-insensitive name
+               // match, drop that element and everything above it.
+               for i := len(p.oe) - 1; i >= 0; i-- {
+                       if p.oe[i].Namespace == "" {
+                               return p.im(p)
+                       }
+                       if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
+                               p.oe = p.oe[:i]
+                               break
+                       }
+               }
+               return true
+       default:
+               // Ignore the token.
+       }
+       return true
+}
+
+// adjustedCurrentNode returns the fragment context element when a fragment is
+// being parsed, a context is set and exactly one element is open; in every
+// other case it returns the top of the stack of open elements.
+//
+// Section 12.2.4.2.
+func (p *parser) adjustedCurrentNode() *Node {
+       useContext := p.fragment && p.context != nil && len(p.oe) == 1
+       if !useContext {
+               return p.oe.top()
+       }
+       return p.context
+}
+
+// inForeignContent reports whether the current token has to be handled by
+// parseForeignContent rather than by the current insertion mode.
+//
+// Section 12.2.6.
+func (p *parser) inForeignContent() bool {
+       if len(p.oe) == 0 {
+               return false
+       }
+       n := p.adjustedCurrentNode()
+       if n.Namespace == "" {
+               return false
+       }
+       tokType, atom := p.tok.Type, p.tok.DataAtom
+       isStart := tokType == StartTagToken
+       isText := tokType == TextToken
+       // The cases are tried in order; the first one that matches sends the
+       // token to the regular insertion mode.
+       switch {
+       case mathMLTextIntegrationPoint(n) && (isText || (isStart && atom != a.Mglyph && atom != a.Malignmark)):
+               return false
+       case n.Namespace == "math" && n.DataAtom == a.AnnotationXml && isStart && atom == a.Svg:
+               return false
+       case htmlIntegrationPoint(n) && (isStart || isText):
+               return false
+       case tokType == ErrorToken:
+               return false
+       }
+       return true
+}
+
+// parseImpliedToken runs a synthesized token of type t with the given atom
+// and data through the parser as if it had been read from the input. The real
+// current token and its self-closing flag are saved beforehand and restored
+// afterwards.
+func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
+       savedTok := p.tok
+       savedSelfClosing := p.hasSelfClosingToken
+
+       p.tok = Token{Type: t, DataAtom: dataAtom, Data: data}
+       p.hasSelfClosingToken = false
+       p.parseCurrentToken()
+
+       p.tok = savedTok
+       p.hasSelfClosingToken = savedSelfClosing
+}
+
+// parseCurrentToken feeds p.tok to the parsing routines, repeating until one
+// of them reports the token as consumed. A self-closing tag token is first
+// turned into a start tag token with hasSelfClosingToken set.
+func (p *parser) parseCurrentToken() {
+       if p.tok.Type == SelfClosingTagToken {
+               p.tok.Type = StartTagToken
+               p.hasSelfClosingToken = true
+       }
+
+       for {
+               var done bool
+               if p.inForeignContent() {
+                       done = parseForeignContent(p)
+               } else {
+                       done = p.im(p)
+               }
+               if done {
+                       break
+               }
+       }
+
+       // A self-closing flag that is still set here is a parse error, which is
+       // ignored: the flag is simply cleared.
+       p.hasSelfClosingToken = false
+}
+
+func (p *parser) parse() error {
+       // Iterate until EOF. Any other error will cause an early return.
+       var err error
+       for err != io.EOF {
+               // CDATA sections are allowed only in foreign content.
+               n := p.oe.top()
+               p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
+               // Read and parse the next token.
+               p.tokenizer.Next()
+               p.tok = p.tokenizer.Token()
+               if p.tok.Type == ErrorToken {
+                       err = p.tokenizer.Err()
+                       if err != nil && err != io.EOF {
+                               return err
+                       }
+               }
+               p.parseCurrentToken()
+       }
+       return nil
+}
+
+// Parse returns the parse tree for the HTML from the given Reader.
+//
+// It implements the HTML5 parsing algorithm
+// (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
+// which is very complicated. The resultant tree can contain implicitly created
+// nodes that have no explicit <tag> listed in r's data, and nodes' parents can
+// differ from the nesting implied by a naive processing of start and end
+// <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
+// with no corresponding node in the resulting tree.
+//
+// The input is assumed to be UTF-8 encoded.
+//
+// Parse is shorthand for calling ParseWithOptions with no options.
+func Parse(r io.Reader) (*Node, error) {
+       return ParseWithOptions(r)
+}
+
+// ParseFragment parses a fragment of HTML and returns the nodes that were
+// found. If the fragment is the InnerHTML for an existing element, pass that
+// element in context.
+//
+// It has the same intricacies as Parse.
+//
+// ParseFragment is shorthand for calling ParseFragmentWithOptions with no
+// options.
+func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
+       return ParseFragmentWithOptions(r, context)
+}
+
+// ParseOption configures a parser. ParseWithOptions and
+// ParseFragmentWithOptions call each option, in the order given, on the
+// parser they construct before parsing starts.
+type ParseOption func(p *parser)
+
+// ParseOptionEnableScripting configures the scripting flag.
+// https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
+//
+// By default, scripting is enabled.
+func ParseOptionEnableScripting(enable bool) ParseOption {
+       return func(p *parser) {
+               p.scripting = enable
+       }
+}
+
+// ParseWithOptions is like Parse, with options.
+func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
+       p := &parser{
+               tokenizer: NewTokenizer(r),
+               doc: &Node{
+                       Type: DocumentNode,
+               },
+               scripting:  true,
+               framesetOK: true,
+               im:         initialIM,
+       }
+
+       for _, f := range opts {
+               f(p)
+       }
+
+       if err := p.parse(); err != nil {
+               return nil, err
+       }
+       return p.doc, nil
+}
+
+// ParseFragmentWithOptions is like ParseFragment, with options.
+//
+// context may be nil. A non-nil context must be an ElementNode whose DataAtom
+// agrees with its Data; otherwise an error is returned before any input is
+// read. The returned nodes are detached from the internal tree before being
+// returned.
+func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
+       contextTag := ""
+       if context != nil {
+               if context.Type != ElementNode {
+                       return nil, errors.New("html: ParseFragment of non-element Node")
+               }
+               // The next check isn't just context.DataAtom.String() == context.Data because
+               // it is valid to pass an element whose tag isn't a known atom. For example,
+               // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
+               if context.DataAtom != a.Lookup([]byte(context.Data)) {
+                       return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
+               }
+               contextTag = context.DataAtom.String()
+       }
+       p := &parser{
+               doc: &Node{
+                       Type: DocumentNode,
+               },
+               scripting: true,
+               fragment:  true,
+               context:   context,
+       }
+       // A context element in a non-empty namespace gets a plain tokenizer; any
+       // other context gets a fragment tokenizer seeded with the context tag.
+       if context != nil && context.Namespace != "" {
+               p.tokenizer = NewTokenizer(r)
+       } else {
+               p.tokenizer = NewTokenizerFragment(r, contextTag)
+       }
+
+       for _, f := range opts {
+               f(p)
+       }
+
+       // The fragment is parsed under a synthetic <html> root element, which is
+       // the only entry on the stack of open elements at the start.
+       root := &Node{
+               Type:     ElementNode,
+               DataAtom: a.Html,
+               Data:     a.Html.String(),
+       }
+       p.doc.AppendChild(root)
+       p.oe = nodeStack{root}
+       if context != nil && context.DataAtom == a.Template {
+               p.templateStack = append(p.templateStack, inTemplateIM)
+       }
+       p.resetInsertionMode()
+
+       // Use the nearest <form> element among context and its ancestors, if
+       // any, as the parser's form.
+       for n := context; n != nil; n = n.Parent {
+               if n.Type == ElementNode && n.DataAtom == a.Form {
+                       p.form = n
+                       break
+               }
+       }
+
+       if err := p.parse(); err != nil {
+               return nil, err
+       }
+
+       // With a context the result is the children of the synthetic root;
+       // without one it is the children of the document itself.
+       parent := p.doc
+       if context != nil {
+               parent = root
+       }
+
+       var result []*Node
+       for c := parent.FirstChild; c != nil; {
+               // Save the sibling before removing c from parent.
+               next := c.NextSibling
+               parent.RemoveChild(c)
+               result = append(result, c)
+               c = next
+       }
+       return result, nil
+}
diff --git a/internal/html/parse_test.go b/internal/html/parse_test.go
new file mode 100644 (file)
index 0000000..b2b7fdc
--- /dev/null
@@ -0,0 +1,490 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bufio"
+       "bytes"
+       "errors"
+       "fmt"
+       "io"
+       "os"
+       "path/filepath"
+       "runtime"
+       "sort"
+       "strings"
+       "testing"
+
+       "git.earlybird.gay/today-engine/internal/html/atom"
+)
+
+// testAttrs holds one test case as read by readParseTest.
+type testAttrs struct {
+       // text is the HTML input, want is the expected tree dump and context is
+       // the fragment context ("" when the case parses a whole document).
+       text, want, context string
+       // scripting is the scripting flag for the case; readParseTest defaults
+       // it to true.
+       scripting           bool
+}
+
+// readParseTest reads a single test case from r.
+//
+// A test case is a sequence of sections, each introduced by a line that
+// starts with '#', in this order:
+//
+//     #data               the HTML input (required)
+//     #errors             skipped (required)
+//     #new-errors         skipped (optional)
+//     #script-on/-off     sets the scripting flag (optional)
+//     #document-fragment  the fragment context (optional)
+//     #document           the expected tree dump (required)
+//
+// A read error before the #document section, including io.EOF, is returned
+// unchanged. Inside the #document section io.EOF ends the test case instead.
+func readParseTest(r *bufio.Reader) (*testAttrs, error) {
+       ta := &testAttrs{scripting: true}
+       line, err := r.ReadSlice('\n')
+       if err != nil {
+               return nil, err
+       }
+       var b []byte
+
+       // Read the HTML.
+       if string(line) != "#data\n" {
+               return nil, fmt.Errorf(`got %q want "#data\n"`, line)
+       }
+       for {
+               line, err = r.ReadSlice('\n')
+               if err != nil {
+                       return nil, err
+               }
+               if line[0] == '#' {
+                       break
+               }
+               b = append(b, line...)
+       }
+       // Only the final newline is dropped; earlier newlines are part of the input.
+       ta.text = strings.TrimSuffix(string(b), "\n")
+       b = b[:0]
+
+       // Skip the error list.
+       if string(line) != "#errors\n" {
+               return nil, fmt.Errorf(`got %q want "#errors\n"`, line)
+       }
+       for {
+               line, err = r.ReadSlice('\n')
+               if err != nil {
+                       return nil, err
+               }
+               if line[0] == '#' {
+                       break
+               }
+       }
+
+       // Skip the new-errors list.
+       if string(line) == "#new-errors\n" {
+               for {
+                       line, err = r.ReadSlice('\n')
+                       if err != nil {
+                               return nil, err
+                       }
+                       if line[0] == '#' {
+                               break
+                       }
+               }
+       }
+
+       // An optional "#script-on" or "#script-off" header sets the scripting
+       // flag; the lines that follow it are skipped up to the next header.
+       if ls := string(line); strings.HasPrefix(ls, "#script-") {
+               switch {
+               case strings.HasSuffix(ls, "-on\n"):
+                       ta.scripting = true
+               case strings.HasSuffix(ls, "-off\n"):
+                       ta.scripting = false
+               default:
+                       return nil, fmt.Errorf(`got %q, want "#script-on" or "#script-off"`, line)
+               }
+               for {
+                       line, err = r.ReadSlice('\n')
+                       if err != nil {
+                               return nil, err
+                       }
+                       if line[0] == '#' {
+                               break
+                       }
+               }
+       }
+
+       // An optional "#document-fragment" header is followed by exactly one
+       // line naming the context, then by the next header line.
+       if string(line) == "#document-fragment\n" {
+               line, err = r.ReadSlice('\n')
+               if err != nil {
+                       return nil, err
+               }
+               ta.context = strings.TrimSpace(string(line))
+               line, err = r.ReadSlice('\n')
+               if err != nil {
+                       return nil, err
+               }
+       }
+
+       // Read the dump of what the parse tree should be.
+       if string(line) != "#document\n" {
+               return nil, fmt.Errorf(`got %q want "#document\n"`, line)
+       }
+       // inQuote tracks whether the current line is inside a quoted text node,
+       // so that blank lines inside the quotes do not end the dump.
+       inQuote := false
+       for {
+               line, err = r.ReadSlice('\n')
+               if err != nil && err != io.EOF {
+                       return nil, err
+               }
+               trimmed := bytes.Trim(line, "| \n")
+               if len(trimmed) > 0 {
+                       // A '|' line whose content starts with a quote opens a text node.
+                       if line[0] == '|' && trimmed[0] == '"' {
+                               inQuote = true
+                       }
+                       // A trailing quote closes it, unless the line is a '|' line
+                       // holding only that one quote character.
+                       if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) {
+                               inQuote = false
+                       }
+               }
+               // Stop at end of input, or at a blank line outside a quoted text node.
+               if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote {
+                       break
+               }
+               b = append(b, line...)
+       }
+       ta.want = string(b)
+       return ta, nil
+}
+
+func dumpIndent(w io.Writer, level int) {
+       io.WriteString(w, "| ")
+       for i := 0; i < level; i++ {
+               io.WriteString(w, "  ")
+       }
+}
+
+// sortedAttributes implements sort.Interface for a slice of attributes,
+// ordering them by Namespace and then by Key.
+type sortedAttributes []Attribute
+
+// Len returns the number of attributes.
+func (a sortedAttributes) Len() int { return len(a) }
+
+// Less compares Namespace first and falls back to Key when the namespaces
+// are equal.
+func (a sortedAttributes) Less(i, j int) bool {
+       x, y := a[i], a[j]
+       if x.Namespace == y.Namespace {
+               return x.Key < y.Key
+       }
+       return x.Namespace < y.Namespace
+}
+
+// Swap exchanges the attributes at indexes i and j.
+func (a sortedAttributes) Swap(i, j int) {
+       tmp := a[i]
+       a[i] = a[j]
+       a[j] = tmp
+}
+
+// dumpLevel writes n and, recursively, its children to w in the tree-dump
+// format that the test data files use, starting at indentation level.
+//
+// It returns an error for node types that are not expected inside a dumped
+// tree (ErrorNode, DocumentNode, scopeMarkerNode and unknown types). Write
+// errors on w are not checked.
+func dumpLevel(w io.Writer, n *Node, level int) error {
+       dumpIndent(w, level)
+       // From here on, level is the indentation of n's attributes and children.
+       level++
+       switch n.Type {
+       case ErrorNode:
+               return errors.New("unexpected ErrorNode")
+       case DocumentNode:
+               return errors.New("unexpected DocumentNode")
+       case ElementNode:
+               if n.Namespace != "" {
+                       fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
+               } else {
+                       fmt.Fprintf(w, "<%s>", n.Data)
+               }
+               // attr shares its backing array with n.Attr, so this sorts the
+               // node's attributes in place.
+               attr := sortedAttributes(n.Attr)
+               sort.Sort(attr)
+               for _, a := range attr {
+                       io.WriteString(w, "\n")
+                       dumpIndent(w, level)
+                       if a.Namespace != "" {
+                               fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
+                       } else {
+                               fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
+                       }
+               }
+               // An HTML <template> gets an extra "content" line, and its
+               // children are indented one level deeper, beneath that line.
+               if n.Namespace == "" && n.DataAtom == atom.Template {
+                       io.WriteString(w, "\n")
+                       dumpIndent(w, level)
+                       level++
+                       io.WriteString(w, "content")
+               }
+       case TextNode:
+               fmt.Fprintf(w, `"%s"`, n.Data)
+       case CommentNode:
+               fmt.Fprintf(w, "<!-- %s -->", n.Data)
+       case DoctypeNode:
+               fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
+               if n.Attr != nil {
+                       var p, s string
+                       for _, a := range n.Attr {
+                               switch a.Key {
+                               case "public":
+                                       p = a.Val
+                               case "system":
+                                       s = a.Val
+                               }
+                       }
+                       // Both identifiers are printed whenever either one is set.
+                       if p != "" || s != "" {
+                               fmt.Fprintf(w, ` "%s"`, p)
+                               fmt.Fprintf(w, ` "%s"`, s)
+                       }
+               }
+               io.WriteString(w, ">")
+       case scopeMarkerNode:
+               return errors.New("unexpected scopeMarkerNode")
+       default:
+               return errors.New("unknown node type")
+       }
+       io.WriteString(w, "\n")
+       for c := n.FirstChild; c != nil; c = c.NextSibling {
+               if err := dumpLevel(w, c, level); err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+func dump(n *Node) (string, error) {
+       if n == nil || n.FirstChild == nil {
+               return "", nil
+       }
+       var b bytes.Buffer
+       for c := n.FirstChild; c != nil; c = c.NextSibling {
+               if err := dumpLevel(&b, c, 0); err != nil {
+                       return "", err
+               }
+       }
+       return b.String(), nil
+}
+
+// testDataDirs lists the directories in which TestParser looks for *.dat
+// test files. Each entry ends in a slash because the glob pattern is
+// appended to it directly.
+var testDataDirs = []string{"testdata/webkit/", "testdata/go/"}
+
+func TestParser(t *testing.T) {
+       for _, testDataDir := range testDataDirs {
+               testFiles, err := filepath.Glob(testDataDir + "*.dat")
+               if err != nil {
+                       t.Fatal(err)
+               }
+               for _, tf := range testFiles {
+                       f, err := os.Open(tf)
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       defer f.Close()
+                       r := bufio.NewReader(f)
+
+                       for i := 0; ; i++ {
+                               ta, err := readParseTest(r)
+                               if err == io.EOF {
+                                       break
+                               }
+                               if err != nil {
+                                       t.Fatal(err)
+                               }
+                               if parseTestBlacklist[ta.text] {
+                                       continue
+                               }
+
+                               err = testParseCase(ta.text, ta.want, ta.context, ParseOptionEnableScripting(ta.scripting))
+
+                               if err != nil {
+                                       t.Errorf("%s test #%d %q, %s", tf, i, ta.text, err)
+                               }
+                       }
+               }
+       }
+}
+
+// TestParserWithoutScripting parses a <noscript> element that contains an
+// <img> with scripting disabled. In the expected tree the <noscript> stays
+// in <head> while both <img> elements end up in <body>.
+//
+// Issue 16318
+func TestParserWithoutScripting(t *testing.T) {
+       text := `<noscript><img src='https://golang.org/doc/gopher/frontpage.png' /></noscript><p><img src='https://golang.org/doc/gopher/doc.png' /></p>`
+       want := `| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <img>
+|       src="https://golang.org/doc/gopher/frontpage.png"
+|     <p>
+|       <img>
+|         src="https://golang.org/doc/gopher/doc.png"
+`
+
+       if err := testParseCase(text, want, "", ParseOptionEnableScripting(false)); err != nil {
+               t.Errorf("test with scripting is disabled, %q, %s", text, err)
+       }
+}
+
+// testParseCase tests one test case from the test files. If the test does not
+// pass, it returns an error that explains the failure.
+// text is the HTML to be parsed, want is a dump of the correct parse tree,
+// and context is the name of the context node, if any.
+//
+// A context of the form "namespace name" is split at the first space into the
+// context node's Namespace and Data. A panic raised while the case runs is
+// recovered and reported through the returned error.
+func testParseCase(text, want, context string, opts ...ParseOption) (err error) {
+       // Convert a panic into the named return value so that one bad case does
+       // not abort the whole test run.
+       defer func() {
+               if x := recover(); x != nil {
+                       switch e := x.(type) {
+                       case error:
+                               err = e
+                       default:
+                               err = fmt.Errorf("%v", e)
+                       }
+               }
+       }()
+
+       var doc *Node
+       if context == "" {
+               doc, err = ParseWithOptions(strings.NewReader(text), opts...)
+               if err != nil {
+                       return err
+               }
+       } else {
+               namespace := ""
+               if i := strings.IndexByte(context, ' '); i >= 0 {
+                       namespace, context = context[:i], context[i+1:]
+               }
+               contextNode := &Node{
+                       Data:      context,
+                       DataAtom:  atom.Lookup([]byte(context)),
+                       Namespace: namespace,
+                       Type:      ElementNode,
+               }
+               nodes, err := ParseFragmentWithOptions(strings.NewReader(text), contextNode, opts...)
+               if err != nil {
+                       return err
+               }
+               // Hang the fragment's nodes under a fresh document so that they can
+               // be checked and dumped the same way as a full parse.
+               doc = &Node{
+                       Type: DocumentNode,
+               }
+               for _, n := range nodes {
+                       doc.AppendChild(n)
+               }
+       }
+
+       if err := checkTreeConsistency(doc); err != nil {
+               return err
+       }
+
+       got, err := dump(doc)
+       if err != nil {
+               return err
+       }
+       // Compare the parsed tree to the #document section.
+       if got != want {
+               return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want)
+       }
+
+       // The render round trip is skipped for blacklisted inputs and for every
+       // fragment case.
+       if renderTestBlacklist[text] || context != "" {
+               return nil
+       }
+
+       // Check that rendering and re-parsing results in an identical tree.
+       pr, pw := io.Pipe()
+       go func() {
+               // Closing the pipe with Render's result ends the read side and
+               // passes any render error on to the parser.
+               pw.CloseWithError(Render(pw, doc))
+       }()
+       doc1, err := ParseWithOptions(pr, opts...)
+       if err != nil {
+               return err
+       }
+       got1, err := dump(doc1)
+       if err != nil {
+               return err
+       }
+       if got != got1 {
+               return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1)
+       }
+
+       return nil
+}
+
+// Some test inputs are simply skipped - we would otherwise fail the test. We
+// blacklist such inputs from the parse test.
+//
+// The map is keyed by the exact text of a test case's #data section.
+var parseTestBlacklist = map[string]bool{
+       // See the a.Template TODO in inHeadIM.
+       `<math><template><mo><template>`:                                     true,
+       `<template><svg><foo><template><foreignObject><div></template><div>`: true,
+}
+
+// Some test inputs result in parse trees that are not 'well-formed' despite
+// following the HTML5 recovery algorithms. Rendering and re-parsing such a
+// tree will not result in an exact clone of that tree. We blacklist such
+// inputs from the render test.
+var renderTestBlacklist = map[string]bool{
+       // The second <a> will be reparented to the first <table>'s parent. This
+       // results in an <a> whose parent is an <a>, which is not 'well-formed'.
+       `<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
+       // The same thing with a <p>:
+       `<p><table></p>`: true,
+       // More cases of <a> being reparented:
+       `<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
+       `<a><table><a></table><p><a><div><a>`:                                     true,
+       `<a><table><td><a><table></table><a></tr><a></table><a>`:                  true,
+       `<template><a><table><a>`:                                                 true,
+       // A similar reparenting situation involving <nobr>:
+       `<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`: true,
+       // A <plaintext> element is reparented, putting it before a table.
+       // A <plaintext> element can't have anything after it in HTML.
+       `<table><plaintext><td>`:                                   true,
+       `<!doctype html><table><plaintext></plaintext>`:            true,
+       `<!doctype html><table><tbody><plaintext></plaintext>`:     true,
+       `<!doctype html><table><tbody><tr><plaintext></plaintext>`: true,
+       // A form inside a table inside a form doesn't work either.
+       `<!doctype html><form><table></form><form></table></form>`: true,
+       // A script that ends at EOF may escape its own closing tag when rendered.
+       `<!doctype html><script><!--<script `:          true,
+       `<!doctype html><script><!--<script <`:         true,
+       `<!doctype html><script><!--<script <a`:        true,
+       `<!doctype html><script><!--<script </`:        true,
+       `<!doctype html><script><!--<script </s`:       true,
+       `<!doctype html><script><!--<script </script`:  true,
+       `<!doctype html><script><!--<script </scripta`: true,
+       `<!doctype html><script><!--<script -`:         true,
+       `<!doctype html><script><!--<script -a`:        true,
+       `<!doctype html><script><!--<script -<`:        true,
+       `<!doctype html><script><!--<script --`:        true,
+       `<!doctype html><script><!--<script --a`:       true,
+       `<!doctype html><script><!--<script --<`:       true,
+       `<script><!--<script `:                         true,
+       `<script><!--<script <a`:                       true,
+       `<script><!--<script </script`:                 true,
+       `<script><!--<script </scripta`:                true,
+       `<script><!--<script -`:                        true,
+       `<script><!--<script -a`:                       true,
+       `<script><!--<script --`:                       true,
+       `<script><!--<script --a`:                      true,
+       `<script><!--<script <`:                        true,
+       `<script><!--<script </`:                       true,
+       `<script><!--<script </s`:                      true,
+       // Reconstructing the active formatting elements results in a <plaintext>
+       // element that contains an <a> element.
+       `<!doctype html><p><a><plaintext>b`:                       true,
+       `<table><math><select><mi><select></table>`:               true,
+       `<!doctype html><table><colgroup><plaintext></plaintext>`: true,
+       `<!doctype html><svg><plaintext>a</plaintext>b`:           true,
+}
+
+func TestNodeConsistency(t *testing.T) {
+       // inconsistentNode is a Node whose DataAtom and Data do not agree.
+       inconsistentNode := &Node{
+               Type:     ElementNode,
+               DataAtom: atom.Frameset,
+               Data:     "table",
+       }
+       if _, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode); err == nil {
+               t.Errorf("got nil error, want non-nil")
+       }
+}
+
+// TestParseFragmentWithNilContext checks only that ParseFragment does not
+// panic when given a nil context; the returned nodes and error are ignored.
+func TestParseFragmentWithNilContext(t *testing.T) {
+       // This shouldn't panic.
+       ParseFragment(strings.NewReader("<p>hello</p>"), nil)
+}
+
+func TestParseFragmentForeignContentTemplates(t *testing.T) {
+       srcs := []string{
+               "<math><html><template><mn><template></template></template>",
+               "<math><math><head><mi><template>",
+       }
+       for _, src := range srcs {
+               // The next line shouldn't infinite-loop.
+               ParseFragment(strings.NewReader(src), nil)
+       }
+}
+
+func BenchmarkParser(b *testing.B) {
+       buf, err := os.ReadFile("testdata/go1.html")
+       if err != nil {
+               b.Fatalf("could not read testdata/go1.html: %v", err)
+       }
+       b.SetBytes(int64(len(buf)))
+       runtime.GC()
+       b.ReportAllocs()
+       b.ResetTimer()
+       for i := 0; i < b.N; i++ {
+               Parse(bytes.NewBuffer(buf))
+       }
+}
diff --git a/internal/html/render.go b/internal/html/render.go
new file mode 100644 (file)
index 0000000..e8c1233
--- /dev/null
@@ -0,0 +1,293 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bufio"
+       "errors"
+       "fmt"
+       "io"
+       "strings"
+)
+
+// writer is the set of write methods that the rendering code uses. Render
+// uses a destination that already implements it directly and wraps any other
+// io.Writer in a bufio.Writer.
+type writer interface {
+       io.Writer
+       io.ByteWriter
+       WriteString(string) (int, error)
+}
+
+// Render renders the parse tree n to the given writer.
+//
+// Rendering is done on a 'best effort' basis: calling Parse on the output of
+// Render will always result in something similar to the original tree, but it
+// is not necessarily an exact clone unless the original tree was 'well-formed'.
+// 'Well-formed' is not easily specified; the HTML5 specification is
+// complicated.
+//
+// Calling Parse on arbitrary input typically results in a 'well-formed' parse
+// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
+// For example, in a 'well-formed' parse tree, no <a> element is a child of
+// another <a> element: parsing "<a><a>" results in two sibling elements.
+// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
+// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
+// children; the <a> is reparented to the <table>'s parent. However, calling
+// Parse on "<a><table><a>" does not return an error, but the result has an <a>
+// element with an <a> child, and is therefore not 'well-formed'.
+//
+// Programmatically constructed trees are typically also 'well-formed', but it
+// is possible to construct a tree that looks innocuous but, when rendered and
+// re-parsed, results in a different tree. A simple example is that a solitary
+// text node would become a tree containing <html>, <head> and <body> elements.
+// Another example is that the programmatic equivalent of "a<head>b</head>c"
+// becomes "<html><head><head/><body>abc</body></html>".
+func Render(w io.Writer, n *Node) error {
+       if x, ok := w.(writer); ok {
+               return render(x, n)
+       }
+       buf := bufio.NewWriter(w)
+       if err := render(buf, n); err != nil {
+               return err
+       }
+       return buf.Flush()
+}
+
+// plaintextAbort is returned from render1 when a <plaintext> element
+// has been rendered. No more end tags should be rendered after that.
+// It is an internal sentinel: render turns it into a nil error.
+var plaintextAbort = errors.New("html: internal error (plaintext abort)")
+
+func render(w writer, n *Node) error {
+       err := render1(w, n)
+       if err == plaintextAbort {
+               err = nil
+       }
+       return err
+}
+
+// render1 writes the node n, and recursively its children, to w.
+//
+// Non-element nodes are handled by the switch below and return from inside
+// it; only element nodes reach the code after the switch. It returns the
+// first write error, an error for node types that cannot be rendered, or
+// plaintextAbort once a <plaintext> element has been written.
+func render1(w writer, n *Node) error {
+       // Render non-element nodes; these are the easy cases.
+       switch n.Type {
+       case ErrorNode:
+               return errors.New("html: cannot render an ErrorNode node")
+       case TextNode:
+               return escape(w, n.Data)
+       case DocumentNode:
+               for c := n.FirstChild; c != nil; c = c.NextSibling {
+                       if err := render1(w, c); err != nil {
+                               return err
+                       }
+               }
+               return nil
+       case ElementNode:
+               // No-op.
+       case CommentNode:
+               if _, err := w.WriteString("<!--"); err != nil {
+                       return err
+               }
+               if err := escapeComment(w, n.Data); err != nil {
+                       return err
+               }
+               if _, err := w.WriteString("-->"); err != nil {
+                       return err
+               }
+               return nil
+       case DoctypeNode:
+               if _, err := w.WriteString("<!DOCTYPE "); err != nil {
+                       return err
+               }
+               if err := escape(w, n.Data); err != nil {
+                       return err
+               }
+               if n.Attr != nil {
+                       var p, s string
+                       for _, a := range n.Attr {
+                               switch a.Key {
+                               case "public":
+                                       p = a.Val
+                               case "system":
+                                       s = a.Val
+                               }
+                       }
+                       // A public identifier is written as PUBLIC, optionally followed
+                       // by the system identifier; a system identifier on its own is
+                       // written as SYSTEM.
+                       if p != "" {
+                               if _, err := w.WriteString(" PUBLIC "); err != nil {
+                                       return err
+                               }
+                               if err := writeQuoted(w, p); err != nil {
+                                       return err
+                               }
+                               if s != "" {
+                                       if err := w.WriteByte(' '); err != nil {
+                                               return err
+                                       }
+                                       if err := writeQuoted(w, s); err != nil {
+                                               return err
+                                       }
+                               }
+                       } else if s != "" {
+                               if _, err := w.WriteString(" SYSTEM "); err != nil {
+                                       return err
+                               }
+                               if err := writeQuoted(w, s); err != nil {
+                                       return err
+                               }
+                       }
+               }
+               return w.WriteByte('>')
+       case RawNode:
+               // Raw nodes are written without any escaping.
+               _, err := w.WriteString(n.Data)
+               return err
+       default:
+               return errors.New("html: unknown node type")
+       }
+
+       // Render the <xxx> opening tag.
+       if err := w.WriteByte('<'); err != nil {
+               return err
+       }
+       if _, err := w.WriteString(n.Data); err != nil {
+               return err
+       }
+       for _, a := range n.Attr {
+               if err := w.WriteByte(' '); err != nil {
+                       return err
+               }
+               // A namespaced attribute is written as namespace:key.
+               if a.Namespace != "" {
+                       if _, err := w.WriteString(a.Namespace); err != nil {
+                               return err
+                       }
+                       if err := w.WriteByte(':'); err != nil {
+                               return err
+                       }
+               }
+               if _, err := w.WriteString(a.Key); err != nil {
+                       return err
+               }
+               if _, err := w.WriteString(`="`); err != nil {
+                       return err
+               }
+               if err := escape(w, a.Val); err != nil {
+                       return err
+               }
+               if err := w.WriteByte('"'); err != nil {
+                       return err
+               }
+       }
+       // Void elements are written self-closed and must not have children.
+       if voidElements[n.Data] {
+               if n.FirstChild != nil {
+                       return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
+               }
+               _, err := w.WriteString("/>")
+               return err
+       }
+       if err := w.WriteByte('>'); err != nil {
+               return err
+       }
+
+       // Add initial newline where there is danger of a newline being ignored.
+       if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
+               switch n.Data {
+               case "pre", "listing", "textarea":
+                       if err := w.WriteByte('\n'); err != nil {
+                               return err
+                       }
+               }
+       }
+
+       // Render any child nodes
+       if childTextNodesAreLiteral(n) {
+               // Text children are written as-is, without escaping.
+               for c := n.FirstChild; c != nil; c = c.NextSibling {
+                       if c.Type == TextNode {
+                               if _, err := w.WriteString(c.Data); err != nil {
+                                       return err
+                               }
+                       } else {
+                               if err := render1(w, c); err != nil {
+                                       return err
+                               }
+                       }
+               }
+               if n.Data == "plaintext" {
+                       // Don't render anything else. <plaintext> must be the
+                       // last element in the file, with no closing tag.
+                       return plaintextAbort
+               }
+       } else {
+               for c := n.FirstChild; c != nil; c = c.NextSibling {
+                       if err := render1(w, c); err != nil {
+                               return err
+                       }
+               }
+       }
+
+       // Render the </xxx> closing tag.
+       if _, err := w.WriteString("</"); err != nil {
+               return err
+       }
+       if _, err := w.WriteString(n.Data); err != nil {
+               return err
+       }
+       return w.WriteByte('>')
+}
+
+// childTextNodesAreLiteral reports whether text nodes that are direct
+// children of n must be written out verbatim, i.e. without escaping.
+//
+// Per WHATWG HTML 13.3, text whose parent is a style, script, xmp, iframe,
+// noembed, noframes, or plaintext element is appended literally. The
+// specification is not explicit about namespaces, so the rule is applied
+// only to elements in the HTML namespace (an empty Namespace).
+//
+// noscript is always treated the same way, even though the specification
+// only asks for that when scripting is enabled for the node; that state is
+// not tracked here.
+func childTextNodesAreLiteral(n *Node) bool {
+       if n.Namespace == "" {
+               switch n.Data {
+               case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+                       return true
+               }
+       }
+       // Either a foreign (non-HTML) element or an ordinary HTML element.
+       return false
+}
+
+// writeQuoted emits s to w wrapped in a matching pair of quote characters.
+// Double quotes are the default; single quotes are chosen when s itself
+// contains a double quote. It serves the identifiers of a doctype
+// declaration, which in valid HTML never contain both kinds of quote, so no
+// escaping is attempted.
+//
+// The first write error encountered is returned and nothing further is
+// written after it.
+func writeQuoted(w writer, s string) error {
+       quote := byte('"')
+       if strings.IndexByte(s, '"') >= 0 {
+               quote = '\''
+       }
+       err := w.WriteByte(quote)
+       if err == nil {
+               _, err = w.WriteString(s)
+       }
+       if err == nil {
+               err = w.WriteByte(quote)
+       }
+       return err
+}
+
+// Section 12.1.2, "Elements", gives this list of void elements. Void elements
+// are those that can't have any contents.
+//
+// The renderer looks element names up in this set: a listed element is
+// written with a self-closing "/>" and no end tag, and rendering fails with
+// an error if such an element has child nodes.
+var voidElements = map[string]bool{
+       "area":   true,
+       "base":   true,
+       "br":     true,
+       "col":    true,
+       "embed":  true,
+       "hr":     true,
+       "img":    true,
+       "input":  true,
+       "keygen": true, // "keygen" has been removed from the spec, but is kept here for backwards compatibility.
+       "link":   true,
+       "meta":   true,
+       "param":  true,
+       "source": true,
+       "track":  true,
+       "wbr":    true,
+}
diff --git a/internal/html/render_test.go b/internal/html/render_test.go
new file mode 100644 (file)
index 0000000..22d0864
--- /dev/null
@@ -0,0 +1,207 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "fmt"
+       "strings"
+       "testing"
+)
+
+// TestRenderer assembles a small node tree by hand, renders it with Render,
+// and compares the output against a fixed expected HTML string. The tree
+// mixes element, text, comment and raw nodes so that escaping of text,
+// attribute values and comment bodies is exercised alongside the unescaped
+// output of a RawNode.
+func TestRenderer(t *testing.T) {
+       nodes := [...]*Node{
+               0: {
+                       Type: ElementNode,
+                       Data: "html",
+               },
+               1: {
+                       Type: ElementNode,
+                       Data: "head",
+               },
+               2: {
+                       Type: ElementNode,
+                       Data: "body",
+               },
+               3: {
+                       Type: TextNode,
+                       Data: "0<1",
+               },
+               4: {
+                       Type: ElementNode,
+                       Data: "p",
+                       Attr: []Attribute{
+                               {
+                                       Key: "id",
+                                       Val: "A",
+                               },
+                               {
+                                       Key: "foo",
+                                       Val: `abc"def`,
+                               },
+                       },
+               },
+               5: {
+                       Type: TextNode,
+                       Data: "2",
+               },
+               6: {
+                       Type: ElementNode,
+                       Data: "b",
+                       Attr: []Attribute{
+                               {
+                                       Key: "empty",
+                                       Val: "",
+                               },
+                       },
+               },
+               7: {
+                       Type: TextNode,
+                       Data: "3",
+               },
+               8: {
+                       Type: ElementNode,
+                       Data: "i",
+                       Attr: []Attribute{
+                               {
+                                       Key: "backslash",
+                                       Val: `\`,
+                               },
+                       },
+               },
+               9: {
+                       Type: TextNode,
+                       Data: "&4",
+               },
+               10: {
+                       Type: TextNode,
+                       Data: "5",
+               },
+               11: {
+                       Type: ElementNode,
+                       Data: "blockquote",
+               },
+               12: {
+                       Type: ElementNode,
+                       Data: "br",
+               },
+               13: {
+                       Type: TextNode,
+                       Data: "6",
+               },
+               14: {
+                       Type: CommentNode,
+                       Data: "comm",
+               },
+               15: {
+                       Type: CommentNode,
+                       Data: "x-->y", // Needs escaping.
+               },
+               16: {
+                       Type: RawNode,
+                       Data: "7<pre>8</pre>9",
+               },
+       }
+
+       // Build a tree out of those nodes, based on a textual representation.
+       // Only the ".\t"s are significant. The trailing HTML-like text is
+       // just commentary. The "0:" prefixes are for easy cross-reference with
+       // the nodes array.
+       treeAsText := [...]string{
+               0:  `<html>`,
+               1:  `.  <head>`,
+               2:  `.  <body>`,
+               3:  `.  .       "0&lt;1"`,
+               4:  `.  .       <p id="A" foo="abc&#34;def">`,
+               5:  `.  .       .       "2"`,
+               6:  `.  .       .       <b empty="">`,
+               7:  `.  .       .       .       "3"`,
+               8:  `.  .       .       <i backslash="\">`,
+               9:  `.  .       .       .       "&amp;4"`,
+               10: `.  .       "5"`,
+               11: `.  .       <blockquote>`,
+               12: `.  .       <br>`,
+               13: `.  .       "6"`,
+               14: `.  .       "<!--comm-->"`,
+               15: `.  .       "<!--x--&gt;y-->"`,
+               16: `.  .       "7<pre>8</pre>9"`,
+       }
+       if len(nodes) != len(treeAsText) {
+               t.Fatal("len(nodes) != len(treeAsText)")
+       }
+       // stack[d] holds the most recently placed node at depth d, so the
+       // parent of a node at depth "level" is stack[level-1].
+       var stack [8]*Node
+       for i, line := range treeAsText {
+               // The number of leading "." markers gives the node's depth.
+               level := 0
+               for line[0] == '.' {
+                       // Strip a leading ".\t".
+                       line = line[2:]
+                       level++
+               }
+               n := nodes[i]
+               if level == 0 {
+                       if stack[0] != nil {
+                               t.Fatal("multiple root nodes")
+                       }
+                       stack[0] = n
+               } else {
+                       stack[level-1].AppendChild(n)
+                       stack[level] = n
+                       // Deeper entries belong to a previous subtree; drop them.
+                       for i := level + 1; i < len(stack); i++ {
+                               stack[i] = nil
+                       }
+               }
+               // At each stage of tree construction, we check all nodes for consistency.
+               for j, m := range nodes {
+                       if err := checkNodeConsistency(m); err != nil {
+                               t.Fatalf("i=%d, j=%d: %v", i, j, err)
+                       }
+               }
+       }
+
+       // Expected serialization: text, attribute values and the comment body
+       // appear escaped, <br> is written self-closed, and the RawNode data is
+       // emitted exactly as stored.
+       want := `<html><head></head><body>0&lt;1<p id="A" foo="abc&#34;def">` +
+               `2<b empty="">3</b><i backslash="\">&amp;4</i></p>` +
+               `5<blockquote></blockquote><br/>6<!--comm--><!--x--&gt;y-->7<pre>8</pre>9</body></html>`
+       b := new(bytes.Buffer)
+       if err := Render(b, nodes[0]); err != nil {
+               t.Fatal(err)
+       }
+       if got := b.String(); got != want {
+               t.Errorf("got vs want:\n%s\n%s\n", got, want)
+       }
+}
+
+// TestRenderTextNodes parses and re-renders a document holding a single "&"
+// inside each literal-text element, once directly in <body> and once nested
+// in a foreign <svg> or <math> wrapper. Directly in <body> the output must
+// equal the input; inside a foreign wrapper the "&" is expected to come back
+// as "&amp;".
+func TestRenderTextNodes(t *testing.T) {
+       namespaces := []string{
+               "", // html
+               "svg",
+               "math",
+       }
+       tags := []string{"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"}
+
+       for _, ns := range namespaces {
+               foreign := ns != ""
+               for _, tag := range tags {
+                       // Wrapper tags stay empty when testing the HTML namespace.
+                       openTag, closeTag := "", ""
+                       if foreign {
+                               openTag = "<" + ns + ">"
+                               closeTag = "</" + ns + ">"
+                       }
+                       doc := fmt.Sprintf(`<html><head></head><body>%s<%s>&</%s>%s</body></html>`, openTag, tag, tag, closeTag)
+
+                       root, err := Parse(strings.NewReader(doc))
+                       if err != nil {
+                               t.Fatal(err)
+                       }
+                       var out bytes.Buffer
+                       if err := Render(&out, root); err != nil {
+                               t.Fatal(err)
+                       }
+
+                       want := doc
+                       if foreign {
+                               want = strings.Replace(doc, "&", "&amp;", 1)
+                       }
+                       if got := out.String(); got != want {
+                               t.Errorf("unexpected output: got %q, want %q", got, want)
+                       }
+               }
+       }
+}
diff --git a/internal/html/testdata/go/issue_30600_parse_panics_in_cell_mode.dat b/internal/html/testdata/go/issue_30600_parse_panics_in_cell_mode.dat
new file mode 100644 (file)
index 0000000..741f4b1
--- /dev/null
@@ -0,0 +1,12 @@
+#data
+<table><math><th><mo><select></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math th>
+|         <math mo>
+|           <select>
+|     <table>
diff --git a/internal/html/testdata/go/issue_30961_error_nested_unknown_tag_types.dat b/internal/html/testdata/go/issue_30961_error_nested_unknown_tag_types.dat
new file mode 100644 (file)
index 0000000..e314964
--- /dev/null
@@ -0,0 +1,11 @@
+#data
+<html><head></head><body><tag1><tag2 /><p></p></tag1><div></div></body></html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <tag1>
+|       <tag2>
+|         <p>
+|     <div>
diff --git a/internal/html/testdata/go/raw_tags_to_be_ignored.dat b/internal/html/testdata/go/raw_tags_to_be_ignored.dat
new file mode 100644 (file)
index 0000000..50bac59
--- /dev/null
@@ -0,0 +1,97 @@
+#data
+<!doctype html><table><select><iframe>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><noembed>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><noframes>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><noscript>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><style>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><title>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><xmp>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
diff --git a/internal/html/testdata/go/select.dat b/internal/html/testdata/go/select.dat
new file mode 100644 (file)
index 0000000..684554c
--- /dev/null
@@ -0,0 +1,12 @@
+#data
+<table><math><select><mi><select></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math select>
+|         <math mi>
+|           <select>
+|     <table>
diff --git a/internal/html/testdata/go/template.dat b/internal/html/testdata/go/template.dat
new file mode 100644 (file)
index 0000000..b923b0f
--- /dev/null
@@ -0,0 +1,64 @@
+#data
+<body><template><yt-icon-button></yt-icon-button><form><paper-input></paper-input></form><style></style></template>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <yt-icon-button>
+|         <form>
+|           <paper-input>
+|         <style>
+
+#data
+<template><tBody><isindex/action=0>
+#errors
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <tbody>
+|         <isindex>
+|           action="0"
+|   <body>
+
+#data
+<math><template><mo><template>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math template>
+|         <math mo>
+|           <template>
+|             content
+
+#data
+<svg><template><desc><t><svg></template>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg template>
+|         <svg desc>
+|           <t>
+|             <svg svg>
+
+#data
+<math><template><mn><b></template>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math template>
+|         <math mn>
+|           <b>
diff --git a/internal/html/testdata/go1.html b/internal/html/testdata/go1.html
new file mode 100644 (file)
index 0000000..086c011
--- /dev/null
@@ -0,0 +1,2237 @@
+<!DOCTYPE html>
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+
+  <title>Go 1 Release Notes - The Go Programming Language</title>
+
+<link type="text/css" rel="stylesheet" href="/doc/style.css">
+<script type="text/javascript" src="/doc/godocs.js"></script>
+
+<link rel="search" type="application/opensearchdescription+xml" title="godoc" href="/opensearch.xml" />
+
+<script type="text/javascript">
+var _gaq = _gaq || [];
+_gaq.push(["_setAccount", "UA-11222381-2"]);
+_gaq.push(["_trackPageview"]);
+</script>
+</head>
+<body>
+
+<div id="topbar"><div class="container wide">
+
+<form method="GET" action="/search">
+<div id="menu">
+<a href="/doc/">Documents</a>
+<a href="/ref/">References</a>
+<a href="/pkg/">Packages</a>
+<a href="/project/">The Project</a>
+<a href="/help/">Help</a>
+<input type="text" id="search" name="q" class="inactive" value="Search">
+</div>
+<div id="heading"><a href="/">The Go Programming Language</a></div>
+</form>
+
+</div></div>
+
+<div id="page" class="wide">
+
+
+  <div id="minusone"><g:minusone size="small" annotation="none"></g:minusone></div>
+  <h1>Go 1 Release Notes</h1>
+
+
+
+
+<div id="nav"></div>
+
+
+
+
+<h2 id="introduction">Introduction to Go 1</h2>
+
+<p>
+Go version 1, Go 1 for short, defines a language and a set of core libraries
+that provide a stable foundation for creating reliable products, projects, and
+publications.
+</p>
+
+<p>
+The driving motivation for Go 1 is stability for its users. People should be able to
+write Go programs and expect that they will continue to compile and run without
+change, on a time scale of years, including in production environments such as
+Google App Engine. Similarly, people should be able to write books about Go, be
+able to say which version of Go the book is describing, and have that version
+number still be meaningful much later.
+</p>
+
+<p>
+Code that compiles in Go 1 should, with few exceptions, continue to compile and
+run throughout the lifetime of that version, even as we issue updates and bug
+fixes such as Go version 1.1, 1.2, and so on. Other than critical fixes, changes
+made to the language and library for subsequent releases of Go 1 may
+add functionality but will not break existing Go 1 programs.
+<a href="go1compat.html">The Go 1 compatibility document</a>
+explains the compatibility guidelines in more detail.
+</p>
+
+<p>
+Go 1 is a representation of Go as it is used today, not a wholesale rethinking of
+the language. We avoided designing new features and instead focused on cleaning
+up problems and inconsistencies and improving portability. There are a number of
+changes to the Go language and packages that we had considered for some time and
+prototyped but not released primarily because they are significant and
+backwards-incompatible. Go 1 was an opportunity to get them out, which is
+helpful for the long term, but also means that Go 1 introduces incompatibilities
+for old programs. Fortunately, the <code>go</code> <code>fix</code> tool can
+automate much of the work needed to bring programs up to the Go 1 standard.
+</p>
+
+<p>
+This document outlines the major changes in Go 1 that will affect programmers
+updating existing code; its reference point is the prior release, r60 (tagged as
+r60.3). It also explains how to update code from r60 to run under Go 1.
+</p>
+
+<h2 id="language">Changes to the language</h2>
+
+<h3 id="append">Append</h3>
+
+<p>
+The <code>append</code> predeclared variadic function makes it easy to grow a slice
+by adding elements to the end.
+A common use is to add bytes to the end of a byte slice when generating output.
+However, <code>append</code> did not provide a way to append a string to a <code>[]byte</code>,
+which is another common case.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/greeting := ..byte/` `/append.*hello/`}}
+-->    greeting := []byte{}
+    greeting = append(greeting, []byte(&#34;hello &#34;)...)</pre>
+
+<p>
+By analogy with the similar property of <code>copy</code>, Go 1
+permits a string to be appended (byte-wise) directly to a byte
+slice, reducing the friction between strings and byte slices.
+The conversion is no longer necessary:
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/append.*world/`}}
+-->    greeting = append(greeting, &#34;world&#34;...)</pre>
+
+<p>
+<em>Updating</em>:
+This is a new feature, so existing code needs no changes.
+</p>
+
+<h3 id="close">Close</h3>
+
+<p>
+The <code>close</code> predeclared function provides a mechanism
+for a sender to signal that no more values will be sent.
+It is important to the implementation of <code>for</code> <code>range</code>
+loops over channels and is helpful in other situations.
+Partly by design and partly because of race conditions that can occur otherwise,
+it is intended for use only by the goroutine sending on the channel,
+not by the goroutine receiving data.
+However, before Go 1 there was no compile-time checking that <code>close</code>
+was being used correctly.
+</p>
+
+<p>
+To close this gap, at least in part, Go 1 disallows <code>close</code> on receive-only channels.
+Attempting to close such a channel is a compile-time error.
+</p>
+
+<pre>
+    var c chan int
+    var csend chan&lt;- int = c
+    var crecv &lt;-chan int = c
+    close(c)     // legal
+    close(csend) // legal
+    close(crecv) // illegal
+</pre>
+
+<p>
+<em>Updating</em>:
+Existing code that attempts to close a receive-only channel was
+erroneous even before Go 1 and should be fixed.  The compiler will
+now reject such code.
+</p>
+
+<h3 id="literals">Composite literals</h3>
+
+<p>
+In Go 1, a composite literal of array, slice, or map type can elide the
+type specification for the elements' initializers if they are of pointer type.
+All four of the initializations in this example are legal; the last one was illegal before Go 1.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/type Date struct/` `/STOP/`}}
+-->    type Date struct {
+        month string
+        day   int
+    }
+    <span class="comment">// Struct values, fully qualified; always legal.</span>
+    holiday1 := []Date{
+        Date{&#34;Feb&#34;, 14},
+        Date{&#34;Nov&#34;, 11},
+        Date{&#34;Dec&#34;, 25},
+    }
+    <span class="comment">// Struct values, type name elided; always legal.</span>
+    holiday2 := []Date{
+        {&#34;Feb&#34;, 14},
+        {&#34;Nov&#34;, 11},
+        {&#34;Dec&#34;, 25},
+    }
+    <span class="comment">// Pointers, fully qualified, always legal.</span>
+    holiday3 := []*Date{
+        &amp;Date{&#34;Feb&#34;, 14},
+        &amp;Date{&#34;Nov&#34;, 11},
+        &amp;Date{&#34;Dec&#34;, 25},
+    }
+    <span class="comment">// Pointers, type name elided; legal in Go 1.</span>
+    holiday4 := []*Date{
+        {&#34;Feb&#34;, 14},
+        {&#34;Nov&#34;, 11},
+        {&#34;Dec&#34;, 25},
+    }</pre>
+
+<p>
+<em>Updating</em>:
+This change has no effect on existing code, but the command
+<code>gofmt</code> <code>-s</code> applied to existing source
+will, among other things, elide explicit element types wherever permitted.
+</p>
+
+
+<h3 id="init">Goroutines during init</h3>
+
+<p>
+The old language defined that <code>go</code> statements executed during initialization created goroutines but that they did not begin to run until initialization of the entire program was complete.
+This introduced clumsiness in many places and, in effect, limited the utility
+of the <code>init</code> construct:
+if it was possible for another package to use the library during initialization, the library
+was forced to avoid goroutines.
+This design was done for reasons of simplicity and safety but,
+as our confidence in the language grew, it seemed unnecessary.
+Running goroutines during initialization is no more complex or unsafe than running them during normal execution.
+</p>
+
+<p>
+In Go 1, code that uses goroutines can be called from
+<code>init</code> routines and global initialization expressions
+without introducing a deadlock.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/PackageGlobal/` `/^}/`}}
+-->var PackageGlobal int
+
+func init() {
+    c := make(chan int)
+    go initializationFunction(c)
+    PackageGlobal = &lt;-c
+}</pre>
+
+<p>
+<em>Updating</em>:
+This is a new feature, so existing code needs no changes,
+although it's possible that code that depends on goroutines not starting before <code>main</code> will break.
+There was no such code in the standard repository.
+</p>
+
+<h3 id="rune">The rune type</h3>
+
+<p>
+The language spec allows the <code>int</code> type to be 32 or 64 bits wide, but current implementations set <code>int</code> to 32 bits even on 64-bit platforms.
+It would be preferable to have <code>int</code> be 64 bits on 64-bit platforms.
+(There are important consequences for indexing large slices.)
+However, this change would waste space when processing Unicode characters with
+the old language because the <code>int</code> type was also used to hold Unicode code points: each code point would waste an extra 32 bits of storage if <code>int</code> grew from 32 bits to 64.
+</p>
+
+<p>
+To make changing to 64-bit <code>int</code> feasible,
+Go 1 introduces a new basic type, <code>rune</code>, to represent
+individual Unicode code points.
+It is an alias for <code>int32</code>, analogous to <code>byte</code>
+as an alias for <code>uint8</code>.
+</p>
+
+<p>
+Character literals such as <code>'a'</code>, <code>'語'</code>, and <code>'\u0345'</code>
+now have default type <code>rune</code>,
+analogous to <code>1.0</code> having default type <code>float64</code>.
+A variable initialized to a character constant will therefore
+have type <code>rune</code> unless otherwise specified.
+</p>
+
+<p>
+Libraries have been updated to use <code>rune</code> rather than <code>int</code>
+when appropriate. For instance, the functions <code>unicode.ToLower</code> and
+relatives now take and return a <code>rune</code>.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/STARTRUNE/` `/ENDRUNE/`}}
+-->    delta := &#39;δ&#39; <span class="comment">// delta has type rune.</span>
+    var DELTA rune
+    DELTA = unicode.ToUpper(delta)
+    epsilon := unicode.ToLower(DELTA + 1)
+    if epsilon != &#39;δ&#39;+1 {
+        log.Fatal(&#34;inconsistent casing for Greek&#34;)
+    }</pre>
+
+<p>
+<em>Updating</em>:
+Most source code will be unaffected by this because the type inference from
+<code>:=</code> initializers introduces the new type silently, and it propagates
+from there.
+Some code may get type errors that a trivial conversion will resolve.
+</p>
+
+<h3 id="error">The error type</h3>
+
+<p>
+Go 1 introduces a new built-in type, <code>error</code>, which has the following definition:
+</p>
+
+<pre>
+    type error interface {
+        Error() string
+    }
+</pre>
+
+<p>
+Since the consequences of this type are all in the package library,
+it is discussed <a href="#errors">below</a>.
+</p>
+
+<h3 id="delete">Deleting from maps</h3>
+
+<p>
+In the old language, to delete the entry with key <code>k</code> from map <code>m</code>, one wrote the statement,
+</p>
+
+<pre>
+    m[k] = value, false
+</pre>
+
+<p>
+This syntax was a peculiar special case, the only two-to-one assignment.
+It required passing a value (usually ignored) that is evaluated but discarded,
+plus a boolean that was nearly always the constant <code>false</code>.
+It did the job but was odd and a point of contention.
+</p>
+
+<p>
+In Go 1, that syntax has gone; instead there is a new built-in
+function, <code>delete</code>.  The call
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/delete\(m, k\)/`}}
+-->    delete(m, k)</pre>
+
+<p>
+will delete the map entry retrieved by the expression <code>m[k]</code>.
+There is no return value. Deleting a non-existent entry is a no-op.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will convert expressions of the form <code>m[k] = value,
+false</code> into <code>delete(m, k)</code> when it is clear that
+the ignored value can be safely discarded from the program and
+<code>false</code> refers to the predefined boolean constant.
+The fix tool
+will flag other uses of the syntax for inspection by the programmer.
+</p>
+
+<h3 id="iteration">Iterating in maps</h3>
+
+<p>
+The old language specification did not define the order of iteration for maps,
+and in practice it differed across hardware platforms.
+This caused tests that iterated over maps to be fragile and non-portable, with the
+unpleasant property that a test might always pass on one machine but break on another.
+</p>
+
+<p>
+In Go 1, the order in which elements are visited when iterating
+over a map using a <code>for</code> <code>range</code> statement
+is defined to be unpredictable, even if the same loop is run multiple
+times with the same map.
+Code should not assume that the elements are visited in any particular order.
+</p>
+
+<p>
+This change means that code that depends on iteration order is very likely to break early and be fixed long before it becomes a problem.
+Just as important, it allows the map implementation to ensure better map balancing even when programs are using range loops to select an element from a map.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/Sunday/` `/^     }/`}}
+-->    m := map[string]int{&#34;Sunday&#34;: 0, &#34;Monday&#34;: 1}
+    for name, value := range m {
+        <span class="comment">// This loop should not assume Sunday will be visited first.</span>
+        f(name, value)
+    }</pre>
+
+<p>
+<em>Updating</em>:
+This is one change where tools cannot help.  Most existing code
+will be unaffected, but some programs may break or misbehave; we
+recommend manual checking of all range statements over maps to
+verify they do not depend on iteration order. There were a few such
+examples in the standard repository; they have been fixed.
+Note that it was already incorrect to depend on the iteration order, which
+was unspecified. This change codifies the unpredictability.
+</p>
+
+<h3 id="multiple_assignment">Multiple assignment</h3>
+
+<p>
+The language specification has long guaranteed that in assignments
+the right-hand-side expressions are all evaluated before any left-hand-side expressions are assigned.
+To guarantee predictable behavior,
+Go 1 refines the specification further.
+</p>
+
+<p>
+If the left-hand side of the assignment
+statement contains expressions that require evaluation, such as
+function calls or array indexing operations, these will all be done
+using the usual left-to-right rule before any variables are assigned
+their value.  Once everything is evaluated, the actual assignments
+proceed in left-to-right order.
+</p>
+
+<p>
+These examples illustrate the behavior.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/sa :=/` `/then sc.0. = 2/`}}
+-->    sa := []int{1, 2, 3}
+    i := 0
+    i, sa[i] = 1, 2 <span class="comment">// sets i = 1, sa[0] = 2</span>
+
+    sb := []int{1, 2, 3}
+    j := 0
+    sb[j], j = 2, 1 <span class="comment">// sets sb[0] = 2, j = 1</span>
+
+    sc := []int{1, 2, 3}
+    sc[0], sc[0] = 1, 2 <span class="comment">// sets sc[0] = 1, then sc[0] = 2 (so sc[0] = 2 at end)</span></pre>
+
+<p>
+<em>Updating</em>:
+This is one change where tools cannot help, but breakage is unlikely.
+No code in the standard repository was broken by this change, and code
+that depended on the previous unspecified behavior was already incorrect.
+</p>
+
+<h3 id="shadowing">Returns and shadowed variables</h3>
+
+<p>
+A common mistake is to use <code>return</code> (without arguments) after an assignment to a variable that has the same name as a result variable but is not the same variable.
+This situation is called <em>shadowing</em>: the result variable has been shadowed by another variable with the same name declared in an inner scope.
+</p>
+
+<p>
+In functions with named return values,
+the Go 1 compilers disallow return statements without arguments if any of the named return values is shadowed at the point of the return statement.
+(It isn't part of the specification, because this is one area we are still exploring;
+the situation is analogous to the compilers rejecting functions that do not end with an explicit return statement.)
+</p>
+
+<p>
+This function implicitly returns a shadowed return value and will be rejected by the compiler:
+</p>
+
+<pre>
+    func Bug() (i, j, k int) {
+        for i = 0; i &lt; 5; i++ {
+            for j := 0; j &lt; 5; j++ { // Redeclares j.
+                k += i*j
+                if k &gt; 100 {
+                    return // Rejected: j is shadowed here.
+                }
+            }
+        }
+        return // OK: j is not shadowed here.
+    }
+</pre>
+
+<p>
+<em>Updating</em>:
+Code that shadows return values in this way will be rejected by the compiler and will need to be fixed by hand.
+The few cases that arose in the standard repository were mostly bugs.
+</p>
+
+<h3 id="unexported">Copying structs with unexported fields</h3>
+
+<p>
+The old language did not allow a package to make a copy of a struct value containing unexported fields belonging to a different package.
+There was, however, a required exception for a method receiver;
+also, the implementations of <code>copy</code> and <code>append</code> have never honored the restriction.
+</p>
+
+<p>
+Go 1 will allow packages to copy struct values containing unexported fields from other packages.
+Besides resolving the inconsistency,
+this change admits a new kind of API: a package can return an opaque value without resorting to a pointer or interface.
+The new implementations of <code>time.Time</code> and
+<code>reflect.Value</code> are examples of types taking advantage of this new property.
+</p>
+
+<p>
+As an example, if package <code>p</code> includes the definitions,
+</p>
+
+<pre>
+    type Struct struct {
+        Public int
+        secret int
+    }
+    func NewStruct(a int) Struct {  // Note: not a pointer.
+        return Struct{a, f(a)}
+    }
+    func (s Struct) String() string {
+        return fmt.Sprintf("{%d (secret %d)}", s.Public, s.secret)
+    }
+</pre>
+
+<p>
+a package that imports <code>p</code> can assign and copy values of type
+<code>p.Struct</code> at will.
+Behind the scenes the unexported fields will be assigned and copied just
+as if they were exported,
+but the client code will never be aware of them. The code
+</p>
+
+<pre>
+    import "p"
+
+    myStruct := p.NewStruct(23)
+    copyOfMyStruct := myStruct
+    fmt.Println(myStruct, copyOfMyStruct)
+</pre>
+
+<p>
+will show that the secret field of the struct has been copied to the new value.
+</p>
+
+<p>
+<em>Updating</em>:
+This is a new feature, so existing code needs no changes.
+</p>
+
+<h3 id="equality">Equality</h3>
+
+<p>
+Before Go 1, the language did not define equality on struct and array values.
+This meant,
+among other things, that structs and arrays could not be used as map keys.
+On the other hand, Go did define equality on function and map values.
+Function equality was problematic in the presence of closures
+(when are two closures equal?)
+while map equality compared pointers, not the maps' content, which was usually
+not what the user would want.
+</p>
+
+<p>
+Go 1 addressed these issues.
+First, structs and arrays can be compared for equality and inequality
+(<code>==</code> and <code>!=</code>),
+and therefore be used as map keys,
+provided they are composed from elements for which equality is also defined,
+using element-wise comparison.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/type Day struct/` `/Printf/`}}
+-->    type Day struct {
+        long  string
+        short string
+    }
+    Christmas := Day{&#34;Christmas&#34;, &#34;XMas&#34;}
+    Thanksgiving := Day{&#34;Thanksgiving&#34;, &#34;Turkey&#34;}
+    holiday := map[Day]bool{
+        Christmas:    true,
+        Thanksgiving: true,
+    }
+    fmt.Printf(&#34;Christmas is a holiday: %t\n&#34;, holiday[Christmas])</pre>
+
+<p>
+Second, Go 1 removes the definition of equality for function values,
+except for comparison with <code>nil</code>.
+Finally, map equality is gone too, also except for comparison with <code>nil</code>.
+</p>
+
+<p>
+Note that equality is still undefined for slices, for which the
+calculation is in general infeasible.  Also note that the ordered
+comparison operators (<code>&lt;</code> <code>&lt;=</code>
+<code>&gt;</code> <code>&gt;=</code>) are still undefined for
+structs and arrays.
+</p>
+
+<p>
+<em>Updating</em>:
+Struct and array equality is a new feature, so existing code needs no changes.
+Existing code that depends on function or map equality will be
+rejected by the compiler and will need to be fixed by hand.
+Few programs will be affected, but the fix may require some
+redesign.
+</p>
+
+<h2 id="packages">The package hierarchy</h2>
+
+<p>
+Go 1 addresses many deficiencies in the old standard library and
+cleans up a number of packages, making them more internally consistent
+and portable.
+</p>
+
+<p>
+This section describes how the packages have been rearranged in Go 1.
+Some have moved, some have been renamed, some have been deleted.
+New packages are described in later sections.
+</p>
+
+<h3 id="hierarchy">The package hierarchy</h3>
+
+<p>
+Go 1 has a rearranged package hierarchy that groups related items
+into subdirectories. For instance, <code>utf8</code> and
+<code>utf16</code> now occupy subdirectories of <code>unicode</code>.
+Also, <a href="#subrepo">some packages</a> have moved into
+subrepositories of
+<a href="http://code.google.com/p/go"><code>code.google.com/p/go</code></a>
+while <a href="#deleted">others</a> have been deleted outright.
+</p>
+
+<table class="codetable" frame="border" summary="Moved packages">
+<colgroup align="left" width="60%"></colgroup>
+<colgroup align="left" width="40%"></colgroup>
+<tr>
+<th align="left">Old path</th>
+<th align="left">New path</th>
+</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>asn1</td> <td>encoding/asn1</td></tr>
+<tr><td>csv</td> <td>encoding/csv</td></tr>
+<tr><td>gob</td> <td>encoding/gob</td></tr>
+<tr><td>json</td> <td>encoding/json</td></tr>
+<tr><td>xml</td> <td>encoding/xml</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>exp/template/html</td> <td>html/template</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>big</td> <td>math/big</td></tr>
+<tr><td>cmath</td> <td>math/cmplx</td></tr>
+<tr><td>rand</td> <td>math/rand</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>http</td> <td>net/http</td></tr>
+<tr><td>http/cgi</td> <td>net/http/cgi</td></tr>
+<tr><td>http/fcgi</td> <td>net/http/fcgi</td></tr>
+<tr><td>http/httptest</td> <td>net/http/httptest</td></tr>
+<tr><td>http/pprof</td> <td>net/http/pprof</td></tr>
+<tr><td>mail</td> <td>net/mail</td></tr>
+<tr><td>rpc</td> <td>net/rpc</td></tr>
+<tr><td>rpc/jsonrpc</td> <td>net/rpc/jsonrpc</td></tr>
+<tr><td>smtp</td> <td>net/smtp</td></tr>
+<tr><td>url</td> <td>net/url</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>exec</td> <td>os/exec</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>scanner</td> <td>text/scanner</td></tr>
+<tr><td>tabwriter</td> <td>text/tabwriter</td></tr>
+<tr><td>template</td> <td>text/template</td></tr>
+<tr><td>template/parse</td> <td>text/template/parse</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>utf8</td> <td>unicode/utf8</td></tr>
+<tr><td>utf16</td> <td>unicode/utf16</td></tr>
+</table>
+
+<p>
+Note that the package names for the old <code>cmath</code> and
+<code>exp/template/html</code> packages have changed to <code>cmplx</code>
+and <code>template</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update all imports and package renames for packages that
+remain inside the standard repository.  Programs that import packages
+that are no longer in the standard repository will need to be edited
+by hand.
+</p>
+
+<h3 id="exp">The package tree exp</h3>
+
+<p>
+Because they are not standardized, the packages under the <code>exp</code> directory will not be available in the
+standard Go 1 release distributions, although they will be available in source code form
+in <a href="http://code.google.com/p/go/">the repository</a> for
+developers who wish to use them.
+</p>
+
+<p>
+Several packages have moved under <code>exp</code> at the time of Go 1's release:
+</p>
+
+<ul>
+<li><code>ebnf</code></li>
+<li><code>html</code><sup>&#8224;</sup></li>
+<li><code>go/types</code></li>
+</ul>
+
+<p>
+(<sup>&#8224;</sup>The <code>EscapeString</code> and <code>UnescapeString</code> functions remain
+in package <code>html</code>.)
+</p>
+
+<p>
+All these packages are available under the same names, with the prefix <code>exp/</code>: <code>exp/ebnf</code> etc.
+</p>
+
+<p>
+Also, the <code>utf8.String</code> type has been moved to its own package, <code>exp/utf8string</code>.
+</p>
+
+<p>
+Finally, the <code>gotype</code> command now resides in <code>exp/gotype</code>, while
+<code>ebnflint</code> is now in <code>exp/ebnflint</code>.
+If they are installed, they now reside in <code>$GOROOT/bin/tool</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses packages in <code>exp</code> will need to be updated by hand,
+or else compiled from an installation that has <code>exp</code> available.
+The <code>go</code> <code>fix</code> tool or the compiler will complain about such uses.
+</p>
+
+<h3 id="old">The package tree old</h3>
+
+<p>
+Because they are deprecated, the packages under the <code>old</code> directory will not be available in the
+standard Go 1 release distributions, although they will be available in source code form for
+developers who wish to use them.
+</p>
+
+<p>
+The packages in their new locations are:
+</p>
+
+<ul>
+<li><code>old/netchan</code></li>
+<li><code>old/regexp</code></li>
+<li><code>old/template</code></li>
+</ul>
+
+<p>
+<em>Updating</em>:
+Code that uses packages now in <code>old</code> will need to be updated by hand,
+or else compiled from an installation that has <code>old</code> available.
+The <code>go</code> <code>fix</code> tool will warn about such uses.
+</p>
+
+<h3 id="deleted">Deleted packages</h3>
+
+<p>
+Go 1 deletes several packages outright:
+</p>
+
+<ul>
+<li><code>container/vector</code></li>
+<li><code>exp/datafmt</code></li>
+<li><code>go/typechecker</code></li>
+<li><code>try</code></li>
+</ul>
+
+<p>
+and also the command <code>gotry</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses <code>container/vector</code> should be updated to use
+slices directly.  See
+<a href="http://code.google.com/p/go-wiki/wiki/SliceTricks">the Go
+Language Community Wiki</a> for some suggestions.
+Code that uses the other packages (there should be almost zero) will need to be rethought.
+</p>
+
+<h3 id="subrepo">Packages moving to subrepositories</h3>
+
+<p>
+Go 1 has moved a number of packages into other repositories, usually sub-repositories of
+<a href="http://code.google.com/p/go/">the main Go repository</a>.
+This table lists the old and new import paths:
+</p>
+
+<table class="codetable" frame="border" summary="Sub-repositories">
+<colgroup align="left" width="40%"></colgroup>
+<colgroup align="left" width="60%"></colgroup>
+<tr>
+<th align="left">Old</th>
+<th align="left">New</th>
+</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>crypto/bcrypt</td> <td>code.google.com/p/go.crypto/bcrypt</tr>
+<tr><td>crypto/blowfish</td> <td>code.google.com/p/go.crypto/blowfish</tr>
+<tr><td>crypto/cast5</td> <td>code.google.com/p/go.crypto/cast5</tr>
+<tr><td>crypto/md4</td> <td>code.google.com/p/go.crypto/md4</tr>
+<tr><td>crypto/ocsp</td> <td>code.google.com/p/go.crypto/ocsp</tr>
+<tr><td>crypto/openpgp</td> <td>code.google.com/p/go.crypto/openpgp</tr>
+<tr><td>crypto/openpgp/armor</td> <td>code.google.com/p/go.crypto/openpgp/armor</tr>
+<tr><td>crypto/openpgp/elgamal</td> <td>code.google.com/p/go.crypto/openpgp/elgamal</tr>
+<tr><td>crypto/openpgp/errors</td> <td>code.google.com/p/go.crypto/openpgp/errors</tr>
+<tr><td>crypto/openpgp/packet</td> <td>code.google.com/p/go.crypto/openpgp/packet</tr>
+<tr><td>crypto/openpgp/s2k</td> <td>code.google.com/p/go.crypto/openpgp/s2k</tr>
+<tr><td>crypto/ripemd160</td> <td>code.google.com/p/go.crypto/ripemd160</tr>
+<tr><td>crypto/twofish</td> <td>code.google.com/p/go.crypto/twofish</tr>
+<tr><td>crypto/xtea</td> <td>code.google.com/p/go.crypto/xtea</tr>
+<tr><td>exp/ssh</td> <td>code.google.com/p/go.crypto/ssh</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image/bmp</td> <td>code.google.com/p/go.image/bmp</tr>
+<tr><td>image/tiff</td> <td>code.google.com/p/go.image/tiff</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>net/dict</td> <td>code.google.com/p/go.net/dict</tr>
+<tr><td>net/websocket</td> <td>code.google.com/p/go.net/websocket</tr>
+<tr><td>exp/spdy</td> <td>code.google.com/p/go.net/spdy</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>encoding/git85</td> <td>code.google.com/p/go.codereview/git85</tr>
+<tr><td>patch</td> <td>code.google.com/p/go.codereview/patch</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>exp/wingui</td> <td>code.google.com/p/gowingui</tr>
+</table>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update imports of these packages to use the new import paths.
+Installations that depend on these packages will need to install them using
+a <code>go get</code> command.
+</p>
+
+<h2 id="major">Major changes to the library</h2>
+
+<p>
+This section describes significant changes to the core libraries, the ones that
+affect the most programs.
+</p>
+
+<h3 id="errors">The error type and errors package</h3>
+
+<p>
+The placement of <code>os.Error</code> in package <code>os</code> is mostly historical: errors first came up when implementing package <code>os</code>, and they seemed system-related at the time.
+Since then it has become clear that errors are more fundamental than the operating system.  For example, it would be nice to use <code>Errors</code> in packages that <code>os</code> depends on, like <code>syscall</code>.
+Also, having <code>Error</code> in <code>os</code> introduces many dependencies on <code>os</code> that would otherwise not exist.
+</p>
+
+<p>
+Go 1 solves these problems by introducing a built-in <code>error</code> interface type and a separate <code>errors</code> package (analogous to <code>bytes</code> and <code>strings</code>) that contains utility functions.
+It replaces <code>os.NewError</code> with
+<a href="/pkg/errors/#New"><code>errors.New</code></a>,
+giving errors a more central place in the environment.
+</p>
+
+<p>
+So that the widely-used <code>String</code> method does not cause accidental satisfaction
+of the <code>error</code> interface, the <code>error</code> interface instead uses
+the name <code>Error</code> for that method:
+</p>
+
+<pre>
+    type error interface {
+        Error() string
+    }
+</pre>
+
+<p>
+The <code>fmt</code> library automatically invokes <code>Error</code>, as it already
+does for <code>String</code>, for easy printing of error values.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/START ERROR EXAMPLE/` `/END ERROR EXAMPLE/`}}
+-->type SyntaxError struct {
+    File    string
+    Line    int
+    Message string
+}
+
+func (se *SyntaxError) Error() string {
+    return fmt.Sprintf(&#34;%s:%d: %s&#34;, se.File, se.Line, se.Message)
+}</pre>
+
+<p>
+All standard packages have been updated to use the new interface; the old <code>os.Error</code> is gone.
+</p>
+
+<p>
+A new package, <a href="/pkg/errors/"><code>errors</code></a>, contains the function
+</p>
+
+<pre>
+func New(text string) error
+</pre>
+
+<p>
+to turn a string into an error. It replaces the old <code>os.NewError</code>.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/ErrSyntax/`}}
+-->    var ErrSyntax = errors.New(&#34;syntax error&#34;)</pre>
+               
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
+Code that defines error types with a <code>String</code> method will need to be updated
+by hand to rename the methods to <code>Error</code>.
+</p>
+
+<h3 id="errno">System call errors</h3>
+
+<p>
+The old <code>syscall</code> package, which predated <code>os.Error</code>
+(and just about everything else),
+returned errors as <code>int</code> values.
+In turn, the <code>os</code> package forwarded many of these errors, such
+as <code>EINVAL</code>, but using a different set of errors on each platform.
+This behavior was unpleasant and unportable.
+</p>
+
+<p>
+In Go 1, the
+<a href="/pkg/syscall/"><code>syscall</code></a>
+package instead returns an <code>error</code> for system call errors.
+On Unix, the implementation is done by a
+<a href="/pkg/syscall/#Errno"><code>syscall.Errno</code></a> type
+that satisfies <code>error</code> and replaces the old <code>os.Errno</code>.
+</p>
+
+<p>
+The changes affecting <code>os.EINVAL</code> and relatives are
+described <a href="#os">elsewhere</a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
+Regardless, most code should use the <code>os</code> package
+rather than <code>syscall</code> and so will be unaffected.
+</p>
+
+<h3 id="time">Time</h3>
+
+<p>
+Time is always a challenge to support well in a programming language.
+The old Go <code>time</code> package had <code>int64</code> units, no
+real type safety,
+and no distinction between absolute times and durations.
+</p>
+
+<p>
+One of the most sweeping changes in the Go 1 library is therefore a
+complete redesign of the
+<a href="/pkg/time/"><code>time</code></a> package.
+Instead of an integer number of nanoseconds as an <code>int64</code>,
+and a separate <code>*time.Time</code> type to deal with human
+units such as hours and years,
+there are now two fundamental types:
+<a href="/pkg/time/#Time"><code>time.Time</code></a>
+(a value, so the <code>*</code> is gone), which represents a moment in time;
+and <a href="/pkg/time/#Duration"><code>time.Duration</code></a>,
+which represents an interval.
+Both have nanosecond resolution.
+A <code>Time</code> can represent any time into the ancient
+past and remote future, while a <code>Duration</code> can
+span plus or minus only about 290 years.
+There are methods on these types, plus a number of helpful
+predefined constant durations such as <code>time.Second</code>.
+</p>
+
+<p>
+Among the new methods are things like
+<a href="/pkg/time/#Time.Add"><code>Time.Add</code></a>,
+which adds a <code>Duration</code> to a <code>Time</code>, and
+<a href="/pkg/time/#Time.Sub"><code>Time.Sub</code></a>,
+which subtracts two <code>Times</code> to yield a <code>Duration</code>.
+</p>
+
+<p>
+The most important semantic change is that the Unix epoch (Jan 1, 1970) is now
+relevant only for those functions and methods that mention Unix:
+<a href="/pkg/time/#Unix"><code>time.Unix</code></a>
+and the <a href="/pkg/time/#Time.Unix"><code>Unix</code></a>
+and <a href="/pkg/time/#Time.UnixNano"><code>UnixNano</code></a> methods
+of the <code>Time</code> type.
+In particular,
+<a href="/pkg/time/#Now"><code>time.Now</code></a>
+returns a <code>time.Time</code> value rather than, in the old
+API, an integer nanosecond count since the Unix epoch.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/sleepUntil/` `/^}/`}}
+--><span class="comment">// sleepUntil sleeps until the specified time. It returns immediately if it&#39;s too late.</span>
+func sleepUntil(wakeup time.Time) {
+    now := time.Now() <span class="comment">// A Time.</span>
+    if !wakeup.After(now) {
+        return
+    }
+    delta := wakeup.Sub(now) <span class="comment">// A Duration.</span>
+    fmt.Printf(&#34;Sleeping for %.3fs\n&#34;, delta.Seconds())
+    time.Sleep(delta)
+}</pre>
+
+<p>
+The new types, methods, and constants have been propagated through
+all the standard packages that use time, such as <code>os</code> and
+its representation of file time stamps.
+</p>
+
+<p>
+<em>Updating</em>:
+The <code>go</code> <code>fix</code> tool will update many uses of the old <code>time</code> package to use the new
+types and methods, although it does not replace values such as <code>1e9</code>
+representing nanoseconds per second.
+Also, because of type changes in some of the values that arise,
+some of the expressions rewritten by the fix tool may require
+further hand editing; in such cases the rewrite will include
+the correct function or method for the old functionality, but
+may have the wrong type or require further analysis.
+</p>
+
+<h2 id="minor">Minor changes to the library</h2>
+
+<p>
+This section describes smaller changes, such as those to less commonly
+used packages or that affect
+few programs beyond the need to run <code>go</code> <code>fix</code>.
+This category includes packages that are new in Go 1.
+Collectively they improve portability, regularize behavior, and
+make the interfaces more modern and Go-like.
+</p>
+
+<h3 id="archive_zip">The archive/zip package</h3>
+
+<p>
+In Go 1, <a href="/pkg/archive/zip/#Writer"><code>*zip.Writer</code></a> no
+longer has a <code>Write</code> method. Its presence was a mistake.
+</p>
+
+<p>
+<em>Updating</em>:
+What little code is affected will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="bufio">The bufio package</h3>
+
+<p>
+In Go 1, <a href="/pkg/bufio/#NewReaderSize"><code>bufio.NewReaderSize</code></a>
+and
+<a href="/pkg/bufio/#NewWriterSize"><code>bufio.NewWriterSize</code></a>
+functions no longer return an error for invalid sizes.
+If the argument size is too small or invalid, it is adjusted.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update calls that assign the error to _.
+Calls that aren't fixed will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="compress">The compress/flate, compress/gzip and compress/zlib packages</h3>
+
+<p>
+In Go 1, the <code>NewWriterXxx</code> functions in
+<a href="/pkg/compress/flate"><code>compress/flate</code></a>,
+<a href="/pkg/compress/gzip"><code>compress/gzip</code></a> and
+<a href="/pkg/compress/zlib"><code>compress/zlib</code></a>
+all return <code>(*Writer, error)</code> if they take a compression level,
+and <code>*Writer</code> otherwise. Package <code>gzip</code>'s
+<code>Compressor</code> and <code>Decompressor</code> types have been renamed
+to <code>Writer</code> and <code>Reader</code>. Package <code>flate</code>'s
+<code>WrongValueError</code> type has been removed.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update old names and calls that assign the error to _.
+Calls that aren't fixed will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="crypto_aes_des">The crypto/aes and crypto/des packages</h3>
+
+<p>
+In Go 1, the <code>Reset</code> method has been removed. Go does not guarantee
+that memory is not copied and therefore this method was misleading.
+</p>
+
+<p>
+The cipher-specific types <code>*aes.Cipher</code>, <code>*des.Cipher</code>,
+and <code>*des.TripleDESCipher</code> have been removed in favor of
+<code>cipher.Block</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Remove the calls to <code>Reset</code>. Replace uses of the specific cipher types with
+<code>cipher.Block</code>.
+</p>
+
+<h3 id="crypto_elliptic">The crypto/elliptic package</h3>
+
+<p>
+In Go 1, <a href="/pkg/crypto/elliptic/#Curve"><code>elliptic.Curve</code></a>
+has been made an interface to permit alternative implementations. The curve
+parameters have been moved to the
+<a href="/pkg/crypto/elliptic/#CurveParams"><code>elliptic.CurveParams</code></a>
+structure.
+</p>
+
+<p>
+<em>Updating</em>:
+Existing users of <code>*elliptic.Curve</code> will need to change to
+simply <code>elliptic.Curve</code>. <code>Marshal</code>,
+<code>Unmarshal</code> and <code>GenerateKey</code> are now functions
+in <code>crypto/elliptic</code> that take an <code>elliptic.Curve</code>
+as their first argument.
+</p>
+
+<h3 id="crypto_hmac">The crypto/hmac package</h3>
+
+<p>
+In Go 1, the hash-specific functions, such as <code>hmac.NewMD5</code>, have
+been removed from <code>crypto/hmac</code>. Instead, <code>hmac.New</code> takes
+a function that returns a <code>hash.Hash</code>, such as <code>md5.New</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will perform the needed changes.
+</p>
+
+<h3 id="crypto_x509">The crypto/x509 package</h3>
+
+<p>
+In Go 1, the
+<a href="/pkg/crypto/x509/#CreateCertificate"><code>CreateCertificate</code></a>
+and
+<a href="/pkg/crypto/x509/#CreateCRL"><code>CreateCRL</code></a>
+functions in <code>crypto/x509</code> have been altered to take an
+<code>interface{}</code> where they previously took a <code>*rsa.PublicKey</code>
+or <code>*rsa.PrivateKey</code>. This will allow other public key algorithms
+to be implemented in the future.
+</p>
+
+<p>
+<em>Updating</em>:
+No changes will be needed.
+</p>
+
+<h3 id="encoding_binary">The encoding/binary package</h3>
+
+<p>
+In Go 1, the <code>binary.TotalSize</code> function has been replaced by
+<a href="/pkg/encoding/binary/#Size"><code>Size</code></a>,
+which takes an <code>interface{}</code> argument rather than
+a <code>reflect.Value</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+What little code is affected will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="encoding_xml">The encoding/xml package</h3>
+
+<p>
+In Go 1, the <a href="/pkg/encoding/xml/"><code>xml</code></a> package
+has been brought closer in design to the other marshaling packages such
+as <a href="/pkg/encoding/gob/"><code>encoding/gob</code></a>.
+</p>
+
+<p>
+The old <code>Parser</code> type is renamed
+<a href="/pkg/encoding/xml/#Decoder"><code>Decoder</code></a> and has a new
+<a href="/pkg/encoding/xml/#Decoder.Decode"><code>Decode</code></a> method. An
+<a href="/pkg/encoding/xml/#Encoder"><code>Encoder</code></a> type was also introduced.
+</p>
+
+<p>
+The functions <a href="/pkg/encoding/xml/#Marshal"><code>Marshal</code></a>
+and <a href="/pkg/encoding/xml/#Unmarshal"><code>Unmarshal</code></a>
+work with <code>[]byte</code> values now. To work with streams,
+use the new <a href="/pkg/encoding/xml/#Encoder"><code>Encoder</code></a>
+and <a href="/pkg/encoding/xml/#Decoder"><code>Decoder</code></a> types.
+</p>
+
+<p>
+When marshaling or unmarshaling values, the format of supported flags in
+field tags has changed to be closer to the
+<a href="/pkg/encoding/json"><code>json</code></a> package
+(<code>`xml:"name,flag"`</code>). The matching done between field tags, field
+names, and the XML attribute and element names is now case-sensitive.
+The <code>XMLName</code> field tag, if present, must also match the name
+of the XML element being marshaled.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update most uses of the package except for some calls to
+<code>Unmarshal</code>. Special care must be taken with field tags,
+since the fix tool will not update them and if not fixed by hand they will
+misbehave silently in some cases. For example, the old
+<code>"attr"</code> is now written <code>",attr"</code> while plain
+<code>"attr"</code> remains valid but with a different meaning.
+</p>
+
+<h3 id="expvar">The expvar package</h3>
+
+<p>
+In Go 1, the <code>RemoveAll</code> function has been removed.
+The <code>Iter</code> function and <code>Iter</code> method on <code>*Map</code> have
+been replaced by
+<a href="/pkg/expvar/#Do"><code>Do</code></a>
+and
+<a href="/pkg/expvar/#Map.Do"><code>(*Map).Do</code></a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Most code using <code>expvar</code> will not need changing. The rare code that used
+<code>Iter</code> can be updated to pass a closure to <code>Do</code> to achieve the same effect.
+</p>
+
+<h3 id="flag">The flag package</h3>
+
+<p>
+In Go 1, the interface <a href="/pkg/flag/#Value"><code>flag.Value</code></a> has changed slightly.
+The <code>Set</code> method now returns an <code>error</code> instead of
+a <code>bool</code> to indicate success or failure.
+</p>
+
+<p>
+There is also a new kind of flag, <code>Duration</code>, to support argument
+values specifying time intervals.
+Values for such flags must be given units, just as <code>time.Duration</code>
+formats them: <code>10s</code>, <code>1h30m</code>, etc.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/timeout/`}}
+-->var timeout = flag.Duration(&#34;timeout&#34;, 30*time.Second, &#34;how long to wait for completion&#34;)</pre>
+
+<p>
+<em>Updating</em>:
+Programs that implement their own flags will need minor manual fixes to update their
+<code>Set</code> methods.
+The <code>Duration</code> flag is new and affects no existing code.
+</p>
+
+
+<h3 id="go">The go/* packages</h3>
+
+<p>
+Several packages under <code>go</code> have slightly revised APIs.
+</p>
+
+<p>
+A concrete <code>Mode</code> type was introduced for configuration mode flags
+in the packages
+<a href="/pkg/go/scanner/"><code>go/scanner</code></a>,
+<a href="/pkg/go/parser/"><code>go/parser</code></a>,
+<a href="/pkg/go/printer/"><code>go/printer</code></a>, and
+<a href="/pkg/go/doc/"><code>go/doc</code></a>.
+</p>
+
+<p>
+The modes <code>AllowIllegalChars</code> and <code>InsertSemis</code> have been removed
+from the <a href="/pkg/go/scanner/"><code>go/scanner</code></a> package. They were mostly
+useful for scanning text other than Go source files. Instead, the
+<a href="/pkg/text/scanner/"><code>text/scanner</code></a> package should be used
+for that purpose.
+</p>
+
+<p>
+The <a href="/pkg/go/scanner/#ErrorHandler"><code>ErrorHandler</code></a> provided
+to the scanner's <a href="/pkg/go/scanner/#Scanner.Init"><code>Init</code></a> method is
+now simply a function rather than an interface. The <code>ErrorVector</code> type has
+been removed in favor of the (existing) <a href="/pkg/go/scanner/#ErrorList"><code>ErrorList</code></a>
+type, and the <code>ErrorVector</code> methods have been migrated. Instead of embedding
+an <code>ErrorVector</code> in a client of the scanner, now a client should maintain
+an <code>ErrorList</code>.
+</p>
+
+<p>
+The set of parse functions provided by the <a href="/pkg/go/parser/"><code>go/parser</code></a>
+package has been reduced to the primary parse function
+<a href="/pkg/go/parser/#ParseFile"><code>ParseFile</code></a>, and a couple of
+convenience functions <a href="/pkg/go/parser/#ParseDir"><code>ParseDir</code></a>
+and <a href="/pkg/go/parser/#ParseExpr"><code>ParseExpr</code></a>.
+</p>
+
+<p>
+The <a href="/pkg/go/printer/"><code>go/printer</code></a> package supports an additional
+configuration mode <a href="/pkg/go/printer/#Mode"><code>SourcePos</code></a>;
+if set, the printer will emit <code>//line</code> comments such that the generated
+output contains the original source code position information. The new type
+<a href="/pkg/go/printer/#CommentedNode"><code>CommentedNode</code></a> can be
+used to provide comments associated with an arbitrary
+<a href="/pkg/go/ast/#Node"><code>ast.Node</code></a> (until now only
+<a href="/pkg/go/ast/#File"><code>ast.File</code></a> carried comment information).
+</p>
+
+<p>
+The type names of the <a href="/pkg/go/doc/"><code>go/doc</code></a> package have been
+streamlined by removing the <code>Doc</code> suffix: <code>PackageDoc</code>
+is now <code>Package</code>, <code>ValueDoc</code> is <code>Value</code>, etc.
+Also, all types now consistently have a <code>Name</code> field (or <code>Names</code>,
+in the case of type <code>Value</code>) and <code>Type.Factories</code> has become
+<code>Type.Funcs</code>.
+Instead of calling <code>doc.NewPackageDoc(pkg, importpath)</code>,
+documentation for a package is created with:
+</p>
+
+<pre>
+    doc.New(pkg, importpath, mode)
+</pre>
+
+<p>
+where the new <code>mode</code> parameter specifies the operation mode:
+if set to <a href="/pkg/go/doc/#AllDecls"><code>AllDecls</code></a>, all declarations
+(not just exported ones) are considered.
+The function <code>NewFileDoc</code> was removed, and the function
+<code>CommentText</code> has become the method
+<a href="/pkg/go/ast/#Text"><code>Text</code></a> of
+<a href="/pkg/go/ast/#CommentGroup"><code>ast.CommentGroup</code></a>.
+</p>
+
+<p>
+In package <a href="/pkg/go/token/"><code>go/token</code></a>, the
+<a href="/pkg/go/token/#FileSet"><code>token.FileSet</code></a> method <code>Files</code>
+(which originally returned a channel of <code>*token.File</code>s) has been replaced
+with the iterator <a href="/pkg/go/token/#FileSet.Iterate"><code>Iterate</code></a> that
+accepts a function argument instead.
+</p>
+
+<p>
+In package <a href="/pkg/go/build/"><code>go/build</code></a>, the API
+has been nearly completely replaced.
+The package still computes Go package information
+but it does not run the build: the <code>Cmd</code> and <code>Script</code>
+types are gone.
+(To build code, use the new
+<a href="/cmd/go/"><code>go</code></a> command instead.)
+The <code>DirInfo</code> type is now named
+<a href="/pkg/go/build/#Package"><code>Package</code></a>.
+<code>FindTree</code> and <code>ScanDir</code> are replaced by
+<a href="/pkg/go/build/#Import"><code>Import</code></a>
+and
+<a href="/pkg/go/build/#ImportDir"><code>ImportDir</code></a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses packages in <code>go</code> will have to be updated by hand; the
+compiler will reject incorrect uses. Templates used in conjunction with any of the
+<code>go/doc</code> types may need manual fixes; the renamed fields will lead
+to run-time errors.
+</p>
+
+<h3 id="hash">The hash package</h3>
+
+<p>
+In Go 1, the definition of <a href="/pkg/hash/#Hash"><code>hash.Hash</code></a> includes
+a new method, <code>BlockSize</code>.  This new method is used primarily in the
+cryptographic libraries.
+</p>
+
+<p>
+The <code>Sum</code> method of the
+<a href="/pkg/hash/#Hash"><code>hash.Hash</code></a> interface now takes a
+<code>[]byte</code> argument, to which the hash value will be appended.
+The previous behavior can be recreated by adding a <code>nil</code> argument to the call.
+</p>
+
+<p>
+<em>Updating</em>:
+Existing implementations of <code>hash.Hash</code> will need to add a
+<code>BlockSize</code> method.  Hashes that process the input one byte at
+a time can implement <code>BlockSize</code> to return 1.
+Running <code>go</code> <code>fix</code> will update calls to the <code>Sum</code> methods of the various
+implementations of <code>hash.Hash</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Since the package's functionality is new, no updating is necessary.
+</p>
+
+<h3 id="http">The http package</h3>
+
+<p>
+In Go 1 the <a href="/pkg/net/http/"><code>http</code></a> package is refactored,
+putting some of the utilities into a
+<a href="/pkg/net/http/httputil/"><code>httputil</code></a> subdirectory.
+These pieces are only rarely needed by HTTP clients.
+The affected items are:
+</p>
+
+<ul>
+<li>ClientConn</li>
+<li>DumpRequest</li>
+<li>DumpRequestOut</li>
+<li>DumpResponse</li>
+<li>NewChunkedReader</li>
+<li>NewChunkedWriter</li>
+<li>NewClientConn</li>
+<li>NewProxyClientConn</li>
+<li>NewServerConn</li>
+<li>NewSingleHostReverseProxy</li>
+<li>ReverseProxy</li>
+<li>ServerConn</li>
+</ul>
+
+<p>
+The <code>Request.RawURL</code> field has been removed; it was a
+historical artifact.
+</p>
+
+<p>
+The <code>Handle</code> and <code>HandleFunc</code>
+functions, and the similarly-named methods of <code>ServeMux</code>,
+now panic if an attempt is made to register the same pattern twice.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update the few programs that are affected except for
+uses of <code>RawURL</code>, which must be fixed by hand.
+</p>
+
+<h3 id="image">The image package</h3>
+
+<p>
+The <a href="/pkg/image/"><code>image</code></a> package has had a number of
+minor changes, rearrangements and renamings.
+</p>
+
+<p>
+Most of the color handling code has been moved into its own package,
+<a href="/pkg/image/color/"><code>image/color</code></a>.
+For the elements that moved, a symmetry arises; for instance,
+each pixel of an
+<a href="/pkg/image/#RGBA"><code>image.RGBA</code></a>
+is a
+<a href="/pkg/image/color/#RGBA"><code>color.RGBA</code></a>.
+</p>
+
+<p>
+The old <code>image/ycbcr</code> package has been folded, with some
+renamings, into the
+<a href="/pkg/image/"><code>image</code></a>
+and
+<a href="/pkg/image/color/"><code>image/color</code></a>
+packages.
+</p>
+
+<p>
+The old <code>image.ColorImage</code> type is still in the <code>image</code>
+package but has been renamed
+<a href="/pkg/image/#Uniform"><code>image.Uniform</code></a>,
+while <code>image.Tiled</code> has been removed.
+</p>
+
+<p>
+This table lists the renamings.
+</p>
+
+<table class="codetable" frame="border" summary="image renames">
+<colgroup align="left" width="50%"></colgroup>
+<colgroup align="left" width="50%"></colgroup>
+<tr>
+<th align="left">Old</th>
+<th align="left">New</th>
+</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image.Color</td> <td>color.Color</td></tr>
+<tr><td>image.ColorModel</td> <td>color.Model</td></tr>
+<tr><td>image.ColorModelFunc</td> <td>color.ModelFunc</td></tr>
+<tr><td>image.PalettedColorModel</td> <td>color.Palette</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image.RGBAColor</td> <td>color.RGBA</td></tr>
+<tr><td>image.RGBA64Color</td> <td>color.RGBA64</td></tr>
+<tr><td>image.NRGBAColor</td> <td>color.NRGBA</td></tr>
+<tr><td>image.NRGBA64Color</td> <td>color.NRGBA64</td></tr>
+<tr><td>image.AlphaColor</td> <td>color.Alpha</td></tr>
+<tr><td>image.Alpha16Color</td> <td>color.Alpha16</td></tr>
+<tr><td>image.GrayColor</td> <td>color.Gray</td></tr>
+<tr><td>image.Gray16Color</td> <td>color.Gray16</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image.RGBAColorModel</td> <td>color.RGBAModel</td></tr>
+<tr><td>image.RGBA64ColorModel</td> <td>color.RGBA64Model</td></tr>
+<tr><td>image.NRGBAColorModel</td> <td>color.NRGBAModel</td></tr>
+<tr><td>image.NRGBA64ColorModel</td> <td>color.NRGBA64Model</td></tr>
+<tr><td>image.AlphaColorModel</td> <td>color.AlphaModel</td></tr>
+<tr><td>image.Alpha16ColorModel</td> <td>color.Alpha16Model</td></tr>
+<tr><td>image.GrayColorModel</td> <td>color.GrayModel</td></tr>
+<tr><td>image.Gray16ColorModel</td> <td>color.Gray16Model</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>ycbcr.RGBToYCbCr</td> <td>color.RGBToYCbCr</td></tr>
+<tr><td>ycbcr.YCbCrToRGB</td> <td>color.YCbCrToRGB</td></tr>
+<tr><td>ycbcr.YCbCrColorModel</td> <td>color.YCbCrModel</td></tr>
+<tr><td>ycbcr.YCbCrColor</td> <td>color.YCbCr</td></tr>
+<tr><td>ycbcr.YCbCr</td> <td>image.YCbCr</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>ycbcr.SubsampleRatio444</td> <td>image.YCbCrSubsampleRatio444</td></tr>
+<tr><td>ycbcr.SubsampleRatio422</td> <td>image.YCbCrSubsampleRatio422</td></tr>
+<tr><td>ycbcr.SubsampleRatio420</td> <td>image.YCbCrSubsampleRatio420</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>image.ColorImage</td> <td>image.Uniform</td></tr>
+</table>
+
+<p>
+The image package's <code>New</code> functions
+(<a href="/pkg/image/#NewRGBA"><code>NewRGBA</code></a>,
+<a href="/pkg/image/#NewRGBA64"><code>NewRGBA64</code></a>, etc.)
+take an <a href="/pkg/image/#Rectangle"><code>image.Rectangle</code></a> as an argument
+instead of four integers.
+</p>
+
+<p>
+Finally, there are new predefined <code>color.Color</code> variables
+<a href="/pkg/image/color/#Black"><code>color.Black</code></a>,
+<a href="/pkg/image/color/#White"><code>color.White</code></a>,
+<a href="/pkg/image/color/#Opaque"><code>color.Opaque</code></a>
+and
+<a href="/pkg/image/color/#Transparent"><code>color.Transparent</code></a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
+</p>
+
+<h3 id="log_syslog">The log/syslog package</h3>
+
+<p>
+In Go 1, the <a href="/pkg/log/syslog/#NewLogger"><code>syslog.NewLogger</code></a>
+function returns an error as well as a <code>log.Logger</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+What little code is affected will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="mime">The mime package</h3>
+
+<p>
+In Go 1, the <a href="/pkg/mime/#FormatMediaType"><code>FormatMediaType</code></a> function
+of the <code>mime</code> package has been simplified to make it
+consistent with
+<a href="/pkg/mime/#ParseMediaType"><code>ParseMediaType</code></a>.
+It now takes <code>"text/html"</code> rather than <code>"text"</code> and <code>"html"</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+What little code is affected will be caught by the compiler and must be updated by hand.
+</p>
+
+<h3 id="net">The net package</h3>
+
+<p>
+In Go 1, the various <code>SetTimeout</code>,
+<code>SetReadTimeout</code>, and <code>SetWriteTimeout</code> methods
+have been replaced with
+<a href="/pkg/net/#IPConn.SetDeadline"><code>SetDeadline</code></a>,
+<a href="/pkg/net/#IPConn.SetReadDeadline"><code>SetReadDeadline</code></a>, and
+<a href="/pkg/net/#IPConn.SetWriteDeadline"><code>SetWriteDeadline</code></a>,
+respectively.  Rather than taking a timeout value in nanoseconds that
+applies to any activity on the connection, the new methods set an
+absolute deadline (as a <code>time.Time</code> value) after which
+reads and writes will time out and no longer block.
+</p>
+
+<p>
+There are also new functions
+<a href="/pkg/net/#DialTimeout"><code>net.DialTimeout</code></a>
+to simplify timing out dialing a network address and
+<a href="/pkg/net/#ListenMulticastUDP"><code>net.ListenMulticastUDP</code></a>
+to allow multicast UDP to listen concurrently across multiple listeners.
+The <code>net.ListenMulticastUDP</code> function replaces the old
+<code>JoinGroup</code> and <code>LeaveGroup</code> methods.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses the old methods will fail to compile and must be updated by hand.
+The semantic change makes it difficult for the fix tool to update automatically.
+</p>
+
+<h3 id="os">The os package</h3>
+
+<p>
+The <code>Time</code> function has been removed; callers should use
+the <a href="/pkg/time/#Time"><code>Time</code></a> type from the
+<code>time</code> package.
+</p>
+
+<p>
+The <code>Exec</code> function has been removed; callers should use
+<code>Exec</code> from the <code>syscall</code> package, where available.
+</p>
+
+<p>
+The <code>ShellExpand</code> function has been renamed to <a
+href="/pkg/os/#ExpandEnv"><code>ExpandEnv</code></a>.
+</p>
+
+<p>
+The <a href="/pkg/os/#NewFile"><code>NewFile</code></a> function
+now takes a <code>uintptr</code> fd, instead of an <code>int</code>.
+The <a href="/pkg/os/#File.Fd"><code>Fd</code></a> method on files now
+also returns a <code>uintptr</code>.
+</p>
+
+<p>
+There are no longer error constants such as <code>EINVAL</code>
+in the <code>os</code> package, since the set of values varied with
+the underlying operating system. There are new portable functions like
+<a href="/pkg/os/#IsPermission"><code>IsPermission</code></a>
+to test common error properties, plus a few new error values
+with more Go-like names, such as
+<a href="/pkg/os/#ErrPermission"><code>ErrPermission</code></a>
+and
+<a href="/pkg/os/#ErrNoEnv"><code>ErrNoEnv</code></a>.
+</p>
+
+<p>
+The <code>Getenverror</code> function has been removed. To distinguish
+between a non-existent environment variable and an empty string,
+use <a href="/pkg/os/#Environ"><code>os.Environ</code></a> or
+<a href="/pkg/syscall/#Getenv"><code>syscall.Getenv</code></a>.
+</p>
+
+
+<p>
+The <a href="/pkg/os/#Process.Wait"><code>Process.Wait</code></a> method has
+dropped its option argument and the associated constants are gone
+from the package.
+Also, the function <code>Wait</code> is gone; only the method of
+the <code>Process</code> type persists.
+</p>
+
+<p>
+The <code>Waitmsg</code> type returned by
+<a href="/pkg/os/#Process.Wait"><code>Process.Wait</code></a>
+has been replaced with a more portable
+<a href="/pkg/os/#ProcessState"><code>ProcessState</code></a>
+type with accessor methods to recover information about the
+process.
+Because of changes to <code>Wait</code>, the <code>ProcessState</code>
+value always describes an exited process.
+Portability concerns simplified the interface in other ways, but the values returned by the
+<a href="/pkg/os/#ProcessState.Sys"><code>ProcessState.Sys</code></a> and
+<a href="/pkg/os/#ProcessState.SysUsage"><code>ProcessState.SysUsage</code></a>
+methods can be type-asserted to underlying system-specific data structures such as
+<a href="/pkg/syscall/#WaitStatus"><code>syscall.WaitStatus</code></a> and
+<a href="/pkg/syscall/#Rusage"><code>syscall.Rusage</code></a> on Unix.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will drop a zero argument to <code>Process.Wait</code>.
+All other changes will be caught by the compiler and must be updated by hand.
+</p>
+
+<h4 id="os_fileinfo">The os.FileInfo type</h4>
+
+<p>
+Go 1 redefines the <a href="/pkg/os/#FileInfo"><code>os.FileInfo</code></a> type,
+changing it from a struct to an interface:
+</p>
+
+<pre>
+    type FileInfo interface {
+        Name() string       // base name of the file
+        Size() int64        // length in bytes
+        Mode() FileMode     // file mode bits
+        ModTime() time.Time // modification time
+        IsDir() bool        // abbreviation for Mode().IsDir()
+        Sys() interface{}   // underlying data source (can return nil)
+    }
+</pre>
+
+<p>
+The file mode information has been moved into a subtype called
+<a href="/pkg/os/#FileMode"><code>os.FileMode</code></a>,
+a simple integer type with <code>IsDir</code>, <code>Perm</code>, and <code>String</code>
+methods.
+</p>
+
+<p>
+The system-specific details of file modes and properties such as (on Unix)
+i-number have been removed from <code>FileInfo</code> altogether.
+Instead, each operating system's <code>os</code> package provides an
+implementation of the <code>FileInfo</code> interface, which
+has a <code>Sys</code> method that returns the
+system-specific representation of file metadata.
+For instance, to discover the i-number of a file on a Unix system, unpack
+the <code>FileInfo</code> like this:
+</p>
+
+<pre>
+    fi, err := os.Stat("hello.go")
+    if err != nil {
+        log.Fatal(err)
+    }
+    // Check that it's a Unix file.
+    unixStat, ok := fi.Sys().(*syscall.Stat_t)
+    if !ok {
+        log.Fatal("hello.go: not a Unix file")
+    }
+    fmt.Printf("file i-number: %d\n", unixStat.Ino)
+</pre>
+
+<p>
+Assuming (which is unwise) that <code>"hello.go"</code> is a Unix file,
+the i-number expression could be contracted to
+</p>
+
+<pre>
+    fi.Sys().(*syscall.Stat_t).Ino
+</pre>
+
+<p>
+The vast majority of uses of <code>FileInfo</code> need only the methods
+of the standard interface.
+</p>
+
+<p>
+The <code>os</code> package no longer contains wrappers for the POSIX errors
+such as <code>ENOENT</code>.
+For the few programs that need to verify particular error conditions, there are
+now the boolean functions
+<a href="/pkg/os/#IsExist"><code>IsExist</code></a>,
+<a href="/pkg/os/#IsNotExist"><code>IsNotExist</code></a>
+and
+<a href="/pkg/os/#IsPermission"><code>IsPermission</code></a>.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/os\.Open/` `/}/`}}
+-->    f, err := os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0600)
+    if os.IsExist(err) {
+        log.Printf(&#34;%s already exists&#34;, name)
+    }</pre>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update code that uses the old equivalent of the current <code>os.FileInfo</code>
+and <code>os.FileMode</code> API.
+Code that needs system-specific file details will need to be updated by hand.
+Code that uses the old POSIX error values from the <code>os</code> package
+will fail to compile and will also need to be updated by hand.
+</p>
+
+<h3 id="os_signal">The os/signal package</h3>
+
+<p>
+The <code>os/signal</code> package in Go 1 replaces the
+<code>Incoming</code> function, which returned a channel
+that received all incoming signals,
+with the selective <code>Notify</code> function, which asks
+for delivery of specific signals on an existing channel.
+</p>
+
+<p>
+<em>Updating</em>:
+Code must be updated by hand.
+A literal translation of
+</p>
+<pre>
+c := signal.Incoming()
+</pre>
+<p>
+is
+</p>
+<pre>
+c := make(chan os.Signal)
+signal.Notify(c) // ask for all signals
+</pre>
+<p>
+but most code should list the specific signals it wants to handle instead:
+</p>
+<pre>
+c := make(chan os.Signal)
+signal.Notify(c, syscall.SIGHUP, syscall.SIGQUIT)
+</pre>
+
+<h3 id="path_filepath">The path/filepath package</h3>
+
+<p>
+In Go 1, the <a href="/pkg/path/filepath/#Walk"><code>Walk</code></a> function of the
+<code>path/filepath</code> package
+has been changed to take a function value of type
+<a href="/pkg/path/filepath/#WalkFunc"><code>WalkFunc</code></a>
+instead of a <code>Visitor</code> interface value.
+<code>WalkFunc</code> unifies the handling of both files and directories.
+</p>
+
+<pre>
+    type WalkFunc func(path string, info os.FileInfo, err error) error
+</pre>
+
+<p>
+The <code>WalkFunc</code> function will be called even for files or directories that could not be opened;
+in such cases the error argument will describe the failure.
+If a directory's contents are to be skipped,
+the function should return the value <a href="/pkg/path/filepath/#variables"><code>filepath.SkipDir</code></a>.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/STARTWALK/` `/ENDWALK/`}}
+-->    markFn := func(path string, info os.FileInfo, err error) error {
+        if path == &#34;pictures&#34; { <span class="comment">// Will skip walking of directory pictures and its contents.</span>
+            return filepath.SkipDir
+        }
+        if err != nil {
+            return err
+        }
+        log.Println(path)
+        return nil
+    }
+    err := filepath.Walk(&#34;.&#34;, markFn)
+    if err != nil {
+        log.Fatal(err)
+    }</pre>
+
+<p>
+<em>Updating</em>:
+The change simplifies most code but has subtle consequences, so affected programs
+will need to be updated by hand.
+The compiler will catch code using the old interface.
+</p>
+
+<h3 id="regexp">The regexp package</h3>
+
+<p>
+The <a href="/pkg/regexp/"><code>regexp</code></a> package has been rewritten.
+It has the same interface but the specification of the regular expressions
+it supports has changed from the old "egrep" form to that of
+<a href="http://code.google.com/p/re2/">RE2</a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses the package should have its regular expressions checked by hand.
+</p>
+
+<h3 id="runtime">The runtime package</h3>
+
+<p>
+In Go 1, much of the API exported by package
+<code>runtime</code> has been removed in favor of
+functionality provided by other packages.
+Code using the <code>runtime.Type</code> interface
+or its specific concrete type implementations should
+now use package <a href="/pkg/reflect/"><code>reflect</code></a>.
+Code using <code>runtime.Semacquire</code> or <code>runtime.Semrelease</code>
+should use channels or the abstractions in package <a href="/pkg/sync/"><code>sync</code></a>.
+The <code>runtime.Alloc</code>, <code>runtime.Free</code>,
+and <code>runtime.Lookup</code> functions, an unsafe API created for
+debugging the memory allocator, have no replacement.
+</p>
+
+<p>
+Before, <code>runtime.MemStats</code> was a global variable holding
+statistics about memory allocation, and calls to <code>runtime.UpdateMemStats</code>
+ensured that it was up to date.
+In Go 1, <code>runtime.MemStats</code> is a struct type, and code should use
+<a href="/pkg/runtime/#ReadMemStats"><code>runtime.ReadMemStats</code></a>
+to obtain the current statistics.
+</p>
+
+<p>
+The package adds a new function,
+<a href="/pkg/runtime/#NumCPU"><code>runtime.NumCPU</code></a>, that returns the number of CPUs available
+for parallel execution, as reported by the operating system kernel.
+Its value can inform the setting of <code>GOMAXPROCS</code>.
+The <code>runtime.Cgocalls</code> and <code>runtime.Goroutines</code> functions
+have been renamed to <code>runtime.NumCgoCall</code> and <code>runtime.NumGoroutine</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update code for the function renamings.
+Other code will need to be updated by hand.
+</p>
+
+<h3 id="strconv">The strconv package</h3>
+
+<p>
+In Go 1, the
+<a href="/pkg/strconv/"><code>strconv</code></a>
+package has been significantly reworked to make it more Go-like and less C-like,
+although <code>Atoi</code> lives on (it's similar to
+<code>int(ParseInt(x, 10, 0))</code>), as does
+<code>Itoa(x)</code> (<code>FormatInt(int64(x), 10)</code>).
+There are also new variants of some of the functions that append to byte slices rather than
+return strings, to allow control over allocation.
+</p>
+
+<p>
+This table summarizes the renamings; see the
+<a href="/pkg/strconv/">package documentation</a>
+for full details.
+</p>
+
+<table class="codetable" frame="border" summary="strconv renames">
+<colgroup align="left" width="50%"></colgroup>
+<colgroup align="left" width="50%"></colgroup>
+<tr>
+<th align="left">Old call</th>
+<th align="left">New call</th>
+</tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Atob(x)</td> <td>ParseBool(x)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Atof32(x)</td> <td>ParseFloat(x, 32)§</td></tr>
+<tr><td>Atof64(x)</td> <td>ParseFloat(x, 64)</td></tr>
+<tr><td>AtofN(x, n)</td> <td>ParseFloat(x, n)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Atoi(x)</td> <td>Atoi(x)</td></tr>
+<tr><td>Atoi(x)</td> <td>ParseInt(x, 10, 0)§</td></tr>
+<tr><td>Atoi64(x)</td> <td>ParseInt(x, 10, 64)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Atoui(x)</td> <td>ParseUint(x, 10, 0)§</td></tr>
+<tr><td>Atoui64(x)</td> <td>ParseUint(x, 10, 64)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Btoi64(x, b)</td> <td>ParseInt(x, b, 64)</td></tr>
+<tr><td>Btoui64(x, b)</td> <td>ParseUint(x, b, 64)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Btoa(x)</td> <td>FormatBool(x)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Ftoa32(x, f, p)</td> <td>FormatFloat(float64(x), f, p, 32)</td></tr>
+<tr><td>Ftoa64(x, f, p)</td> <td>FormatFloat(x, f, p, 64)</td></tr>
+<tr><td>FtoaN(x, f, p, n)</td> <td>FormatFloat(x, f, p, n)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Itoa(x)</td> <td>Itoa(x)</td></tr>
+<tr><td>Itoa(x)</td> <td>FormatInt(int64(x), 10)</td></tr>
+<tr><td>Itoa64(x)</td> <td>FormatInt(x, 10)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Itob(x, b)</td> <td>FormatInt(int64(x), b)</td></tr>
+<tr><td>Itob64(x, b)</td> <td>FormatInt(x, b)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Uitoa(x)</td> <td>FormatUint(uint64(x), 10)</td></tr>
+<tr><td>Uitoa64(x)</td> <td>FormatUint(x, 10)</td></tr>
+<tr>
+<td colspan="2"><hr></td>
+</tr>
+<tr><td>Uitob(x, b)</td> <td>FormatUint(uint64(x), b)</td></tr>
+<tr><td>Uitob64(x, b)</td> <td>FormatUint(x, b)</td></tr>
+</table>
+               
+<p>
+<em>Updating</em>:
+Running <code>go</code> <code>fix</code> will update almost all code affected by the change.
+<br>
+§ <code>Atoi</code> persists but <code>Atoui</code> and <code>Atof32</code> do not, so
+they may require
+a cast that must be added by hand; the <code>go</code> <code>fix</code> tool will warn about it.
+</p>
+
+
+<h3 id="templates">The template packages</h3>
+
+<p>
+The <code>template</code> and <code>exp/template/html</code> packages have moved to 
+<a href="/pkg/text/template/"><code>text/template</code></a> and
+<a href="/pkg/html/template/"><code>html/template</code></a>.
+More significant, the interface to these packages has been simplified.
+The template language is the same, but the concept of "template set" is gone
+and the functions and methods of the packages have changed accordingly,
+often by elimination.
+</p>
+
+<p>
+Instead of sets, a <code>Template</code> object
+may contain multiple named template definitions,
+in effect constructing
+name spaces for template invocation.
+A template can invoke any other template associated with it, but only those
+templates associated with it.
+The simplest way to associate templates is to parse them together, something
+made easier with the new structure of the packages.
+</p>
+
+<p>
+<em>Updating</em>:
+The imports will be updated by the fix tool.
+Single-template uses will otherwise be largely unaffected.
+Code that uses multiple templates in concert will need to be updated by hand.
+The <a href="/pkg/text/template/#examples">examples</a> in
+the documentation for <code>text/template</code> can provide guidance.
+</p>
+
+<h3 id="testing">The testing package</h3>
+
+<p>
+The testing package has a type, <code>B</code>, passed as an argument to benchmark functions.
+In Go 1, <code>B</code> has new methods, analogous to those of <code>T</code>, enabling
+logging and failure reporting.
+</p>
+
+<pre><!--{{code "/doc/progs/go1.go" `/func.*Benchmark/` `/^}/`}}
+-->func BenchmarkSprintf(b *testing.B) {
+    <span class="comment">// Verify correctness before running benchmark.</span>
+    b.StopTimer()
+    got := fmt.Sprintf(&#34;%x&#34;, 23)
+    const expect = &#34;17&#34;
+    if expect != got {
+        b.Fatalf(&#34;expected %q; got %q&#34;, expect, got)
+    }
+    b.StartTimer()
+    for i := 0; i &lt; b.N; i++ {
+        fmt.Sprintf(&#34;%x&#34;, 23)
+    }
+}</pre>
+
+<p>
+<em>Updating</em>:
+Existing code is unaffected, although benchmarks that use <code>println</code>
+or <code>panic</code> should be updated to use the new methods.
+</p>
+
+<h3 id="testing_script">The testing/script package</h3>
+
+<p>
+The testing/script package has been deleted. It was a dreg.
+</p>
+
+<p>
+<em>Updating</em>:
+No code is likely to be affected.
+</p>
+
+<h3 id="unsafe">The unsafe package</h3>
+
+<p>
+In Go 1, the functions
+<code>unsafe.Typeof</code>, <code>unsafe.Reflect</code>,
+<code>unsafe.Unreflect</code>, <code>unsafe.New</code>, and
+<code>unsafe.NewArray</code> have been removed;
+they duplicated safer functionality provided by
+package <a href="/pkg/reflect/"><code>reflect</code></a>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code using these functions must be rewritten to use
+package <a href="/pkg/reflect/"><code>reflect</code></a>.
+The changes to <a href="http://code.google.com/p/go/source/detail?r=2646dc956207">encoding/gob</a> and the <a href="http://code.google.com/p/goprotobuf/source/detail?r=5340ad310031">protocol buffer library</a>
+may be helpful as examples.
+</p>
+
+<h3 id="url">The url package</h3>
+
+<p>
+In Go 1 several fields from the <a href="/pkg/net/url/#URL"><code>url.URL</code></a> type
+were removed or replaced.
+</p>
+
+<p>
+The <a href="/pkg/net/url/#URL.String"><code>String</code></a> method now
+predictably rebuilds an encoded URL string using all of <code>URL</code>'s
+fields as necessary. The resulting string will also no longer have
+passwords escaped.
+</p>
+
+<p>
+The <code>Raw</code> field has been removed. In most cases the <code>String</code>
+method may be used in its place.
+</p>
+
+<p>
+The old <code>RawUserinfo</code> field is replaced by the <code>User</code>
+field, of type <a href="/pkg/net/url/#Userinfo"><code>*url.Userinfo</code></a>.
+Values of this type may be created using the new <a href="/pkg/net/url/#User"><code>url.User</code></a>
+and <a href="/pkg/net/url/#UserPassword"><code>url.UserPassword</code></a>
+functions. The <code>EscapeUserinfo</code> and <code>UnescapeUserinfo</code>
+functions are also gone.
+</p>
+
+<p>
+The <code>RawAuthority</code> field has been removed. The same information is
+available in the <code>Host</code> and <code>User</code> fields.
+</p>
+
+<p>
+The <code>RawPath</code> field and the <code>EncodedPath</code> method have
+been removed. The path information in rooted URLs (with a slash following the
+scheme) is now available only in decoded form in the <code>Path</code> field.
+Occasionally, the encoded data may be required to obtain information that
+was lost in the decoding process. These cases must be handled by accessing
+the data the URL was built from.
+</p>
+
+<p>
+URLs with non-rooted paths, such as <code>"mailto:dev@golang.org?subject=Hi"</code>,
+are also handled differently. The <code>OpaquePath</code> boolean field has been
+removed and a new <code>Opaque</code> string field introduced to hold the encoded
+path for such URLs. In Go 1, the cited URL parses as:
+</p>
+
+<pre>
+    URL{
+        Scheme: "mailto",
+        Opaque: "dev@golang.org",
+        RawQuery: "subject=Hi",
+    }
+</pre>
+
+<p>
+A new <a href="/pkg/net/url/#URL.RequestURI"><code>RequestURI</code></a> method was
+added to <code>URL</code>.
+</p>
+
+<p>
+The <code>ParseWithReference</code> function has been renamed to <code>ParseWithFragment</code>.
+</p>
+
+<p>
+<em>Updating</em>:
+Code that uses the old fields will fail to compile and must be updated by hand.
+The semantic changes make it difficult for the fix tool to update automatically.
+</p>
+
+<h2 id="cmd_go">The go command</h2>
+
+<p>
+Go 1 introduces the <a href="/cmd/go/">go command</a>, a tool for fetching,
+building, and installing Go packages and commands. The <code>go</code> command
+does away with makefiles, instead using Go source code to find dependencies and
+determine build conditions. Most existing Go programs will no longer require
+makefiles to be built.
+</p>
+
+<p>
+See <a href="/doc/code.html">How to Write Go Code</a> for a primer on the
+<code>go</code> command and the <a href="/cmd/go/">go command documentation</a>
+for the full details.
+</p>
+
+<p>
+<em>Updating</em>:
+Projects that depend on the Go project's old makefile-based build
+infrastructure (<code>Make.pkg</code>, <code>Make.cmd</code>, and so on) should
+switch to using the <code>go</code> command for building Go code and, if
+necessary, rewrite their makefiles to perform any auxiliary build tasks.
+</p>
+
+<h2 id="cmd_cgo">The cgo command</h2>
+
+<p>
+In Go 1, the <a href="/cmd/cgo">cgo command</a>
+uses a different <code>_cgo_export.h</code>
+file, which is generated for packages containing <code>//export</code> lines.
+The <code>_cgo_export.h</code> file now begins with the C preamble comment,
+so that exported function definitions can use types defined there.
+This has the effect of compiling the preamble multiple times, so a
+package using <code>//export</code> must not put function definitions
+or variable initializations in the C preamble.
+</p>
+
+<h2 id="releases">Packaged releases</h2>
+
+<p>
+One of the most significant changes associated with Go 1 is the availability
+of prepackaged, downloadable distributions.
+They are available for many combinations of architecture and operating system
+(including Windows) and the list will grow.
+Installation details are described on the
+<a href="/doc/install">Getting Started</a> page, while
+the distributions themselves are listed on the
+<a href="http://code.google.com/p/go/downloads/list">downloads page</a>.
+
+
+</div>
+
+<div id="footer">
+Build version go1.0.1.<br>
+A link <a href="http://code.google.com/policies.html#restrictions">noted</a>,
+and then, coming up on the very next line, we will
+find yet another link, link 3.0 if you will,
+after a few more words <a href="/LINK">link text</a>.<br>
+<a href="/doc/tos.html">Terms of Service</a> | 
+<a href="http://www.google.com/intl/en/privacy/privacy-policy.html">Privacy Policy</a>
+</div>
+
+<script type="text/javascript">
+(function() {
+  var ga = document.createElement("script"); ga.type = "text/javascript"; ga.async = true;
+  ga.src = ("https:" == document.location.protocol ? "https://ssl" : "http://www") + ".google-analytics.com/ga.js";
+  var s = document.getElementsByTagName("script")[0]; s.parentNode.insertBefore(ga, s);
+})();
+</script>
+</body>
+<script type="text/javascript">
+  (function() {
+    var po = document.createElement('script'); po.type = 'text/javascript'; po.async = true;
+    po.src = 'https://apis.google.com/js/minusone.js';
+    var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(po, s);
+  })();
+</script>
+</html>
+
diff --git a/internal/html/testdata/webkit/README b/internal/html/testdata/webkit/README
new file mode 100644 (file)
index 0000000..9b4c2d8
--- /dev/null
@@ -0,0 +1,28 @@
+The *.dat files in this directory are copied from The WebKit Open Source
+Project, specifically $WEBKITROOT/LayoutTests/html5lib/resources.
+WebKit is licensed under a BSD style license.
+http://webkit.org/coding/bsd-license.html says:
+
+Copyright (C) 2009 Apple Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/internal/html/testdata/webkit/adoption01.dat b/internal/html/testdata/webkit/adoption01.dat
new file mode 100644 (file)
index 0000000..38f98ef
--- /dev/null
@@ -0,0 +1,354 @@
+#data
+<a><p></a></p>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,10): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+
+#data
+<a>1<p>2</a>3</p>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,12): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+
+#data
+<a>1<button>2</a>3</button>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,17): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <button>
+|       <a>
+|         "2"
+|       "3"
+
+#data
+<a>1<b>2</a>3</b>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,12): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <b>
+|         "2"
+|     <b>
+|       "3"
+
+#data
+<a>1<div>2<div>3</a>4</div>5</div>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,20): adoption-agency-1.3
+(1,20): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <div>
+|       <a>
+|         "2"
+|       <div>
+|         <a>
+|           "3"
+|         "4"
+|       "5"
+
+#data
+<table><a>1<p>2</a>3</p>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,11): unexpected-character-implies-table-voodoo
+(1,14): unexpected-start-tag-implies-table-voodoo
+(1,15): unexpected-character-implies-table-voodoo
+(1,19): unexpected-end-tag-implies-table-voodoo
+(1,19): adoption-agency-1.3
+(1,20): unexpected-character-implies-table-voodoo
+(1,24): unexpected-end-tag-implies-table-voodoo
+(1,24): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+|     <table>
+
+#data
+<b><b><a><p></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): adoption-agency-1.3
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <b>
+|         <a>
+|         <p>
+|           <a>
+
+#data
+<b><a><b><p></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): adoption-agency-1.3
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <a>
+|         <b>
+|       <b>
+|         <p>
+|           <a>
+
+#data
+<a><b><b><p></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): adoption-agency-1.3
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|         <b>
+|     <b>
+|       <b>
+|         <p>
+|           <a>
+
+#data
+<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,30): unexpected-end-tag
+(1,35): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "1"
+|       <s>
+|         id="A"
+|         "2"
+|         <b>
+|           id="B"
+|           "3"
+|     <s>
+|       id="A"
+|       <b>
+|         id="B"
+|         "4"
+|     <b>
+|       id="B"
+|       "5"
+
+#data
+<table><a>1<td>2</td>3</table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,11): unexpected-character-implies-table-voodoo
+(1,15): unexpected-cell-in-table-body
+(1,30): unexpected-implied-end-tag-in-table-view
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <a>
+|       "3"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "2"
+
+#data
+<table>A<td>B</td>C</table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,8): unexpected-character-implies-table-voodoo
+(1,12): unexpected-cell-in-table-body
+(1,22): unexpected-character-implies-table-voodoo
+#document
+| <html>
+|   <head>
+|   <body>
+|     "AC"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "B"
+
+#data
+<a><svg><tr><input></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,23): unexpected-end-tag
+(1,23): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <svg svg>
+|         <svg tr>
+|           <svg input>
+
+#data
+<div><a><b><div><div><div><div><div><div><div><div><div><div></a>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): adoption-agency-1.3
+(1,65): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         <b>
+|       <b>
+|         <div>
+|           <a>
+|           <div>
+|             <a>
+|             <div>
+|               <a>
+|               <div>
+|                 <a>
+|                 <div>
+|                   <a>
+|                   <div>
+|                     <a>
+|                     <div>
+|                       <a>
+|                       <div>
+|                         <a>
+|                           <div>
+|                             <div>
+
+#data
+<div><a><b><u><i><code><div></a>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,32): adoption-agency-1.3
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         <b>
+|           <u>
+|             <i>
+|               <code>
+|       <u>
+|         <i>
+|           <code>
+|             <div>
+|               <a>
+
+#data
+<b><b><b><b>x</b></b></b></b>y
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <b>
+|         <b>
+|           <b>
+|             "x"
+|     "y"
+
+#data
+<p><b><b><b><b><p>x
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,18): unexpected-end-tag
+(1,19): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         <b>
+|           <b>
+|             <b>
+|     <p>
+|       <b>
+|         <b>
+|           <b>
+|             "x"
+
+#data
+<b><em><foo><foob><fooc><aside></b></em>
+#errors
+(1,35): adoption-agency-1.3
+(1,40): adoption-agency-1.3
+(1,40): expected-closing-tag-but-got-eof
+#document-fragment
+div
+#document
+| <b>
+|   <em>
+|     <foo>
+|       <foob>
+|         <fooc>
+| <aside>
+|   <b>
diff --git a/internal/html/testdata/webkit/adoption02.dat b/internal/html/testdata/webkit/adoption02.dat
new file mode 100644 (file)
index 0000000..e54d803
--- /dev/null
@@ -0,0 +1,39 @@
+#data
+<b>1<i>2<p>3</b>4
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): adoption-agency-1.3
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "1"
+|       <i>
+|         "2"
+|     <i>
+|       <p>
+|         <b>
+|           "3"
+|         "4"
+
+#data
+<a><div><style></style><address><a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,35): unexpected-start-tag-implies-end-tag
+(1,35): adoption-agency-1.3
+(1,35): adoption-agency-1.3
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <div>
+|       <a>
+|         <style>
+|       <address>
+|         <a>
+|         <a>
diff --git a/internal/html/testdata/webkit/blocks.dat b/internal/html/testdata/webkit/blocks.dat
new file mode 100644 (file)
index 0000000..5d3871e
--- /dev/null
@@ -0,0 +1,719 @@
+#data
+<!doctype html><p>foo<address>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “address”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <address>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><address><p>foo</address>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<article>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “article”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <article>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><article><p>foo</article>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <article>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<aside>bar<p>baz
+#errors
+(1,37): expected-closing-tag-but-got-eof
+28: Unclosed element “aside”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <aside>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><aside><p>foo</aside>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <aside>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<blockquote>bar<p>baz
+#errors
+(1,42): expected-closing-tag-but-got-eof
+33: Unclosed element “blockquote”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <blockquote>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><blockquote><p>foo</blockquote>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <blockquote>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<center>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “center”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <center>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><center><p>foo</center>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<details>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “details”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <details>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><details><p>foo</details>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <details>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<dialog>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “dialog”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <dialog>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><dialog><p>foo</dialog>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dialog>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<dir>bar<p>baz
+#errors
+(1,35): expected-closing-tag-but-got-eof
+26: Unclosed element “dir”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <dir>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><dir><p>foo</dir>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dir>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<div>bar<p>baz
+#errors
+(1,35): expected-closing-tag-but-got-eof
+26: Unclosed element “div”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <div>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><div><p>foo</div>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<dl>bar<p>baz
+#errors
+(1,34): expected-closing-tag-but-got-eof
+25: Unclosed element “dl”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <dl>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><dl><p>foo</dl>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dl>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<fieldset>bar<p>baz
+#errors
+(1,40): expected-closing-tag-but-got-eof
+31: Unclosed element “fieldset”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <fieldset>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><fieldset><p>foo</fieldset>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <fieldset>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<figcaption>bar<p>baz
+#errors
+(1,42): expected-closing-tag-but-got-eof
+33: Unclosed element “figcaption”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <figcaption>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><figcaption><p>foo</figcaption>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <figcaption>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<figure>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “figure”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <figure>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><figure><p>foo</figure>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <figure>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<footer>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “footer”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <footer>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><footer><p>foo</footer>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <footer>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<header>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “header”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <header>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><header><p>foo</header>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <header>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<hgroup>bar<p>baz
+#errors
+(1,38): expected-closing-tag-but-got-eof
+29: Unclosed element “hgroup”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <hgroup>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><hgroup><p>foo</hgroup>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hgroup>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<listing>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “listing”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <listing>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><listing><p>foo</listing>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<menu>bar<p>baz
+#errors
+(1,36): expected-closing-tag-but-got-eof
+27: Unclosed element “menu”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <menu>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><menu><p>foo</menu>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menu>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<nav>bar<p>baz
+#errors
+(1,35): expected-closing-tag-but-got-eof
+26: Unclosed element “nav”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <nav>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><nav><p>foo</nav>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nav>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<ol>bar<p>baz
+#errors
+(1,34): expected-closing-tag-but-got-eof
+25: Unclosed element “ol”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <ol>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><ol><p>foo</ol>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ol>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<pre>bar<p>baz
+#errors
+(1,35): expected-closing-tag-but-got-eof
+26: Unclosed element “pre”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <pre>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><pre><p>foo</pre>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<section>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “section”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <section>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><section><p>foo</section>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <section>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<summary>bar<p>baz
+#errors
+(1,39): expected-closing-tag-but-got-eof
+30: Unclosed element “summary”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <summary>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><summary><p>foo</summary>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <summary>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!doctype html><p>foo<ul>bar<p>baz
+#errors
+(1,34): expected-closing-tag-but-got-eof
+25: Unclosed element “ul”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <ul>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><ul><p>foo</ul>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <p>
+|         "foo"
+|     "bar"
diff --git a/internal/html/testdata/webkit/comments01.dat b/internal/html/testdata/webkit/comments01.dat
new file mode 100644 (file)
index 0000000..fa79c2b
--- /dev/null
@@ -0,0 +1,224 @@
+#data
+FOO<!-- BAR -->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR --!>BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-bang-after-double-dash-in-comment
+#new-errors
+(1:16) incorrectly-closed-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR --! >BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:20) eof-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR --! >BAZ -->
+
+#data
+FOO<!-- BAR --!
+>BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:20) eof-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR --!
+>BAZ -->
+
+#data
+FOO<!-- BAR --   >BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-char-in-comment
+(1,21): eof-in-comment
+#new-errors
+(1:22) eof-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR --   >BAZ -->
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX -->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-char-in-comment
+(1,24): unexpected-char-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-char-in-comment
+(1,24): unexpected-char-in-comment
+(1,31): unexpected-bang-after-double-dash-in-comment
+#new-errors
+(1:32) incorrectly-closed-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): unexpected-char-in-comment
+(1,24): unexpected-char-in-comment
+(1,31): unexpected-char-in-comment
+(1,35): eof-in-comment
+#new-errors
+(1:36) eof-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX -- >BAZ -->
+
+#data
+FOO<!---->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+FOO<!--->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,9): incorrect-comment
+#new-errors
+(1:9) abrupt-closing-of-empty-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+FOO<!-->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,8): incorrect-comment
+#new-errors
+(1:8) abrupt-closing-of-empty-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+<?xml version="1.0">Hi
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,22): expected-doctype-but-got-chars
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?xml version="1.0" -->
+| <html>
+|   <head>
+|   <body>
+|     "Hi"
+
+#data
+<?xml version="1.0">
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,20): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?xml version="1.0" -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?xml version
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,13): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?xml version -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+FOO<!----->BAZ
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,10): unexpected-dash-after-double-dash-in-comment
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!-- - -->
+|     "BAZ"
+
+#data
+<html><!-- comment --><title>Comment before head</title>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <!--  comment  -->
+|   <head>
+|     <title>
+|       "Comment before head"
+|   <body>
diff --git a/internal/html/testdata/webkit/doctype01.dat b/internal/html/testdata/webkit/doctype01.dat
new file mode 100644 (file)
index 0000000..c845bec
--- /dev/null
@@ -0,0 +1,470 @@
+#data
+<!DOCTYPE html>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!dOctYpE HtMl>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPEhtml>Hello
+#errors
+(1,9): need-space-after-doctype
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE>Hello
+#errors
+(1,9): need-space-after-doctype
+(1,10): expected-doctype-name-but-got-right-bracket
+(1,10): unknown-doctype
+#new-errors
+(1:10) missing-doctype-name
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE >Hello
+#errors
+(1,11): expected-doctype-name-but-got-right-bracket
+(1,11): unknown-doctype
+#new-errors
+(1:11) missing-doctype-name
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato>Hello
+#errors
+(1,17): unknown-doctype
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato >Hello
+#errors
+(1,18): unknown-doctype
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato taco>Hello
+#errors
+(1,17): expected-space-or-right-bracket-in-doctype
+(1,22): unknown-doctype
+#new-errors
+(1:18) invalid-character-sequence-after-doctype-name
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato taco "ddd>Hello
+#errors
+(1,17): expected-space-or-right-bracket-in-doctype
+(1,27): unknown-doctype
+#new-errors
+(1:18) invalid-character-sequence-after-doctype-name
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato sYstEM>Hello
+#errors
+(1,24): unexpected-char-in-doctype
+(1,24): unknown-doctype
+#new-errors
+(1:24) missing-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato sYstEM    >Hello
+#errors
+(1,28): unexpected-char-in-doctype
+(1,28): unknown-doctype
+#new-errors
+(1:28) missing-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE   potato       sYstEM  ggg>Hello
+#errors
+(1,34): unexpected-char-in-doctype
+(1,37): unknown-doctype
+#new-errors
+(1:34) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM taco  >Hello
+#errors
+(1,25): unexpected-char-in-doctype
+(1,31): unknown-doctype
+#new-errors
+(1:25) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM 'taco"'>Hello
+#errors
+(1,32): unknown-doctype
+#document
+| <!DOCTYPE potato "" "taco"">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM "taco">Hello
+#errors
+(1,31): unknown-doctype
+#document
+| <!DOCTYPE potato "" "taco">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM "tai'co">Hello
+#errors
+(1,33): unknown-doctype
+#document
+| <!DOCTYPE potato "" "tai'co">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEMtaco "ddd">Hello
+#errors
+(1,24): unexpected-char-in-doctype
+(1,34): unknown-doctype
+#new-errors
+(1:24) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato grass SYSTEM taco>Hello
+#errors
+(1,17): expected-space-or-right-bracket-in-doctype
+(1,35): unknown-doctype
+#new-errors
+(1:18) invalid-character-sequence-after-doctype-name
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIc>Hello
+#errors
+(1,24): unexpected-end-of-doctype
+(1,24): unknown-doctype
+#new-errors
+(1:24) missing-doctype-public-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIc >Hello
+#errors
+(1,25): unexpected-end-of-doctype
+(1,25): unknown-doctype
+#new-errors
+(1:25) missing-doctype-public-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIcgoof>Hello
+#errors
+(1,24): unexpected-char-in-doctype
+(1,28): unknown-doctype
+#new-errors
+(1:24) missing-quote-before-doctype-public-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC goof>Hello
+#errors
+(1,25): unexpected-char-in-doctype
+(1,29): unknown-doctype
+#new-errors
+(1:25) missing-quote-before-doctype-public-identifier
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC "go'of">Hello
+#errors
+(1,32): unknown-doctype
+#document
+| <!DOCTYPE potato "go'of" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC 'go'of'>Hello
+#errors
+(1,29): unexpected-char-in-doctype
+(1,32): unknown-doctype
+#new-errors
+(1:29) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato "go" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC 'go:hh   of' >Hello
+#errors
+(1,38): unknown-doctype
+#document
+| <!DOCTYPE potato "go:hh   of" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
+#errors
+(1,38): unexpected-char-in-doctype
+(1,48): unknown-doctype
+#new-errors
+(1:38) missing-quote-before-doctype-system-identifier
+#document
+| <!DOCTYPE potato "W3C-//dfdf" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+   "http://www.w3.org/TR/html4/strict.dtd">Hello
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE ...>Hello
+#errors
+(1,14): unknown-doctype
+#document
+| <!DOCTYPE ...>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+#errors
+(2,58): unknown-doctype
+#document
+| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
+#errors
+(2,54): unknown-doctype
+#document
+| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [ 
+<!-- internal declarations -->
+]>
+#errors
+(1,23): expected-space-or-right-bracket-in-doctype
+(2,30): unknown-doctype
+#new-errors
+(1:24) invalid-character-sequence-after-doctype-name
+#document
+| <!DOCTYPE root-element>
+| <html>
+|   <head>
+|   <body>
+|     "]>"
+
+#data
+<!DOCTYPE html PUBLIC
+  "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
+    "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
+#errors
+(3,53): unknown-doctype
+#document
+| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
+#errors
+(1,63): unknown-doctype
+#document
+| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "Mine!"
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
+#errors
+(1,50): unexpected-char-in-doctype
+#new-errors
+(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+(1,50): unexpected-char-in-doctype
+#new-errors
+(1:50) missing-whitespace-between-doctype-public-and-system-identifiers
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+(1,21): unexpected-char-in-doctype
+(1,49): unexpected-char-in-doctype
+#new-errors
+(1:22) missing-whitespace-after-doctype-public-keyword
+(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+(1,21): unexpected-char-in-doctype
+(1,49): unexpected-char-in-doctype
+#new-errors
+(1:22) missing-whitespace-after-doctype-public-keyword
+(1:49) missing-whitespace-between-doctype-public-and-system-identifiers
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
diff --git a/internal/html/testdata/webkit/domjs-unsafe.dat b/internal/html/testdata/webkit/domjs-unsafe.dat
new file mode 100644 (file)
index 0000000..0a1b10f
Binary files /dev/null and b/internal/html/testdata/webkit/domjs-unsafe.dat differ
diff --git a/internal/html/testdata/webkit/entities01.dat b/internal/html/testdata/webkit/entities01.dat
new file mode 100644 (file)
index 0000000..9bacebe
--- /dev/null
@@ -0,0 +1,943 @@
+#data
+FOO&gt;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>BAR"
+
+#data
+FOO&gtBAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,6): named-entity-without-semicolon
+#new-errors
+(1:7) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>BAR"
+
+#data
+FOO&gt BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,6): named-entity-without-semicolon
+#new-errors
+(1:7) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO> BAR"
+
+#data
+FOO&gt;;;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>;;BAR"
+
+#data
+I'm &notit; I tell you
+#errors
+(1,4): expected-doctype-but-got-chars
+(1,9): named-entity-without-semicolon
+#new-errors
+(1:9) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "I'm ¬it; I tell you"
+
+#data
+I'm &notin; I tell you
+#errors
+(1,4): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "I'm ∉ I tell you"
+
+#data
+&ammmp;
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,7): unknown-named-character-reference
+#new-errors
+(1:7) unknown-named-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&ammmp;"
+
+#data
+&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,950): unknown-named-character-reference
+#new-errors
+(1:950) unknown-named-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&ammmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmp;"
+
+#data
+FOO& BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO& BAR"
+
+#data
+FOO&<BAR>
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,9): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&"
+|     <bar>
+
+#data
+FOO&&&&gt;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&&&>BAR"
+
+#data
+FOO&#41;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO)BAR"
+
+#data
+FOO&#x41;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOABAR"
+
+#data
+FOO&#X41;BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOABAR"
+
+#data
+FOO&#BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,5): expected-numeric-entity
+#new-errors
+(1:6) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#BAR"
+
+#data
+FOO&#ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,5): expected-numeric-entity
+#new-errors
+(1:6) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#ZOO"
+
+#data
+FOO&#xBAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,7): expected-numeric-entity
+#new-errors
+(1:9) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOºR"
+
+#data
+FOO&#xZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,6): expected-numeric-entity
+#new-errors
+(1:7) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#xZOO"
+
+#data
+FOO&#XZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,6): expected-numeric-entity
+#new-errors
+(1:7) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#XZOO"
+
+#data
+FOO&#41BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,7): numeric-entity-without-semicolon
+#new-errors
+(1:8) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO)BAR"
+
+#data
+FOO&#x41BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,10): numeric-entity-without-semicolon
+#new-errors
+(1:11) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO䆺R"
+
+#data
+FOO&#x41ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,8): numeric-entity-without-semicolon
+#new-errors
+(1:9) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOAZOO"
+
+#data
+FOO&#x0000;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) null-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#x0078;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOxZOO"
+
+#data
+FOO&#x0079;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOyZOO"
+
+#data
+FOO&#x0080;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO€ZOO"
+
+#data
+FOO&#x0081;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO\81ZOO"
+
+#data
+FOO&#x0082;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‚ZOO"
+
+#data
+FOO&#x0083;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOƒZOO"
+
+#data
+FOO&#x0084;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO„ZOO"
+
+#data
+FOO&#x0085;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO…ZOO"
+
+#data
+FOO&#x0086;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO†ZOO"
+
+#data
+FOO&#x0087;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‡ZOO"
+
+#data
+FOO&#x0088;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOˆZOO"
+
+#data
+FOO&#x0089;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‰ZOO"
+
+#data
+FOO&#x008A;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŠZOO"
+
+#data
+FOO&#x008B;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‹ZOO"
+
+#data
+FOO&#x008C;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŒZOO"
+
+#data
+FOO&#x008D;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO\8dZOO"
+
+#data
+FOO&#x008E;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŽZOO"
+
+#data
+FOO&#x008F;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO\8fZOO"
+
+#data
+FOO&#x0090;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO\90ZOO"
+
+#data
+FOO&#x0091;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‘ZOO"
+
+#data
+FOO&#x0092;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO’ZOO"
+
+#data
+FOO&#x0093;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO“ZOO"
+
+#data
+FOO&#x0094;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO”ZOO"
+
+#data
+FOO&#x0095;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO•ZOO"
+
+#data
+FOO&#x0096;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO–ZOO"
+
+#data
+FOO&#x0097;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO—ZOO"
+
+#data
+FOO&#x0098;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO˜ZOO"
+
+#data
+FOO&#x0099;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO™ZOO"
+
+#data
+FOO&#x009A;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOšZOO"
+
+#data
+FOO&#x009B;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO›ZOO"
+
+#data
+FOO&#x009C;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOœZOO"
+
+#data
+FOO&#x009D;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO\9dZOO"
+
+#data
+FOO&#x009E;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOžZOO"
+
+#data
+FOO&#x009F;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) control-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŸZOO"
+
+#data
+FOO&#x00A0;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO ZOO"
+
+#data
+FOO&#xD7FF;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO퟿ZOO"
+
+#data
+FOO&#xD800;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) surrogate-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xD801;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) surrogate-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xDFFE;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) surrogate-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xDFFF;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,11): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:12) surrogate-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xE000;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x10FFFE;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:14) noncharacter-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􏿾ZOO"
+
+#data
+FOO&#x1087D4;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􈟔ZOO"
+
+#data
+FOO&#x10FFFF;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:14) noncharacter-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􏿿ZOO"
+
+#data
+FOO&#x110000;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:14) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xFFFFFF;ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:14) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#11111111111
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+(1,13): eof-in-numeric-entity
+#new-errors
+(1:17) missing-semicolon-after-character-reference
+(1:17) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�"
+
+#data
+FOO&#1111111111
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+(1,13): eof-in-numeric-entity
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+(1:16) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�"
+
+#data
+FOO&#111111111111
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,13): illegal-codepoint-for-numeric-entity
+(1,13): eof-in-numeric-entity
+#new-errors
+(1:18) missing-semicolon-after-character-reference
+(1:18) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�"
+
+#data
+FOO&#11111111111ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,16): numeric-entity-without-semicolon
+(1,16): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:17) missing-semicolon-after-character-reference
+(1:17) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#1111111111ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,15): numeric-entity-without-semicolon
+(1,15): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+(1:16) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#111111111111ZOO
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,17): numeric-entity-without-semicolon
+(1,17): illegal-codepoint-for-numeric-entity
+#new-errors
+(1:18) missing-semicolon-after-character-reference
+(1:18) character-reference-outside-unicode-range
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
diff --git a/internal/html/testdata/webkit/entities02.dat b/internal/html/testdata/webkit/entities02.dat
new file mode 100644 (file)
index 0000000..0c6e898
--- /dev/null
@@ -0,0 +1,309 @@
+#data
+<div bar="ZZ&gt;YY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>YY"
+
+#data
+<div bar="ZZ&"></div>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar='ZZ&'></div>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar=ZZ&></div>
+#errors
+(1,13): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar="ZZ&gt=YY"></div>
+#errors
+(1,15): named-entity-without-semicolon
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt=YY"
+
+#data
+<div bar="ZZ&gt0YY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt0YY"
+
+#data
+<div bar="ZZ&gt9YY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt9YY"
+
+#data
+<div bar="ZZ&gtaYY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gtaYY"
+
+#data
+<div bar="ZZ&gtZYY"></div>
+#errors
+(1,20): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gtZYY"
+
+#data
+<div bar="ZZ&gt YY"></div>
+#errors
+(1,15): named-entity-without-semicolon
+(1,20): expected-doctype-but-got-start-tag
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ> YY"
+
+#data
+<div bar="ZZ&gt"></div>
+#errors
+(1,15): named-entity-without-semicolon
+(1,17): expected-doctype-but-got-start-tag
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar='ZZ&gt'></div>
+#errors
+(1,15): named-entity-without-semicolon
+(1,17): expected-doctype-but-got-start-tag
+#new-errors
+(1:16) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar=ZZ&gt></div>
+#errors
+(1,14): named-entity-without-semicolon
+(1,15): expected-doctype-but-got-start-tag
+#new-errors
+(1:15) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar="ZZ&pound_id=23"></div>
+#errors
+(1,18): named-entity-without-semicolon
+(1,26): expected-doctype-but-got-start-tag
+#new-errors
+(1:19) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ£_id=23"
+
+#data
+<div bar="ZZ&prod_id=23"></div>
+#errors
+(1,25): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&prod_id=23"
+
+#data
+<div bar="ZZ&pound;_id=23"></div>
+#errors
+(1,27): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ£_id=23"
+
+#data
+<div bar="ZZ&prod;_id=23"></div>
+#errors
+(1,26): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ∏_id=23"
+
+#data
+<div bar="ZZ&pound=23"></div>
+#errors
+(1,18): named-entity-without-semicolon
+(1,23): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&pound=23"
+
+#data
+<div bar="ZZ&prod=23"></div>
+#errors
+(1,22): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&prod=23"
+
+#data
+<div>ZZ&pound_id=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,13): named-entity-without-semicolon
+#new-errors
+(1:14) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£_id=23"
+
+#data
+<div>ZZ&prod_id=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ&prod_id=23"
+
+#data
+<div>ZZ&pound;_id=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£_id=23"
+
+#data
+<div>ZZ&prod;_id=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ∏_id=23"
+
+#data
+<div>ZZ&pound=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,13): named-entity-without-semicolon
+#new-errors
+(1:14) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£=23"
+
+#data
+<div>ZZ&prod=23</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ&prod=23"
+
+#data
+<div>ZZ&AElig=</div>
+#errors
+#new-errors
+(1:14) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZÆ="
diff --git a/internal/html/testdata/webkit/foreign-fragment.dat b/internal/html/testdata/webkit/foreign-fragment.dat
new file mode 100644 (file)
index 0000000..c81ae81
--- /dev/null
@@ -0,0 +1,559 @@
+#data
+<nobr>X
+#errors
+6: HTML start tag “nobr” in a foreign namespace context.
+7: End of file seen and there were open elements.
+6: Unclosed element “nobr”.
+#document-fragment
+svg path
+#document
+| <svg nobr>
+|   "X"
+
+#data
+<font color></font>X
+#errors
+12: HTML start tag “font” in a foreign namespace context.
+#document-fragment
+svg path
+#document
+| <svg font>
+|   color=""
+| "X"
+
+#data
+<font></font>X
+#errors
+#document-fragment
+svg path
+#document
+| <svg font>
+| "X"
+
+#data
+<g></path>X
+#errors
+10: End tag “path” did not match the name of the current open element (“g”).
+11: End of file seen and there were open elements.
+3: Unclosed element “g”.
+#document-fragment
+svg path
+#document
+| <svg g>
+|   "X"
+
+#data
+</path>X
+#errors
+5: Stray end tag “path”.
+#document-fragment
+svg path
+#document
+| "X"
+
+#data
+</foreignObject>X
+#errors
+5: Stray end tag “foreignobject”.
+#document-fragment
+svg foreignObject
+#document
+| "X"
+
+#data
+</desc>X
+#errors
+5: Stray end tag “desc”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+</title>X
+#errors
+5: Stray end tag “title”.
+#document-fragment
+svg title
+#document
+| "X"
+
+#data
+</svg>X
+#errors
+5: Stray end tag “svg”.
+#document-fragment
+svg svg
+#document
+| "X"
+
+#data
+</mfenced>X
+#errors
+5: Stray end tag “mfenced”.
+#document-fragment
+math mfenced
+#document
+| "X"
+
+#data
+</malignmark>X
+#errors
+5: Stray end tag “malignmark”.
+#document-fragment
+math malignmark
+#document
+| "X"
+
+#data
+</math>X
+#errors
+5: Stray end tag “math”.
+#document-fragment
+math math
+#document
+| "X"
+
+#data
+</annotation-xml>X
+#errors
+5: Stray end tag “annotation-xml”.
+#document-fragment
+math annotation-xml
+#document
+| "X"
+
+#data
+</mtext>X
+#errors
+5: Stray end tag “mtext”.
+#document-fragment
+math mtext
+#document
+| "X"
+
+#data
+</mi>X
+#errors
+5: Stray end tag “mi”.
+#document-fragment
+math mi
+#document
+| "X"
+
+#data
+</mo>X
+#errors
+5: Stray end tag “mo”.
+#document-fragment
+math mo
+#document
+| "X"
+
+#data
+</mn>X
+#errors
+5: Stray end tag “mn”.
+#document-fragment
+math mn
+#document
+| "X"
+
+#data
+</ms>X
+#errors
+5: Stray end tag “ms”.
+#document-fragment
+math ms
+#document
+| "X"
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><ms/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “ms”.
+#new-errors
+(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math ms
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <ms>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math ms
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math ms
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math ms
+#document
+| <figure>
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><mn/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “mn”.
+#new-errors
+(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math mn
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <mn>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math mn
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math mn
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math mn
+#document
+| <figure>
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><mo/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “mo”.
+#new-errors
+(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math mo
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <mo>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math mo
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math mo
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math mo
+#document
+| <figure>
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><mi/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “mi”.
+#new-errors
+(1:44-1:49) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math mi
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <mi>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math mi
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math mi
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math mi
+#document
+| <figure>
+
+#data
+<b></b><mglyph/><i></i><malignmark/><u></u><mtext/>X
+#errors
+51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag.
+52: End of file seen and there were open elements.
+51: Unclosed element “mtext”.
+#new-errors
+(1:44-1:52) non-void-html-element-start-tag-with-trailing-solidus
+#document-fragment
+math mtext
+#document
+| <b>
+| <math mglyph>
+| <i>
+| <math malignmark>
+| <u>
+| <mtext>
+|   "X"
+
+#data
+<malignmark></malignmark>
+#errors
+#document-fragment
+math mtext
+#document
+| <math malignmark>
+
+#data
+<div></div>
+#errors
+#document-fragment
+math mtext
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math mtext
+#document
+| <figure>
+
+#data
+<div></div>
+#errors
+5: HTML start tag “div” in a foreign namespace context.
+#document-fragment
+math annotation-xml
+#document
+| <math div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math annotation-xml
+#document
+| <math figure>
+
+#data
+<div></div>
+#errors
+5: HTML start tag “div” in a foreign namespace context.
+#document-fragment
+math math
+#document
+| <math div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+math math
+#document
+| <math figure>
+
+#data
+<div></div>
+#errors
+#document-fragment
+svg foreignObject
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+svg foreignObject
+#document
+| <figure>
+
+#data
+<div></div>
+#errors
+#document-fragment
+svg title
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+svg title
+#document
+| <figure>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+svg desc
+#document
+| <figure>
+
+#data
+<div><h1>X</h1></div>
+#errors
+5: HTML start tag “div” in a foreign namespace context.
+9: HTML start tag “h1” in a foreign namespace context.
+#document-fragment
+svg svg
+#document
+| <svg div>
+|   <svg h1>
+|     "X"
+
+#data
+<div></div>
+#errors
+5: HTML start tag “div” in a foreign namespace context.
+#document-fragment
+svg svg
+#document
+| <svg div>
+
+#data
+<div></div>
+#errors
+#document-fragment
+svg desc
+#document
+| <div>
+
+#data
+<figure></figure>
+#errors
+#document-fragment
+svg desc
+#document
+| <figure>
+
+#data
+<plaintext><foo>
+#errors
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+svg desc
+#document
+| <plaintext>
+|   "<foo>"
+
+#data
+<frameset>X
+#errors
+6: Stray start tag “frameset”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<head>X
+#errors
+6: Stray start tag “head”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<body>X
+#errors
+6: Stray start tag “body”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<html>X
+#errors
+6: Stray start tag “html”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<html class="foo">X
+#errors
+6: Stray start tag “html”.
+#document-fragment
+svg desc
+#document
+| "X"
+
+#data
+<body class="foo">X
+#errors
+6: Stray start tag “body”.
+#document-fragment
+svg desc
+#document
+| "X"
diff --git a/internal/html/testdata/webkit/html5test-com.dat b/internal/html/testdata/webkit/html5test-com.dat
new file mode 100644 (file)
index 0000000..f738010
--- /dev/null
@@ -0,0 +1,302 @@
+#data
+<div<div>
+#errors
+(1,9): expected-doctype-but-got-start-tag
+(1,9): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div<div>
+
+#data
+<div foo<bar=''>
+#errors
+(1,9): invalid-character-in-attribute-name
+(1,16): expected-doctype-but-got-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#new-errors
+(1:9) unexpected-character-in-attribute-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo<bar=""
+
+#data
+<div foo=`bar`>
+#errors
+(1,10): equals-in-unquoted-attribute-value
+(1,14): unexpected-character-in-unquoted-attribute-value
+(1,15): expected-doctype-but-got-start-tag
+(1,15): expected-closing-tag-but-got-eof
+#new-errors
+(1:10) unexpected-character-in-unquoted-attribute-value
+(1:14) unexpected-character-in-unquoted-attribute-value
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo="`bar`"
+
+#data
+<div \"foo=''>
+#errors
+(1,7): invalid-character-in-attribute-name
+(1,14): expected-doctype-but-got-start-tag
+(1,14): expected-closing-tag-but-got-eof
+#new-errors
+(1:7) unexpected-character-in-attribute-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       \"foo=""
+
+#data
+<a href='\nbar'></a>
+#errors
+(1,16): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="\nbar"
+
+#data
+<!DOCTYPE html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+&lang;&rang;
+#errors
+(1,6): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "⟨⟩"
+
+#data
+&apos;
+#errors
+(1,6): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "'"
+
+#data
+&ImaginaryI;
+#errors
+(1,12): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "ⅈ"
+
+#data
+&Kopf;
+#errors
+(1,6): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "𝕂"
+
+#data
+&notinva;
+#errors
+(1,9): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "∉"
+
+#data
+<?import namespace="foo" implementation="#bar">
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,47): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?import namespace="foo" implementation="#bar" -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!--foo--bar-->
+#errors
+(1,10): unexpected-char-in-comment
+(1,15): expected-doctype-but-got-eof
+#document
+| <!-- foo--bar -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<![CDATA[x]]>
+#errors
+(1,2): expected-dashes-or-doctype
+(1,13): expected-doctype-but-got-eof
+#new-errors
+(1:9) cdata-in-html-content
+#document
+| <!-- [CDATA[x]] -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<textarea><!--</textarea>--></textarea>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,39): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--"
+|     "-->"
+
+#data
+<textarea><!--</textarea>-->
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--"
+|     "-->"
+
+#data
+<style><!--</style>--></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,30): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<style><!--</style>-->
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<ul><li>A </li> <li>B</li></ul>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         "A "
+|       " "
+|       <li>
+|         "B"
+
+#data
+<table><form><input type=hidden><input></form><div></div></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,13): unexpected-form-in-table
+(1,32): unexpected-hidden-input-in-table
+(1,39): unexpected-start-tag-implies-table-voodoo
+(1,46): unexpected-end-tag-implies-table-voodoo
+(1,46): unexpected-end-tag
+(1,51): unexpected-start-tag-implies-table-voodoo
+(1,57): unexpected-end-tag-implies-table-voodoo
+#document
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     <div>
+|     <table>
+|       <form>
+|       <input>
+|         type="hidden"
+
+#data
+<i>A<b>B<p></i>C</b>D
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,15): adoption-agency-1.3
+(1,20): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "A"
+|       <b>
+|         "B"
+|     <b>
+|     <p>
+|       <b>
+|         <i>
+|         "C"
+|       "D"
+
+#data
+<div></div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<svg></svg>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<math></math>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
diff --git a/internal/html/testdata/webkit/inbody01.dat b/internal/html/testdata/webkit/inbody01.dat
new file mode 100644 (file)
index 0000000..10f6520
--- /dev/null
@@ -0,0 +1,54 @@
+#data
+<button>1</foo>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,15): unexpected-end-tag
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|       "1"
+
+#data
+<foo>1<p>2</foo>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,16): unexpected-end-tag
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "1"
+|       <p>
+|         "2"
+
+#data
+<dd>1</foo>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|       "1"
+
+#data
+<foo>1<dd>2</foo>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): unexpected-end-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "1"
+|       <dd>
+|         "2"
diff --git a/internal/html/testdata/webkit/isindex.dat b/internal/html/testdata/webkit/isindex.dat
new file mode 100644 (file)
index 0000000..733f82e
--- /dev/null
@@ -0,0 +1,49 @@
+#data
+<isindex>
+#errors
+(1,9): expected-doctype-but-got-start-tag
+(1,9): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <isindex>
+
+#data
+<isindex name="A" action="B" prompt="C" foo="D">
+#errors
+(1,48): expected-doctype-but-got-start-tag
+(1,48): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <isindex>
+|       action="B"
+|       foo="D"
+|       name="A"
+|       prompt="C"
+
+#data
+<form><isindex>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <isindex>
+
+#data
+<!doctype html><isindex>x</isindex>x
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <isindex>
+|       "x"
+|     "x"
diff --git a/internal/html/testdata/webkit/main-element.dat b/internal/html/testdata/webkit/main-element.dat
new file mode 100644 (file)
index 0000000..4b103bb
--- /dev/null
@@ -0,0 +1,46 @@
+#data
+<!doctype html><p>foo<main>bar<p>baz
+#errors
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|     <main>
+|       "bar"
+|       <p>
+|         "baz"
+
+#data
+<!doctype html><main><p>foo</main>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <main>
+|       <p>
+|         "foo"
+|     "bar"
+
+#data
+<!DOCTYPE html>xxx<svg><x><g><a><main><b>
+#errors
+ * (1,42) unexpected HTML-like start tag token in foreign content
+ * (1,42) unexpected end of file
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "xxx"
+|     <svg svg>
+|       <svg x>
+|         <svg g>
+|           <svg a>
+|             <svg main>
+|     <b>
diff --git a/internal/html/testdata/webkit/math.dat b/internal/html/testdata/webkit/math.dat
new file mode 100644 (file)
index 0000000..ae9cd7c
--- /dev/null
@@ -0,0 +1,81 @@
+#data
+<math><tr><td><mo><tr>
+#errors
+#document-fragment
+td
+#document
+| <math math>
+|   <math tr>
+|     <math td>
+|       <math mo>
+
+#data
+<math><tr><td><mo><tr>
+#errors
+#document-fragment
+tr
+#document
+| <math math>
+|   <math tr>
+|     <math td>
+|       <math mo>
+
+#data
+<math><thead><mo><tbody>
+#errors
+#document-fragment
+thead
+#document
+| <math math>
+|   <math thead>
+|     <math mo>
+
+#data
+<math><tfoot><mo><tbody>
+#errors
+#document-fragment
+tfoot
+#document
+| <math math>
+|   <math tfoot>
+|     <math mo>
+
+#data
+<math><tbody><mo><tfoot>
+#errors
+#document-fragment
+tbody
+#document
+| <math math>
+|   <math tbody>
+|     <math mo>
+
+#data
+<math><tbody><mo></table>
+#errors
+#document-fragment
+tbody
+#document
+| <math math>
+|   <math tbody>
+|     <math mo>
+
+#data
+<math><thead><mo></table>
+#errors
+#document-fragment
+tbody
+#document
+| <math math>
+|   <math thead>
+|     <math mo>
+
+#data
+<math><tfoot><mo></table>
+#errors
+#document-fragment
+tbody
+#document
+| <math math>
+|   <math tfoot>
+|     <math mo>
diff --git a/internal/html/testdata/webkit/menuitem-element.dat b/internal/html/testdata/webkit/menuitem-element.dat
new file mode 100644 (file)
index 0000000..43aa0c6
--- /dev/null
@@ -0,0 +1,257 @@
+#data
+<menuitem>
+#errors
+10: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+10: End of file seen and there were open elements.
+10: Unclosed element “menuitem”.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+
+#data
+</menuitem>
+#errors
+11: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+11: Stray end tag “menuitem”.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><body><menuitem>A
+#errors
+32: End of file seen and there were open elements.
+31: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       "A"
+
+#data
+<!DOCTYPE html><body><menuitem>A<menuitem>B
+#errors
+43: End of file seen and there were open elements.
+42: Unclosed element “menuitem”.
+31: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       "A"
+|       <menuitem>
+|         "B"
+
+#data
+<!DOCTYPE html><body><menuitem>A<menu>B</menu>
+#errors
+46: End of file seen and there were open elements.
+31: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       "A"
+|       <menu>
+|         "B"
+
+#data
+<!DOCTYPE html><body><menuitem>A<hr>B
+#errors
+37: End of file seen and there were open elements.
+31: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       "A"
+|       <hr>
+|       "B"
+
+#data
+<!DOCTYPE html><li><menuitem><li>
+#errors
+33: End tag “li” implied, but there were open elements.
+29: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+|       <menuitem>
+|     <li>
+
+#data
+<!DOCTYPE html><menuitem><p></menuitem>x
+#errors
+39: Stray end tag “menuitem”.
+40: End of file seen and there were open elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       <p>
+|         "x"
+
+#data
+<!DOCTYPE html><p><b></p><menuitem>
+#errors
+25: End tag “p” seen, but there were open elements.
+21: Unclosed element “b”.
+35: End of file seen and there were open elements.
+35: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|     <b>
+|       <menuitem>
+
+#data
+<!DOCTYPE html><menuitem><asdf></menuitem>x
+#errors
+42: End tag “menuitem” seen, but there were open elements.
+31: Unclosed element “asdf”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       <asdf>
+|     "x"
+
+#data
+<!DOCTYPE html></menuitem>
+#errors
+26: Stray end tag “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html></menuitem>
+#errors
+26: Stray end tag “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><head></menuitem>
+#errors
+26: Stray end tag “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><select><menuitem></select>
+#errors
+33: Stray start tag “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><option><menuitem>
+#errors
+33: End of file seen and there were open elements.
+33: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|       <menuitem>
+
+#data
+<!DOCTYPE html><menuitem><option>
+#errors
+33: End of file seen and there were open elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       <option>
+
+#data
+<!DOCTYPE html><menuitem></body>
+#errors
+32: End tag for  “body” seen, but there were unclosed elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+
+#data
+<!DOCTYPE html><menuitem></html>
+#errors
+32: End tag for  “html” seen, but there were unclosed elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+
+#data
+<!DOCTYPE html><menuitem><p>
+#errors
+28: End of file seen and there were open elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       <p>
+
+#data
+<!DOCTYPE html><menuitem><li>
+#errors
+29: End of file seen and there were open elements.
+25: Unclosed element “menuitem”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <menuitem>
+|       <li>
diff --git a/internal/html/testdata/webkit/namespace-sensitivity.dat b/internal/html/testdata/webkit/namespace-sensitivity.dat
new file mode 100644 (file)
index 0000000..ca35c0e
--- /dev/null
@@ -0,0 +1,16 @@
+#data
+<body><table><tr><td><svg><td><foreignObject><span></td>Foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Foo"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg td>
+|                 <svg foreignObject>
+|                   <span>
diff --git a/internal/html/testdata/webkit/noscript01.dat b/internal/html/testdata/webkit/noscript01.dat
new file mode 100644 (file)
index 0000000..ec3496c
--- /dev/null
@@ -0,0 +1,237 @@
+#data
+<head><noscript><!doctype html><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected DOCTYPE. Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><html class="foo"><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 34 html needs to be the first start tag.
+#script-off
+#document
+| <html>
+|   class="foo"
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript></noscript>
+#errors
+(1,6): expected-doctype-but-got-tag
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+
+#data
+<head><noscript>   </noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "   "
+|   <body>
+
+#data
+<head><noscript><!--foo--></noscript>
+#errors
+(1,6): expected-doctype-but-got-tag
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><basefont><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <basefont>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><bgsound><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <bgsound>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><link><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <link>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><meta><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <meta>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><noframes>XXX</noscript></noframes></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <noframes>
+|         "XXX</noscript>"
+|   <body>
+
+#data
+<head><noscript><style>XXX</style></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <style>
+|         "XXX"
+|   <body>
+
+#data
+<head><noscript></br><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 21 Element br not allowed in a inhead-noscript context
+Line: 1 Col: 21 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 42 Unexpected end tag (noscript). Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <br>
+|     <!-- foo -->
+
+#data
+<head><noscript><head class="foo"><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 34 Unexpected start tag (head).
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><noscript class="foo"><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 34 Unexpected start tag (noscript).
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript></p><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (p). Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- foo -->
+|   <body>
+
+#data
+<head><noscript><p><!--foo--></noscript>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 19 Element p not allowed in a inhead-noscript context
+Line: 1 Col: 40 Unexpected end tag (noscript). Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <p>
+|       <!-- foo -->
+
+#data
+<head><noscript>XXX<!--foo--></noscript></head>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected non-space character. Expected inhead-noscript content
+Line: 1 Col: 30 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 37 Unexpected end tag (head). Ignored.
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     "XXX"
+|     <!-- foo -->
+
+#data
+<head><noscript>
+#errors
+(1,6): expected-doctype-but-got-tag
+(1,6): eof-in-head-noscript
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
diff --git a/internal/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat b/internal/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat
new file mode 100644 (file)
index 0000000..2c546d4
Binary files /dev/null and b/internal/html/testdata/webkit/pending-spec-changes-plain-text-unsafe.dat differ
diff --git a/internal/html/testdata/webkit/pending-spec-changes.dat b/internal/html/testdata/webkit/pending-spec-changes.dat
new file mode 100644 (file)
index 0000000..1647d7f
--- /dev/null
@@ -0,0 +1,46 @@
+#data
+<input type="hidden"><frameset>
+#errors
+(1,21): expected-doctype-but-got-start-tag
+(1,31): unexpected-start-tag
+(1,31): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><table><caption><svg>foo</table>bar
+#errors
+(1,47): unexpected-end-tag
+(1,47): end-table-tag-in-caption
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           "foo"
+|     "bar"
+
+#data
+<table><tr><td><svg><desc><td></desc><circle>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,30): unexpected-cell-end-tag
+(1,37): unexpected-end-tag
+(1,45): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
+|             <circle>
diff --git a/internal/html/testdata/webkit/plain-text-unsafe.dat b/internal/html/testdata/webkit/plain-text-unsafe.dat
new file mode 100644 (file)
index 0000000..dfb5cb6
Binary files /dev/null and b/internal/html/testdata/webkit/plain-text-unsafe.dat differ
diff --git a/internal/html/testdata/webkit/ruby.dat b/internal/html/testdata/webkit/ruby.dat
new file mode 100644 (file)
index 0000000..696782f
--- /dev/null
@@ -0,0 +1,301 @@
+#data
+<html><ruby>a<rb>b<rb></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rb>
+
+#data
+<html><ruby>a<rb>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rb>b<rtc></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rtc>
+
+#data
+<html><ruby>a<rb>b<rp></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rp>
+
+#data
+<html><ruby>a<rb>b<span></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,31): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|         <span>
+
+#data
+<html><ruby>a<rt>b<rb></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rb>
+
+#data
+<html><ruby>a<rt>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rt>b<rtc></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rtc>
+
+#data
+<html><ruby>a<rt>b<rp></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rp>
+
+#data
+<html><ruby>a<rt>b<span></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,31): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|         <span>
+
+#data
+<html><ruby>a<rtc>b<rb></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|       <rb>
+
+#data
+<html><ruby>a<rtc>b<rt>c<rt>d</ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|         <rt>
+|           "c"
+|         <rt>
+|           "d"
+
+#data
+<html><ruby>a<rtc>b<rtc></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|       <rtc>
+
+#data
+<html><ruby>a<rtc>b<rp></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|         <rp>
+
+#data
+<html><ruby>a<rtc>b<span></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|         <span>
+
+#data
+<html><ruby>a<rp>b<rb></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rb>
+
+#data
+<html><ruby>a<rp>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rp>b<rtc></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rtc>
+
+#data
+<html><ruby>a<rp>b<rp></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rp>
+
+#data
+<html><ruby>a<rp>b<span></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,31): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|         <span>
+
+#data
+<html><ruby><rtc><ruby>a<rb>b<rt></ruby></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <rtc>
+|         <ruby>
+|           "a"
+|           <rb>
+|             "b"
+|           <rt>
diff --git a/internal/html/testdata/webkit/scriptdata01.dat b/internal/html/testdata/webkit/scriptdata01.dat
new file mode 100644 (file)
index 0000000..e570858
--- /dev/null
@@ -0,0 +1,385 @@
+#data
+FOO<script>'Hello'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'Hello'"
+|     "BAR"
+
+#data
+FOO<script></script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script >BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script/>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,21): self-closing-flag-on-end-tag
+#new-errors
+(1:21) end-tag-with-trailing-solidus
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script/ >BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,20): unexpected-character-after-solidus-in-tag
+#new-errors
+(1:21) unexpected-solidus-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script type="text/plain"></scriptx>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,42): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "</scriptx>BAR"
+
+#data
+FOO<script></script foo=">" dd>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,31): attributes-in-end-tag
+#new-errors
+(1:31) end-tag-with-attributes
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script>'<'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<'"
+|     "BAR"
+
+#data
+FOO<script>'<!'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!'"
+|     "BAR"
+
+#data
+FOO<script>'<!-'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-'"
+|     "BAR"
+
+#data
+FOO<script>'<!--'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!--'"
+|     "BAR"
+
+#data
+FOO<script>'<!---'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!---'"
+|     "BAR"
+
+#data
+FOO<script>'<!-->'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-->'"
+|     "BAR"
+
+#data
+FOO<script>'<!-->'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-->'"
+|     "BAR"
+
+#data
+FOO<script>'<!-- potato'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- potato'"
+|     "BAR"
+
+#data
+FOO<script>'<!-- <sCrIpt'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- <sCrIpt'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,56): expected-script-data-but-got-eof
+(1,56): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:57) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt>'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,58): expected-script-data-but-got-eof
+(1,58): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:59) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> -'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,59): expected-script-data-but-got-eof
+(1,59): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:60) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> --'</script>BAR"
+
+#data
+FOO<script>'<!-- <sCrIpt> -->'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- <sCrIpt> -->'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,61): expected-script-data-but-got-eof
+(1,61): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:62) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> --!>'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,61): expected-script-data-but-got-eof
+(1,61): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:62) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> -- >'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,56): expected-script-data-but-got-eof
+(1,56): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:57) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt '</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+(1,56): expected-script-data-but-got-eof
+(1,56): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:57) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt/'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt\'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt/'</script>BAR"
+|     "QUX"
+
+#data
+FOO<script><!--<script>-></script>--></script>QUX
+#errors
+(1,3): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "<!--<script>-></script>-->"
+|     "QUX"
diff --git a/internal/html/testdata/webkit/scripted/adoption01.dat b/internal/html/testdata/webkit/scripted/adoption01.dat
new file mode 100644 (file)
index 0000000..5cc0f07
--- /dev/null
@@ -0,0 +1,16 @@
+#data
+<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
+#errors
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         id="B"
+|         <script>
+|           "document.getElementById("A").id = "B""
+|     <b>
+|       id="A"
+|       "TEXT"
diff --git a/internal/html/testdata/webkit/scripted/ark.dat b/internal/html/testdata/webkit/scripted/ark.dat
new file mode 100644 (file)
index 0000000..feebead
--- /dev/null
@@ -0,0 +1,27 @@
+#data
+<p><font size=4><font size=4><font size=4><script>document.getElementsByTagName("font")[2].setAttribute("size", "5");</script><font size=4><p>X
+#errors
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="5"
+|             <script>
+|               "document.getElementsByTagName("font")[2].setAttribute("size", "5");"
+|             <font>
+|               size="4"
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             "X"
diff --git a/internal/html/testdata/webkit/scripted/webkit01.dat b/internal/html/testdata/webkit/scripted/webkit01.dat
new file mode 100644 (file)
index 0000000..3e71c1b
--- /dev/null
@@ -0,0 +1,30 @@
+#data
+1<script>document.write("2")</script>3
+#errors
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     "1"
+|     <script>
+|       "document.write("2")"
+|     "23"
+
+#data
+1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
+#errors
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     "1"
+|     <script>
+|       "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
+|     <script>
+|       "document.write('2')"
+|     "2"
+|     <script>
+|       "document.write('3')"
+|     "34"
diff --git a/internal/html/testdata/webkit/svg.dat b/internal/html/testdata/webkit/svg.dat
new file mode 100644 (file)
index 0000000..8e9a2bb
--- /dev/null
@@ -0,0 +1,81 @@
+#data
+<svg><tr><td><title><tr>
+#errors
+#document-fragment
+td
+#document
+| <svg svg>
+|   <svg tr>
+|     <svg td>
+|       <svg title>
+
+#data
+<svg><tr><td><title><tr>
+#errors
+#document-fragment
+tr
+#document
+| <svg svg>
+|   <svg tr>
+|     <svg td>
+|       <svg title>
+
+#data
+<svg><thead><title><tbody>
+#errors
+#document-fragment
+thead
+#document
+| <svg svg>
+|   <svg thead>
+|     <svg title>
+
+#data
+<svg><tfoot><title><tbody>
+#errors
+#document-fragment
+tfoot
+#document
+| <svg svg>
+|   <svg tfoot>
+|     <svg title>
+
+#data
+<svg><tbody><title><tfoot>
+#errors
+#document-fragment
+tbody
+#document
+| <svg svg>
+|   <svg tbody>
+|     <svg title>
+
+#data
+<svg><tbody><title></table>
+#errors
+#document-fragment
+tbody
+#document
+| <svg svg>
+|   <svg tbody>
+|     <svg title>
+
+#data
+<svg><thead><title></table>
+#errors
+#document-fragment
+tbody
+#document
+| <svg svg>
+|   <svg thead>
+|     <svg title>
+
+#data
+<svg><tfoot><title></table>
+#errors
+#document-fragment
+tbody
+#document
+| <svg svg>
+|   <svg tfoot>
+|     <svg title>
diff --git a/internal/html/testdata/webkit/tables01.dat b/internal/html/testdata/webkit/tables01.dat
new file mode 100644 (file)
index 0000000..f0caaa3
--- /dev/null
@@ -0,0 +1,286 @@
+#data
+<table><th>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,11): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <th>
+
+#data
+<table><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,11): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><col foo='bar'>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,22): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|           foo="bar"
+
+#data
+<table><colgroup></html>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,24): unexpected-end-tag
+(1,27): foster-parenting-character-in-table
+(1,27): foster-parenting-character-in-table
+(1,27): foster-parenting-character-in-table
+(1,27): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <table>
+|       <colgroup>
+
+#data
+<table></table><p>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|     <p>
+|       "foo"
+
+#data
+<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,14): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,30): unexpected-end-tag
+(1,41): unexpected-end-tag
+(1,48): unexpected-end-tag
+(1,56): unexpected-end-tag
+(1,61): unexpected-end-tag
+(1,69): unexpected-end-tag
+(1,74): unexpected-end-tag
+(1,82): unexpected-end-tag
+(1,87): unexpected-end-tag
+(1,91): unexpected-cell-in-table-body
+(1,91): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><select><option>3</select></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-table-voodoo
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "3"
+|     <table>
+
+#data
+<table><select><table></table></select></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-table-voodoo
+(1,22): unexpected-table-element-start-tag-in-select-in-table
+(1,22): unexpected-start-tag-implies-end-tag
+(1,39): unexpected-end-tag
+(1,47): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|     <table>
+
+#data
+<table><select></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-table-voodoo
+(1,23): unexpected-table-element-end-tag-in-select-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+
+#data
+<table><select><option>A<tr><td>B</td></tr></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-table-voodoo
+(1,28): unexpected-table-element-start-tag-in-select-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "A"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "B"
+
+#data
+<table><td></body></caption></col></colgroup></html>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,18): unexpected-end-tag
+(1,28): unexpected-end-tag
+(1,34): unexpected-end-tag
+(1,45): unexpected-end-tag
+(1,52): unexpected-end-tag
+(1,55): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "foo"
+
+#data
+<table><td>A</table>B
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+|     "B"
+
+#data
+<table><tr><caption>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|       <caption>
+
+#data
+<table><tr></body></caption></col></colgroup></html></td></th><td>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,18): unexpected-end-tag-in-table-row
+(1,28): unexpected-end-tag-in-table-row
+(1,34): unexpected-end-tag-in-table-row
+(1,45): unexpected-end-tag-in-table-row
+(1,52): unexpected-end-tag-in-table-row
+(1,57): unexpected-end-tag-in-table-row
+(1,62): unexpected-end-tag-in-table-row
+(1,69): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "foo"
+
+#data
+<table><td><tr>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,15): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|         <tr>
+
+#data
+<table><td><button><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,23): unexpected-cell-end-tag
+(1,23): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <button>
+|           <td>
+
+#data
+<table><tr><td><svg><desc><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,30): unexpected-cell-end-tag
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
diff --git a/internal/html/testdata/webkit/template.dat b/internal/html/testdata/webkit/template.dat
new file mode 100644 (file)
index 0000000..b38d4f5
--- /dev/null
@@ -0,0 +1,1604 @@
+#data
+<body><template>Hello</template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         "Hello"
+
+#data
+<template>Hello</template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         "Hello"
+|   <body>
+
+#data
+<template></template><div></div>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|   <body>
+|     <div>
+
+#data
+<html><template>Hello</template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         "Hello"
+|   <body>
+
+#data
+<head><template><div></div></template></head>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <div>
+|   <body>
+
+#data
+<div><template><div><span></template><b>
+#errors
+ * (1,6) missing DOCTYPE
+ * (1,38) mismatched template end tag
+ * (1,41) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <template>
+|         content
+|           <div>
+|             <span>
+|       <b>
+
+#data
+<div><template></div>Hello
+#errors
+ * (1,6) missing DOCTYPE
+ * (1,22) unexpected token in template
+ * (1,27) unexpected end of file in template
+ * (1,27) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <template>
+|         content
+|           "Hello"
+
+#data
+<div></template></div>
+#errors
+ * (1,6) missing DOCTYPE
+ * (1,17) unexpected template end tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<table><template></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+
+#data
+<table><template></template></div>
+#errors
+ * (1,8) missing DOCTYPE
+ * (1,35) unexpected token in table - foster parenting
+ * (1,35) unexpected end tag
+ * (1,35) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+
+#data
+<table><div><template></template></div>
+#errors
+ * (1,8) missing DOCTYPE
+ * (1,13) unexpected token in table - foster parenting
+ * (1,40) unexpected token in table - foster parenting
+ * (1,40) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <template>
+|         content
+|     <table>
+
+#data
+<table><template></template><div></div>
+#errors
+no doctype
+bad div in table
+bad /div in table
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <template>
+|         content
+
+#data
+<table>   <template></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "   "
+|       <template>
+|         content
+
+#data
+<table><tbody><template></template></tbody>
+#errors
+no doctype
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <template>
+|           content
+
+#data
+<table><tbody><template></tbody></template>
+#errors
+no doctype
+bad /tbody
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <template>
+|           content
+
+#data
+<table><tbody><template></template></tbody></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <template>
+|           content
+
+#data
+<table><thead><template></template></thead>
+#errors
+no doctype
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <template>
+|           content
+
+#data
+<table><tfoot><template></template></tfoot>
+#errors
+no doctype
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tfoot>
+|         <template>
+|           content
+
+#data
+<select><template></template></select>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <template>
+|         content
+
+#data
+<select><template><option></option></template></select>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <template>
+|         content
+|           <option>
+
+#data
+<template><option></option></select><option></option></template>
+#errors
+no doctype
+bad /select
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <option>
+|         <option>
+|   <body>
+
+#data
+<select><template></template><option></select>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <template>
+|         content
+|       <option>
+
+#data
+<select><option><template></template></select>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         <template>
+|           content
+
+#data
+<select><template>
+#errors
+no doctype
+eof in template
+eof in select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <template>
+|         content
+
+#data
+<select><option></option><template>
+#errors
+no doctype
+eof in template
+eof in select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <template>
+|         content
+
+#data
+<select><option></option><template><option>
+#errors
+no doctype
+eof in template
+eof in select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <template>
+|         content
+|           <option>
+
+#data
+<table><thead><template><td></template></table>
+#errors
+ * (1,8) missing DOCTYPE
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <template>
+|           content
+|             <td>
+
+#data
+<table><template><thead></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <thead>
+
+#data
+<body><table><template><td></tr><div></template></table>
+#errors
+no doctype
+bad </tr>
+missing </div>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <td>
+|             <div>
+
+#data
+<table><template><thead></template></thead></table>
+#errors
+no doctype
+bad /thead after /template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <thead>
+
+#data
+<table><thead><template><tr></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <template>
+|           content
+|             <tr>
+
+#data
+<table><template><tr></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <tr>
+
+#data
+<table><tr><template><td>
+#errors
+no doctype
+eof in template
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <template>
+|             content
+|               <td>
+
+#data
+<table><template><tr><template><td></template></tr></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <tr>
+|             <template>
+|               content
+|                 <td>
+
+#data
+<table><template><tr><template><td></td></template></tr></template></table>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <tr>
+|             <template>
+|               content
+|                 <td>
+
+#data
+<table><template><td></template>
+#errors
+no doctype
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <template>
+|         content
+|           <td>
+
+#data
+<body><template><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+
+#data
+<body><template><template><tr></tr></template><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tr>
+|         <td>
+
+#data
+<table><colgroup><template><col>
+#errors
+no doctype
+eof in template
+eof in table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <template>
+|           content
+|             <col>
+
+#data
+<frameset><template><frame></frame></template></frameset>
+#errors
+ * (1,11) missing DOCTYPE
+ * (1,21) unexpected start tag token
+ * (1,36) unexpected end tag token
+ * (1,47) unexpected end tag token
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<template><frame></frame></frameset><frame></frame></template>
+#errors
+ * (1,11) missing DOCTYPE
+ * (1,18) unexpected start tag
+ * (1,26) unexpected end tag
+ * (1,37) unexpected end tag
+ * (1,44) unexpected start tag
+ * (1,52) unexpected end tag
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|   <body>
+
+#data
+<template><div><frameset><span></span></div><span></span></template>
+#errors
+no doctype
+bad frameset
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <div>
+|           <span>
+|         <span>
+|   <body>
+
+#data
+<body><template><div><frameset><span></span></div><span></span></template></body>
+#errors
+no doctype
+bad frameset
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <div>
+|           <span>
+|         <span>
+
+#data
+<body><template><script>var i = 1;</script><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <script>
+|           "var i = 1;"
+|         <td>
+
+#data
+<body><template><tr><div></div></tr></template>
+#errors
+no doctype
+foster-parented div
+foster-parented /div
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <div>
+
+#data
+<body><template><tr></tr><td></td></template>
+#errors
+no doctype
+unexpected <td>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <tr>
+|           <td>
+
+#data
+<body><template><td></td></tr><td></td></template>
+#errors
+no doctype
+bad </tr>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><td></td><tbody><td></td></template>
+#errors
+no doctype
+bad <tbody>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><td></td><caption></caption><td></td></template>
+#errors
+ * (1,7) missing DOCTYPE
+ * (1,35) unexpected start tag in table row
+ * (1,45) unexpected end tag in table row
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><td></td><colgroup></caption><td></td></template>
+#errors
+ * (1,7) missing DOCTYPE
+ * (1,36) unexpected start tag in table row
+ * (1,46) unexpected end tag in table row
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><td></td></table><td></td></template>
+#errors
+no doctype
+bad </table>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <td>
+|         <td>
+
+#data
+<body><template><tr></tr><tbody><tr></tr></template>
+#errors
+no doctype
+bad <tbody>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <tr>
+
+#data
+<body><template><tr></tr><caption><tr></tr></template>
+#errors
+no doctype
+bad <caption>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <tr>
+
+#data
+<body><template><tr></tr></table><tr></tr></template>
+#errors
+no doctype
+bad </table>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <tr>
+
+#data
+<body><template><thead></thead><caption></caption><tbody></tbody></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <thead>
+|         <caption>
+|         <tbody>
+
+#data
+<body><template><thead></thead></table><tbody></tbody></template></body>
+#errors
+no doctype
+bad </table>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <thead>
+|         <tbody>
+
+#data
+<body><template><div><tr></tr></div></template>
+#errors
+no doctype
+bad tr
+bad /tr
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <div>
+
+#data
+<body><template><em>Hello</em></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <em>
+|           "Hello"
+
+#data
+<body><template><!--comment--></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <!-- comment -->
+
+#data
+<body><template><style></style><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <style>
+|         <td>
+
+#data
+<body><template><meta><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <meta>
+|         <td>
+
+#data
+<body><template><link><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <link>
+|         <td>
+
+#data
+<body><template><template><tr></tr></template><td></td></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tr>
+|         <td>
+
+#data
+<body><table><colgroup><template><col></col></template></colgroup></table></body>
+#errors
+no doctype
+bad /col
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <template>
+|           content
+|             <col>
+
+#data
+<body a=b><template><div></div><body c=d><div></div></body></template></body>
+#errors
+no doctype
+bad <body>
+bad </body>
+#document
+| <html>
+|   <head>
+|   <body>
+|     a="b"
+|     <template>
+|       content
+|         <div>
+|         <div>
+
+#data
+<html a=b><template><div><html b=c><span></template>
+#errors
+no doctype
+bad <html>
+missing end tags in template
+#document
+| <html>
+|   a="b"
+|   <head>
+|     <template>
+|       content
+|         <div>
+|           <span>
+|   <body>
+
+#data
+<html a=b><template><col></col><html b=c><col></col></template>
+#errors
+no doctype
+bad /col
+bad html
+bad /col
+#document
+| <html>
+|   a="b"
+|   <head>
+|     <template>
+|       content
+|         <col>
+|         <col>
+|   <body>
+
+#data
+<html a=b><template><frame></frame><html b=c><frame></frame></template>
+#errors
+no doctype
+bad frame
+bad /frame
+bad html
+bad frame
+bad /frame
+#document
+| <html>
+|   a="b"
+|   <head>
+|     <template>
+|       content
+|   <body>
+
+#data
+<body><template><tr></tr><template></template><td></td></template>
+#errors
+no doctype
+unexpected <td>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <tr>
+|         <template>
+|           content
+|         <tr>
+|           <td>
+
+#data
+<body><template><thead></thead><template><tr></tr></template><tr></tr><tfoot></tfoot></template>
+#errors
+no doctype
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <thead>
+|         <template>
+|           content
+|             <tr>
+|         <tbody>
+|           <tr>
+|         <tfoot>
+
+#data
+<body><template><template><b><template></template></template>text</template>
+#errors
+no doctype
+missing </b>
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <b>
+|               <template>
+|                 content
+|         "text"
+
+#data
+<body><template><col><colgroup>
+#errors
+no doctype
+bad colgroup
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col></colgroup>
+#errors
+no doctype
+bogus /colgroup
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col><colgroup></template></body>
+#errors
+no doctype
+bad colgroup
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col><div>
+#errors
+ * (1,7) missing DOCTYPE
+ * (1,27) unexpected token
+ * (1,27) unexpected end of file in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col></div>
+#errors
+no doctype
+bad /div
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><col>Hello
+#errors
+no doctype
+unexpected text
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <col>
+
+#data
+<body><template><i><menu>Foo</i>
+#errors
+no doctype
+mising /menu
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <i>
+|         <menu>
+|           <i>
+|             "Foo"
+
+#data
+<body><template></div><div>Foo</div><template></template><tr></tr>
+#errors
+no doctype
+bogus /div
+bogus tr
+bogus /tr
+eof in template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+|         <div>
+|           "Foo"
+|         <template>
+|           content
+
+#data
+<body><div><template></div><tr><td>Foo</td></tr></template>
+#errors
+ * (1,7) missing DOCTYPE
+ * (1,28) unexpected token in template
+ * (1,60) unexpected end of file
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <template>
+|         content
+|           <tr>
+|             <td>
+|               "Foo"
+
+#data
+<template></figcaption><sub><table></table>
+#errors
+no doctype
+bad /figcaption
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <sub>
+|           <table>
+|   <body>
+
+#data
+<template><template>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|   <body>
+
+#data
+<template><div>
+#errors
+no doctype
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <div>
+|   <body>
+
+#data
+<template><template><div>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <div>
+|   <body>
+
+#data
+<template><template><table>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <table>
+|   <body>
+
+#data
+<template><template><tbody>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tbody>
+|   <body>
+
+#data
+<template><template><tr>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tr>
+|   <body>
+
+#data
+<template><template><td>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <td>
+|   <body>
+
+#data
+<template><template><caption>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <caption>
+|   <body>
+
+#data
+<template><template><colgroup>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <colgroup>
+|   <body>
+
+#data
+<template><template><col>
+#errors
+no doctype
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <col>
+|   <body>
+
+#data
+<template><template><tbody><select>
+#errors
+ * (1,11) missing DOCTYPE
+ * (1,36) unexpected token in table - foster parenting
+ * (1,36) unexpected end of file in template
+ * (1,36) unexpected end of file in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <tbody>
+|             <select>
+|   <body>
+
+#data
+<template><template><table>Foo
+#errors
+no doctype
+foster-parenting text F
+foster-parenting text o
+foster-parenting text o
+eof
+eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             "Foo"
+|             <table>
+|   <body>
+
+#data
+<template><template><frame>
+#errors
+no doctype
+bad tag
+eof
+eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|   <body>
+
+#data
+<template><template><script>var i
+#errors
+no doctype
+eof in script
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <script>
+|               "var i"
+|   <body>
+
+#data
+<template><template><style>var i
+#errors
+no doctype
+eof in style
+eof in template
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <template>
+|           content
+|             <style>
+|               "var i"
+|   <body>
+
+#data
+<template><table></template><body><span>Foo
+#errors
+no doctype
+missing /table
+bad eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <table>
+|   <body>
+|     <span>
+|       "Foo"
+
+#data
+<template><td></template><body><span>Foo
+#errors
+no doctype
+bad eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <td>
+|   <body>
+|     <span>
+|       "Foo"
+
+#data
+<template><object></template><body><span>Foo
+#errors
+no doctype
+missing /object
+bad eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <object>
+|   <body>
+|     <span>
+|       "Foo"
+
+#data
+<template><svg><template>
+#errors
+no doctype
+eof in template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <svg svg>
+|           <svg template>
+|   <body>
+
+#data
+<template><svg><foo><template><foreignObject><div></template><div>
+#errors
+no doctype
+ugly template closure
+bad eof
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <svg svg>
+|           <svg foo>
+|             <svg template>
+|               <svg foreignObject>
+|                 <div>
+|   <body>
+|     <div>
+
+#data
+<dummy><template><span></dummy>
+#errors
+no doctype
+bad end tag </dummy>
+eof in template
+eof in dummy
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dummy>
+|       <template>
+|         content
+|           <span>
+
+#data
+<body><table><tr><td><select><template>Foo</template><caption>A</table>
+#errors
+no doctype
+(1,62): unexpected-caption-in-select-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               <template>
+|                 content
+|                   "Foo"
+|       <caption>
+|         "A"
+
+#data
+<body></body><template>
+#errors
+no doctype
+(1,23): template-after-body
+(1,24): eof-in-template
+#document
+| <html>
+|   <head>
+|   <body>
+|     <template>
+|       content
+
+#data
+<head></head><template>
+#errors
+no doctype
+(1,23): template-after-head
+(1,24): eof-in-template
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|   <body>
+
+#data
+<head></head><template>Foo</template>
+#errors
+no doctype
+(1,23): template-after-head
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         "Foo"
+|   <body>
+
+#data
+<!DOCTYPE HTML><dummy><table><template><table><template><table><script>
+#errors
+eof script
+eof template
+eof template
+eof table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dummy>
+|       <table>
+|         <template>
+|           content
+|             <table>
+|               <template>
+|                 content
+|                   <table>
+|                     <script>
+
+#data
+<template><a><table><a>
+#errors
+#document
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <a>
+|           <a>
+|           <table>
+|   <body>
diff --git a/internal/html/testdata/webkit/tests1.dat b/internal/html/testdata/webkit/tests1.dat
new file mode 100644 (file)
index 0000000..1c36c1b
--- /dev/null
@@ -0,0 +1,1988 @@
+#data
+Test
+#errors
+(1,0): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<p>One<p>Two
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "One"
+|     <p>
+|       "Two"
+
+#data
+Line1<br>Line2<br>Line3<br>Line4
+#errors
+(1,0): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Line1"
+|     <br>
+|     "Line2"
+|     <br>
+|     "Line3"
+|     <br>
+|     "Line4"
+
+#data
+<html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<head>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head><body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head><body></body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head><body></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head><body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<head></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</head>
+#errors
+(1,7): expected-doctype-but-got-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</body>
+#errors
+(1,7): expected-doctype-but-got-end-tag element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</html>
+#errors
+(1,7): expected-doctype-but-got-end-tag element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<b><table><td><i></table>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,25): unexpected-cell-end-tag
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<b><table><td></b><i></table>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,18): unexpected-end-tag
+(1,29): unexpected-cell-end-tag
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+|       "X"
+
+#data
+<h1>Hello<h2>World
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,13): unexpected-start-tag
+(1,18): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       "Hello"
+|     <h2>
+|       "World"
+
+#data
+<a><p>X<a>Y</a>Z</p></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,10): unexpected-start-tag-implies-end-tag
+(1,10): adoption-agency-1.3
+(1,24): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+|         "X"
+|       <a>
+|         "Y"
+|       "Z"
+
+#data
+<b><button>foo</b>bar
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,18): adoption-agency-1.3
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <button>
+|       <b>
+|         "foo"
+|       "bar"
+
+#data
+<!DOCTYPE html><span><button>foo</span>bar
+#errors
+(1,39): unexpected-end-tag
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <span>
+|       <button>
+|         "foobar"
+
+#data
+<p><b><div><marquee></p></b></div>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,28): unexpected-end-tag
+(1,34): end-tag-too-early
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|     <div>
+|       <b>
+|         <marquee>
+|           <p>
+|           "X"
+
+#data
+<script><div></script></div><title><p></title><p><p>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,28): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<div>"
+|     <title>
+|       "<p>"
+|   <body>
+|     <p>
+|     <p>
+
+#data
+<!--><div>--<!-->
+#errors
+(1,5): incorrect-comment
+(1,10): expected-doctype-but-got-start-tag
+(1,17): incorrect-comment
+(1,17): expected-closing-tag-but-got-eof
+#new-errors
+(1:5) abrupt-closing-of-empty-comment
+(1:17) abrupt-closing-of-empty-comment
+#document
+| <!--  -->
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "--"
+|       <!--  -->
+
+#data
+<p><hr></p>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <hr>
+|     <p>
+
+#data
+<select><b><option><select><option></b></select>X
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): unexpected-start-tag-in-select
+(1,27): unexpected-select-in-select
+(1,39): unexpected-end-tag
+(1,48): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|     <option>
+|       "X"
+
+#data
+<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,35): unexpected-start-tag-implies-end-tag
+(1,40): unexpected-cell-end-tag
+(1,43): unexpected-start-tag-implies-table-voodoo
+(1,43): unexpected-start-tag-implies-end-tag
+(1,43): unexpected-end-tag
+(1,63): unexpected-start-tag-implies-end-tag
+(1,64): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 <table>
+|               <a>
+|     <a>
+|       <b>
+|         "X"
+|       "C"
+|     <a>
+|       "Y"
+
+#data
+<a X>0<b>1<a Y>2
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-implies-end-tag
+(1,15): adoption-agency-1.3
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       x=""
+|       "0"
+|       <b>
+|         "1"
+|     <b>
+|       <a>
+|         y=""
+|         "2"
+
+#data
+<!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->
+#errors
+(1,7): unexpected-dash-after-double-dash-in-comment
+(1,14): expected-doctype-but-got-start-tag
+(1,41): unexpected-start-tag-implies-table-voodoo
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): foster-parenting-character-in-table
+(1,48): unexpected-cell-in-table-body
+(1,63): unexpected-cell-end-tag
+(1,71): eof-in-table
+#document
+| <!-- - -->
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <div>
+|         "helloexcite!"
+|         <b>
+|           "me!"
+|         <table>
+|           <tbody>
+|             <tr>
+|               <th>
+|                 <i>
+|                   "please!"
+|             <!-- X -->
+
+#data
+<!DOCTYPE html><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+|       "hello"
+|     <li>
+|       "world"
+|       <ul>
+|         "how"
+|         <li>
+|           "do"
+|       "you"
+|   <!-- do -->
+
+#data
+<!DOCTYPE html>A<option>B<optgroup>C<select>D</option>E
+#errors
+(1,54): unexpected-end-tag-in-select
+(1,55): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+|     <option>
+|       "B"
+|     <optgroup>
+|       "C"
+|       <select>
+|         "DE"
+
+#data
+<
+#errors
+(1,1): expected-tag-name
+(1,1): expected-doctype-but-got-chars
+#new-errors
+(1:2) eof-before-tag-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<"
+
+#data
+<#
+#errors
+(1,1): expected-tag-name
+(1,1): expected-doctype-but-got-chars
+#new-errors
+(1:2) invalid-first-character-of-tag-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<#"
+
+#data
+</
+#errors
+(1,2): expected-closing-tag-but-got-eof
+(1,2): expected-doctype-but-got-chars
+#new-errors
+(1:3) eof-before-tag-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     "</"
+
+#data
+</#
+#errors
+(1,2): expected-closing-tag-but-got-char
+(1,3): expected-doctype-but-got-eof
+#new-errors
+(1:3) invalid-first-character-of-tag-name
+#document
+| <!-- # -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,2): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?#
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,3): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?# -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!
+#errors
+(1,2): expected-dashes-or-doctype
+(1,2): expected-doctype-but-got-eof
+#new-errors
+(1:3) incorrectly-opened-comment
+#document
+| <!--  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!#
+#errors
+(1,2): expected-dashes-or-doctype
+(1,3): expected-doctype-but-got-eof
+#new-errors
+(1:3) incorrectly-opened-comment
+#document
+| <!-- # -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?COMMENT?>
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,11): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?COMMENT? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!COMMENT>
+#errors
+(1,2): expected-dashes-or-doctype
+(1,10): expected-doctype-but-got-eof
+#new-errors
+(1:3) incorrectly-opened-comment
+#document
+| <!-- COMMENT -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+</ COMMENT >
+#errors
+(1,2): expected-closing-tag-but-got-char
+(1,12): expected-doctype-but-got-eof
+#new-errors
+(1:3) invalid-first-character-of-tag-name
+#document
+| <!--  COMMENT  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?COM--MENT?>
+#errors
+(1,1): expected-tag-name-but-got-question-mark
+(1,13): expected-doctype-but-got-eof
+#new-errors
+(1:2) unexpected-question-mark-instead-of-tag-name
+#document
+| <!-- ?COM--MENT? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!COM--MENT>
+#errors
+(1,2): expected-dashes-or-doctype
+(1,12): expected-doctype-but-got-eof
+#new-errors
+(1:3) incorrectly-opened-comment
+#document
+| <!-- COM--MENT -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+</ COM--MENT >
+#errors
+(1,2): expected-closing-tag-but-got-char
+(1,14): expected-doctype-but-got-eof
+#new-errors
+(1:3) invalid-first-character-of-tag-name
+#document
+| <!--  COM--MENT  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><style> EOF
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       " EOF"
+|   <body>
+
+#data
+<!DOCTYPE html><script> <!-- </script> --> </script> EOF
+#errors
+(1,52): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       " <!-- "
+|     " "
+|   <body>
+|     "-->  EOF"
+
+#data
+<b><p></b>TEST
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,10): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <p>
+|       <b>
+|       "TEST"
+
+#data
+<p id=a><b><p id=b></b>TEST
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,19): unexpected-end-tag
+(1,23): adoption-agency-1.2
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       id="a"
+|       <b>
+|     <p>
+|       id="b"
+|       "TEST"
+
+#data
+<b id=a><p><b id=b></p></b>TEST
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): unexpected-end-tag
+(1,27): adoption-agency-1.2
+(1,31): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       id="a"
+|       <p>
+|         <b>
+|           id="b"
+|       "TEST"
+
+#data
+<!DOCTYPE html><title>U-test</title><body><div><p>Test<u></p></div></body>
+#errors
+(1,61): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "U-test"
+|   <body>
+|     <div>
+|       <p>
+|         "Test"
+|         <u>
+
+#data
+<!DOCTYPE html><font><table></font></table></font>
+#errors
+(1,35): unexpected-end-tag-implies-table-voodoo
+(1,35): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <table>
+
+#data
+<font><p>hello<b>cruel</font>world
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,29): adoption-agency-1.3
+(1,29): adoption-agency-1.3
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|     <p>
+|       <font>
+|         "hello"
+|         <b>
+|           "cruel"
+|       <b>
+|         "world"
+
+#data
+<b>Test</i>Test
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "TestTest"
+
+#data
+<b>A<cite>B<div>C
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|         <div>
+|           "C"
+
+#data
+<b>A<cite>B<div>C</cite>D
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,24): unexpected-end-tag
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|         <div>
+|           "CD"
+
+#data
+<b>A<cite>B<div>C</b>D
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,21): adoption-agency-1.3
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|     <div>
+|       <b>
+|         "C"
+|       "D"
+
+#data
+
+#errors
+(1,0): expected-doctype-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<DIV>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,5): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<DIV> abc
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,9): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc"
+
+#data
+<DIV> abc <B>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,13): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+
+#data
+<DIV> abc <B> def
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def"
+
+#data
+<DIV> abc <B> def <I>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+
+#data
+<DIV> abc <B> def <I> ghi
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi"
+
+#data
+<DIV> abc <B> def <I> ghi <P>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|           <p>
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|           <p>
+|             " jkl"
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|         <p>
+|           <b>
+|             " jkl "
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|         <p>
+|           <b>
+|             " jkl "
+|           " mno"
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,47): adoption-agency-1.3
+(1,47): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       <p>
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,47): adoption-agency-1.3
+(1,51): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       <p>
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr"
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr </P>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,47): adoption-agency-1.3
+(1,56): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       <p>
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr "
+
+#data
+<DIV> abc <B> def <I> ghi <P> jkl </B> mno </I> pqr </P> stu
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,38): adoption-agency-1.3
+(1,47): adoption-agency-1.3
+(1,60): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       <p>
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr "
+|       " stu"
+
+#data
+<test attribute---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------->
+#errors
+(1,1040): expected-doctype-but-got-start-tag
+(1,1040): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <test>
+|       attribute----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------=""
+
+#data
+<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe
+#errors
+(1,15): expected-doctype-but-got-start-tag
+(1,39): unexpected-start-tag-implies-table-voodoo
+(1,39): unexpected-start-tag-implies-end-tag
+(1,39): unexpected-end-tag
+(1,45): foster-parenting-character-in-table
+(1,45): foster-parenting-character-in-table
+(1,68): foster-parenting-character-in-table
+(1,71): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "aba"
+|       <a>
+|         href="foo"
+|         "br"
+|       <a>
+|         href="foo"
+|         "x"
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|     <a>
+|       href="foo"
+|       "aoe"
+
+#data
+<a href="blah">aba<table><tr><td><a href="foo">br</td></tr>x</table>aoe
+#errors
+(1,15): expected-doctype-but-got-start-tag
+(1,54): unexpected-cell-end-tag
+(1,68): unexpected text in table
+(1,71): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "abax"
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 href="foo"
+|                 "br"
+|       "aoe"
+
+#data
+<table><a href="blah">aba<tr><td><a href="foo">br</td></tr>x</table>aoe
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,22): unexpected-start-tag-implies-table-voodoo
+(1,29): foster-parenting-character-in-table
+(1,29): foster-parenting-character-in-table
+(1,29): foster-parenting-character-in-table
+(1,54): unexpected-cell-end-tag
+(1,68): foster-parenting-character-in-table
+(1,71): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "aba"
+|     <a>
+|       href="blah"
+|       "x"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <a>
+|               href="foo"
+|               "br"
+|     <a>
+|       href="blah"
+|       "aoe"
+
+#data
+<a href=a>aa<marquee>aa<a href=b>bb</marquee>aa
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,45): end-tag-too-early
+(1,47): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="a"
+|       "aa"
+|       <marquee>
+|         "aa"
+|         <a>
+|           href="b"
+|           "bb"
+|       "aa"
+
+#data
+<wbr><strike><code></strike><code><strike></code>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,28): adoption-agency-1.3
+(1,49): adoption-agency-1.3
+(1,49): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+|     <strike>
+|       <code>
+|     <code>
+|       <code>
+|         <strike>
+
+#data
+<!DOCTYPE html><spacer>foo
+#errors
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <spacer>
+|       "foo"
+
+#data
+<title><meta></title><link><title><meta></title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<meta>"
+|     <link>
+|     <title>
+|       "<meta>"
+|   <body>
+
+#data
+<style><!--</style><meta><script>--><link></script>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|     <meta>
+|     <script>
+|       "--><link>"
+|   <body>
+
+#data
+<head><meta></head><link>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,25): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <meta>
+|     <link>
+|   <body>
+
+#data
+<table><tr><tr><td><td><span><th><span>X</table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,33): unexpected-cell-end-tag
+(1,48): unexpected-cell-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|         <tr>
+|           <td>
+|           <td>
+|             <span>
+|           <th>
+|             <span>
+|               "X"
+
+#data
+<body><body><base><link><meta><title><p></title><body><p></body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,12): unexpected-start-tag
+(1,54): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <base>
+|     <link>
+|     <meta>
+|     <title>
+|       "<p>"
+|     <p>
+
+#data
+<textarea><p></textarea>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<p>"
+
+#data
+<p><image></p>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,10): unexpected-start-tag-treated-as
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <img>
+
+#data
+<a><table><a></table><p><a><div><a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,13): unexpected-start-tag-implies-table-voodoo
+(1,13): unexpected-start-tag-implies-end-tag
+(1,13): adoption-agency-1.3
+(1,27): unexpected-start-tag-implies-end-tag
+(1,27): adoption-agency-1.2
+(1,32): unexpected-end-tag
+(1,35): unexpected-start-tag-implies-end-tag
+(1,35): adoption-agency-1.2
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|     <p>
+|       <a>
+|     <div>
+|       <a>
+
+#data
+<head></p><meta><p>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,10): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <meta>
+|   <body>
+|     <p>
+
+#data
+<head></html><meta><p>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,19): expected-eof-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     <p>
+
+#data
+<b><table><td><i></table>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,25): unexpected-cell-end-tag
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<b><table><td></b><i></table>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,18): unexpected-end-tag
+(1,29): unexpected-cell-end-tag
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<h1><h2>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,8): unexpected-start-tag
+(1,8): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|     <h2>
+
+#data
+<a><p><a></a></p></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,9): unexpected-start-tag-implies-end-tag
+(1,9): adoption-agency-1.3
+(1,21): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+|       <a>
+
+#data
+<b><button></b></button></b>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,15): adoption-agency-1.3
+(1,28): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <button>
+|       <b>
+
+#data
+<p><b><div><marquee></p></b></div>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,28): unexpected-end-tag
+(1,34): end-tag-too-early
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|     <div>
+|       <b>
+|         <marquee>
+|           <p>
+
+#data
+<script></script></div><title></title><p><p>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|     <title>
+|   <body>
+|     <p>
+|     <p>
+
+#data
+<p><hr></p>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <hr>
+|     <p>
+
+#data
+<select><b><option><select><option></b></select>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): unexpected-start-tag-in-select
+(1,27): unexpected-select-in-select
+(1,39): unexpected-end-tag
+(1,48): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|     <option>
+
+#data
+<html><head><title></title><body></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|   <body>
+
+#data
+<a><table><td><a><table></table><a></tr><a></table><a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-cell-in-table-body
+(1,35): unexpected-start-tag-implies-end-tag
+(1,40): unexpected-cell-end-tag
+(1,43): unexpected-start-tag-implies-table-voodoo
+(1,43): unexpected-start-tag-implies-end-tag
+(1,43): unexpected-end-tag
+(1,54): unexpected-start-tag-implies-end-tag
+(1,54): adoption-agency-1.2
+(1,54): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 <table>
+|               <a>
+|     <a>
+
+#data
+<ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,45): end-tag-too-early
+(1,58): end-tag-too-early
+(1,69): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|       <div>
+|         <li>
+|       <li>
+|       <li>
+|         <div>
+|       <li>
+|         <address>
+|       <li>
+|         <b>
+|           <em>
+|       <li>
+
+#data
+<ul><li><ul></li><li>a</li></ul></li></ul>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,17): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <ul>
+|           <li>
+|             "a"
+
+#data
+<frameset><frame><frameset><frame></frameset><noframes></noframes></frameset>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+|     <frameset>
+|       <frame>
+|     <noframes>
+
+#data
+<h1><table><td><h3></table><h3></h1>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,15): unexpected-cell-in-table-body
+(1,27): unexpected-cell-end-tag
+(1,31): unexpected-start-tag
+(1,36): end-tag-too-early
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <h3>
+|     <h3>
+
+#data
+<table><colgroup><col><colgroup><col><col><col><colgroup><col><col><thead><tr><td></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|       <colgroup>
+|         <col>
+|         <col>
+|         <col>
+|       <colgroup>
+|         <col>
+|         <col>
+|       <thead>
+|         <tr>
+|           <td>
+
+#data
+<table><col><tbody><col><tr><col><td><col></table><col>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,37): unexpected-cell-in-table-body
+(1,55): unexpected-start-tag-ignored
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|         <tr>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|         <tr>
+|           <td>
+|       <colgroup>
+|         <col>
+
+#data
+<table><colgroup><tbody><colgroup><tr><colgroup><td><colgroup></table><colgroup>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,52): unexpected-cell-in-table-body
+(1,80): unexpected-start-tag-ignored
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|       <tbody>
+|       <colgroup>
+|       <tbody>
+|         <tr>
+|       <colgroup>
+|       <tbody>
+|         <tr>
+|           <td>
+|       <colgroup>
+
+#data
+</strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
+#errors
+(1,9): expected-doctype-but-got-end-tag
+(1,9): unexpected-end-tag-before-html
+(1,13): unexpected-end-tag-before-html
+(1,18): unexpected-end-tag-before-html
+(1,22): unexpected-end-tag-before-html
+(1,26): unexpected-end-tag-before-html
+(1,35): unexpected-end-tag-before-html
+(1,39): unexpected-end-tag-before-html
+(1,47): unexpected-end-tag-before-html
+(1,52): unexpected-end-tag-before-html
+(1,58): unexpected-end-tag-before-html
+(1,64): unexpected-end-tag-before-html
+(1,72): unexpected-end-tag-before-html
+(1,79): unexpected-end-tag-before-html
+(1,88): unexpected-end-tag-before-html
+(1,93): unexpected-end-tag-before-html
+(1,98): unexpected-end-tag-before-html
+(1,103): unexpected-end-tag-before-html
+(1,108): unexpected-end-tag-before-html
+(1,113): unexpected-end-tag-before-html
+(1,118): unexpected-end-tag-before-html
+(1,130): unexpected-end-tag-after-body
+(1,130): unexpected-end-tag-treated-as
+(1,134): unexpected-end-tag
+(1,140): unexpected-end-tag
+(1,148): unexpected-end-tag
+(1,155): unexpected-end-tag
+(1,163): unexpected-end-tag
+(1,172): unexpected-end-tag
+(1,180): unexpected-end-tag
+(1,185): unexpected-end-tag
+(1,190): unexpected-end-tag
+(1,195): unexpected-end-tag
+(1,203): unexpected-end-tag
+(1,210): unexpected-end-tag
+(1,217): unexpected-end-tag
+(1,225): unexpected-end-tag
+(1,230): unexpected-end-tag
+(1,238): unexpected-end-tag
+(1,244): unexpected-end-tag
+(1,251): unexpected-end-tag
+(1,258): unexpected-end-tag
+(1,269): unexpected-end-tag
+(1,279): unexpected-end-tag
+(1,287): unexpected-end-tag
+(1,296): unexpected-end-tag
+(1,300): unexpected-end-tag
+(1,305): unexpected-end-tag
+(1,310): unexpected-end-tag
+(1,320): unexpected-end-tag
+(1,331): unexpected-end-tag
+(1,339): unexpected-end-tag
+(1,347): unexpected-end-tag
+(1,355): unexpected-end-tag
+(1,365): end-tag-too-early
+(1,378): end-tag-too-early
+(1,387): end-tag-too-early
+(1,393): end-tag-too-early
+(1,399): end-tag-too-early
+(1,404): end-tag-too-early
+(1,415): end-tag-too-early
+(1,425): end-tag-too-early
+(1,432): end-tag-too-early
+(1,437): end-tag-too-early
+(1,442): end-tag-too-early
+(1,447): unexpected-end-tag
+(1,454): unexpected-end-tag
+(1,460): unexpected-end-tag
+(1,467): unexpected-end-tag
+(1,476): end-tag-too-early
+(1,486): end-tag-too-early
+(1,495): end-tag-too-early
+(1,513): expected-eof-but-got-end-tag
+(1,513): unexpected-end-tag
+(1,520): unexpected-end-tag
+(1,529): unexpected-end-tag
+(1,537): unexpected-end-tag
+(1,547): unexpected-end-tag
+(1,557): unexpected-end-tag
+(1,568): unexpected-end-tag
+(1,579): unexpected-end-tag
+(1,590): unexpected-end-tag
+(1,599): unexpected-end-tag
+(1,611): unexpected-end-tag
+(1,622): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+|     <p>
+
+#data
+<table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></p></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): unexpected-end-tag-implies-table-voodoo
+(1,20): unexpected-end-tag
+(1,24): unexpected-end-tag-implies-table-voodoo
+(1,24): unexpected-end-tag
+(1,29): unexpected-end-tag-implies-table-voodoo
+(1,29): unexpected-end-tag
+(1,33): unexpected-end-tag-implies-table-voodoo
+(1,33): unexpected-end-tag
+(1,37): unexpected-end-tag-implies-table-voodoo
+(1,37): unexpected-end-tag
+(1,46): unexpected-end-tag-implies-table-voodoo
+(1,46): unexpected-end-tag
+(1,50): unexpected-end-tag-implies-table-voodoo
+(1,50): unexpected-end-tag
+(1,58): unexpected-end-tag-implies-table-voodoo
+(1,58): unexpected-end-tag
+(1,63): unexpected-end-tag-implies-table-voodoo
+(1,63): unexpected-end-tag
+(1,69): unexpected-end-tag-implies-table-voodoo
+(1,69): end-tag-too-early
+(1,75): unexpected-end-tag-implies-table-voodoo
+(1,75): unexpected-end-tag
+(1,83): unexpected-end-tag-implies-table-voodoo
+(1,83): unexpected-end-tag
+(1,90): unexpected-end-tag-implies-table-voodoo
+(1,90): unexpected-end-tag
+(1,99): unexpected-end-tag-implies-table-voodoo
+(1,99): unexpected-end-tag
+(1,104): unexpected-end-tag-implies-table-voodoo
+(1,104): end-tag-too-early
+(1,109): unexpected-end-tag-implies-table-voodoo
+(1,109): end-tag-too-early
+(1,114): unexpected-end-tag-implies-table-voodoo
+(1,114): end-tag-too-early
+(1,119): unexpected-end-tag-implies-table-voodoo
+(1,119): end-tag-too-early
+(1,124): unexpected-end-tag-implies-table-voodoo
+(1,124): end-tag-too-early
+(1,129): unexpected-end-tag-implies-table-voodoo
+(1,129): end-tag-too-early
+(1,136): unexpected-end-tag-in-table-row
+(1,141): unexpected-end-tag-implies-table-voodoo
+(1,141): unexpected-end-tag-treated-as
+(1,145): unexpected-end-tag-implies-table-voodoo
+(1,145): unexpected-end-tag
+(1,151): unexpected-end-tag-implies-table-voodoo
+(1,151): unexpected-end-tag
+(1,159): unexpected-end-tag-implies-table-voodoo
+(1,159): unexpected-end-tag
+(1,166): unexpected-end-tag-implies-table-voodoo
+(1,166): unexpected-end-tag
+(1,174): unexpected-end-tag-implies-table-voodoo
+(1,174): unexpected-end-tag
+(1,183): unexpected-end-tag-implies-table-voodoo
+(1,183): unexpected-end-tag
+(1,196): unexpected-end-tag
+(1,201): unexpected-end-tag
+(1,206): unexpected-end-tag
+(1,214): unexpected-end-tag
+(1,221): unexpected-end-tag
+(1,228): unexpected-end-tag
+(1,236): unexpected-end-tag
+(1,241): unexpected-end-tag
+(1,249): unexpected-end-tag
+(1,255): unexpected-end-tag
+(1,262): unexpected-end-tag
+(1,269): unexpected-end-tag
+(1,280): unexpected-end-tag
+(1,290): unexpected-end-tag
+(1,298): unexpected-end-tag
+(1,307): unexpected-end-tag
+(1,311): unexpected-end-tag
+(1,316): unexpected-end-tag
+(1,321): unexpected-end-tag
+(1,331): unexpected-end-tag
+(1,342): unexpected-end-tag
+(1,350): unexpected-end-tag
+(1,358): unexpected-end-tag
+(1,366): unexpected-end-tag
+(1,376): end-tag-too-early
+(1,389): end-tag-too-early
+(1,398): end-tag-too-early
+(1,404): end-tag-too-early
+(1,410): end-tag-too-early
+(1,415): end-tag-too-early
+(1,426): end-tag-too-early
+(1,436): end-tag-too-early
+(1,443): end-tag-too-early
+(1,448): end-tag-too-early
+(1,453): end-tag-too-early
+(1,458): unexpected-end-tag
+(1,465): unexpected-end-tag
+(1,471): unexpected-end-tag
+(1,478): unexpected-end-tag
+(1,487): end-tag-too-early
+(1,497): end-tag-too-early
+(1,506): end-tag-too-early
+(1,524): expected-eof-but-got-end-tag
+(1,524): unexpected-end-tag
+(1,531): unexpected-end-tag
+(1,540): unexpected-end-tag
+(1,548): unexpected-end-tag
+(1,558): unexpected-end-tag
+(1,568): unexpected-end-tag
+(1,579): unexpected-end-tag
+(1,590): unexpected-end-tag
+(1,601): unexpected-end-tag
+(1,610): unexpected-end-tag
+(1,622): unexpected-end-tag
+(1,633): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+|     <table>
+|       <tbody>
+|         <tr>
+|     <p>
+
+#data
+<frameset>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,10): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
diff --git a/internal/html/testdata/webkit/tests10.dat b/internal/html/testdata/webkit/tests10.dat
new file mode 100644 (file)
index 0000000..f84e2d5
--- /dev/null
@@ -0,0 +1,849 @@
+#data
+<!DOCTYPE html><svg></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!DOCTYPE html><svg></svg><![CDATA[a]]>
+#errors
+(1,28) expected-dashes-or-doctype
+#new-errors
+(1:35) cdata-in-html-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <!-- [CDATA[a]] -->
+
+#data
+<!DOCTYPE html><body><svg></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!DOCTYPE html><body><select><svg></svg></select>
+#errors
+(1,34) unexpected-start-tag-in-select
+(1,40) unexpected-end-tag-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><body><select><option><svg></svg></option></select>
+#errors
+(1,42) unexpected-start-tag-in-select
+(1,48) unexpected-end-tag-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!DOCTYPE html><body><table><svg></svg></table>
+#errors
+(1,33) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
+#errors
+(1,33) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
+#errors
+(1,33) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
+#errors
+(1,40) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+|       <tbody>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
+#errors
+(1,44) foster-parenting-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg g>
+|                 "foo"
+|               <svg g>
+|                 "bar"
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg g>
+|                 "foo"
+|               <svg g>
+|                 "bar"
+|             <p>
+|               "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|         <p>
+|           "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
+#errors
+(1,65) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|         <p>
+|           "baz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
+#errors
+(1,73) unexpected-end-tag
+(1,73) expected-one-end-tag-but-got-another
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|           "baz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
+#errors
+(1,43) foster-parenting-start-tag svg
+(1,66) unexpected HTML-like start tag token in foreign content
+(1,66) foster-parenting-start-tag
+(1,67) foster-parenting-character
+(1,68) foster-parenting-character
+(1,69) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <p>
+|       "baz"
+|     <table>
+|       <colgroup>
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
+#errors
+(1,49) unexpected-start-tag-in-select
+(1,52) unexpected-start-tag-in-select
+(1,59) unexpected-end-tag-in-select
+(1,62) unexpected-start-tag-in-select
+(1,69) unexpected-end-tag-in-select
+(1,72) unexpected-start-tag-in-select
+(1,83) unexpected-table-element-end-tag-in-select-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               "foobarbaz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
+#errors
+(1,36) unexpected-start-tag-implies-table-voodoo
+(1,41) unexpected-start-tag-in-select
+(1,44) unexpected-start-tag-in-select
+(1,51) unexpected-end-tag-in-select
+(1,54) unexpected-start-tag-in-select
+(1,61) unexpected-end-tag-in-select
+(1,64) unexpected-start-tag-in-select
+(1,75) unexpected-table-element-end-tag-in-select-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "foobarbaz"
+|     <table>
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
+#errors
+(1,40) expected-eof-but-got-start-tag
+(1,63) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <p>
+|       "baz"
+
+#data
+<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
+#errors
+(1,33) unexpected-start-tag-after-body
+(1,56) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <p>
+|       "baz"
+
+#data
+<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
+#errors
+(1,30) unexpected-start-tag-in-frameset
+(1,33) unexpected-start-tag-in-frameset
+(1,37) unexpected-end-tag-in-frameset
+(1,40) unexpected-start-tag-in-frameset
+(1,44) unexpected-end-tag-in-frameset
+(1,47) unexpected-start-tag-in-frameset
+(1,53) unexpected-start-tag-in-frameset
+(1,53) eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
+#errors
+(1,41) unexpected-start-tag-after-frameset
+(1,44) unexpected-start-tag-after-frameset
+(1,48) unexpected-end-tag-after-frameset
+(1,51) unexpected-start-tag-after-frameset
+(1,55) unexpected-end-tag-after-frameset
+(1,58) unexpected-start-tag-after-frameset
+(1,64) unexpected-start-tag-after-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     <svg svg>
+|       xlink href="foo"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+|       "bar"
+
+#data
+<svg></path>
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,12) unexpected-end-tag
+(1,12) unexpected-end-tag
+(1,12) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<div><svg></div>a
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,16) unexpected-end-tag
+(1,16) end-tag-too-early
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|     "a"
+
+#data
+<div><svg><path></div>a
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,22) unexpected-end-tag
+(1,22) end-tag-too-early
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|     "a"
+
+#data
+<div><svg><path></svg><path>
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,22) unexpected-end-tag
+(1,28) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|       <path>
+
+#data
+<div><svg><path><foreignObject><math></div>a
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,43) unexpected-end-tag
+(1,43) end-tag-too-early
+(1,44) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             <math math>
+|               "a"
+
+#data
+<div><svg><path><foreignObject><p></div>a
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,40) end-tag-too-early
+(1,41) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             <p>
+|               "a"
+
+#data
+<!DOCTYPE html><svg><desc><div><svg><ul>a
+#errors
+(1,40) unexpected-html-element-in-foreign-content
+(1,41) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg desc>
+|         <div>
+|           <svg svg>
+|           <ul>
+|             "a"
+
+#data
+<!DOCTYPE html><svg><desc><svg><ul>a
+#errors
+(1,35) unexpected-html-element-in-foreign-content
+(1,36) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg desc>
+|         <svg svg>
+|         <ul>
+|           "a"
+
+#data
+<!DOCTYPE html><p><svg><desc><p>
+#errors
+(1,32) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <svg svg>
+|         <svg desc>
+|           <p>
+
+#data
+<!DOCTYPE html><p><svg><title><p>
+#errors
+(1,33) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <svg svg>
+|         <svg title>
+|           <p>
+
+#data
+<div><svg><path><foreignObject><p></foreignObject><p>
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,50) unexpected-end-tag
+(1,53) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             <p>
+|             <p>
+
+#data
+<math><mi><div><object><div><span></span></div></object></div></mi><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,71) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <div>
+|           <object>
+|             <div>
+|               <span>
+|       <math mi>
+
+#data
+<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,83) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <svg svg>
+|           <svg foreignObject>
+|             <div>
+|               <div>
+|       <math mi>
+
+#data
+<svg><script></script><path>
+#errors
+(1,5) expected-doctype-but-got-start-tag
+(1,28) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg script>
+|       <svg path>
+
+#data
+<table><svg></svg><tr>
+#errors
+(1,7) expected-doctype-but-got-start-tag
+(1,12) unexpected-start-tag-implies-table-voodoo
+(1,22) eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<math><mi><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,18) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <math mglyph>
+
+#data
+<math><mi><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,22) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <math malignmark>
+
+#data
+<math><mo><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,18) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mo>
+|         <math mglyph>
+
+#data
+<math><mo><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,22) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mo>
+|         <math malignmark>
+
+#data
+<math><mn><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,18) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         <math mglyph>
+
+#data
+<math><mn><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,22) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         <math malignmark>
+
+#data
+<math><ms><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,18) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math ms>
+|         <math mglyph>
+
+#data
+<math><ms><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,22) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math ms>
+|         <math malignmark>
+
+#data
+<math><mtext><mglyph>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,21) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <math mglyph>
+
+#data
+<math><mtext><malignmark>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,25) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <math malignmark>
+
+#data
+<math><annotation-xml><svg></svg></annotation-xml><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,54) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|       <math mi>
+
+#data
+<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,144) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg foreignObject>
+|             <div>
+|               <math math>
+|                 <math mi>
+|               <span>
+|           <svg path>
+|       <math mi>
+
+#data
+<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
+#errors
+(1,6) expected-doctype-but-got-start-tag
+(1,153) expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg foreignObject>
+|             <math math>
+|               <math mi>
+|                 <svg svg>
+|               <math mo>
+|             <span>
+|           <svg path>
+|       <math mi>
diff --git a/internal/html/testdata/webkit/tests11.dat b/internal/html/testdata/webkit/tests11.dat
new file mode 100644 (file)
index 0000000..b9901e7
--- /dev/null
@@ -0,0 +1,523 @@
+#data
+<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' DIFFUSECONSTANT='' EDGEMODE='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' diffuseconstant='' edgemode='' filterunits='' filterres='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       filterUnits=""
+|       filterres=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' diffuseConstant='' edgeMode='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       attributename=""
+|       attributetype=""
+|       basefrequency=""
+|       baseprofile=""
+|       calcmode=""
+|       clippathunits=""
+|       diffuseconstant=""
+|       edgemode=""
+|       filterunits=""
+|       glyphref=""
+|       gradienttransform=""
+|       gradientunits=""
+|       kernelmatrix=""
+|       kernelunitlength=""
+|       keypoints=""
+|       keysplines=""
+|       keytimes=""
+|       lengthadjust=""
+|       limitingconeangle=""
+|       markerheight=""
+|       markerunits=""
+|       markerwidth=""
+|       maskcontentunits=""
+|       maskunits=""
+|       numoctaves=""
+|       pathlength=""
+|       patterncontentunits=""
+|       patterntransform=""
+|       patternunits=""
+|       pointsatx=""
+|       pointsaty=""
+|       pointsatz=""
+|       preservealpha=""
+|       preserveaspectratio=""
+|       primitiveunits=""
+|       refx=""
+|       refy=""
+|       repeatcount=""
+|       repeatdur=""
+|       requiredextensions=""
+|       requiredfeatures=""
+|       specularconstant=""
+|       specularexponent=""
+|       spreadmethod=""
+|       startoffset=""
+|       stddeviation=""
+|       stitchtiles=""
+|       surfacescale=""
+|       systemlanguage=""
+|       tablevalues=""
+|       targetx=""
+|       targety=""
+|       textlength=""
+|       viewbox=""
+|       viewtarget=""
+|       xchannelselector=""
+|       ychannelselector=""
+|       zoomandpan=""
+
+#data
+<!DOCTYPE html><body><svg contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       contentscripttype=""
+|       contentstyletype=""
+|       externalresourcesrequired=""
+|       filterres=""
+
+#data
+<!DOCTYPE html><body><svg CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' EXTERNALRESOURCESREQUIRED='' FILTERRES=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       contentscripttype=""
+|       contentstyletype=""
+|       externalresourcesrequired=""
+|       filterres=""
+
+#data
+<!DOCTYPE html><body><svg contentscripttype='' contentstyletype='' externalresourcesrequired='' filterres=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       contentscripttype=""
+|       contentstyletype=""
+|       externalresourcesrequired=""
+|       filterres=""
+
+#data
+<!DOCTYPE html><body><math contentScriptType='' contentStyleType='' externalResourcesRequired='' filterRes=''></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       contentscripttype=""
+|       contentstyletype=""
+|       externalresourcesrequired=""
+|       filterres=""
+
+#data
+<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math altglyph>
+|       <math altglyphdef>
+|       <math altglyphitem>
+|       <math animatecolor>
+|       <math animatemotion>
+|       <math animatetransform>
+|       <math clippath>
+|       <math feblend>
+|       <math fecolormatrix>
+|       <math fecomponenttransfer>
+|       <math fecomposite>
+|       <math feconvolvematrix>
+|       <math fediffuselighting>
+|       <math fedisplacementmap>
+|       <math fedistantlight>
+|       <math feflood>
+|       <math fefunca>
+|       <math fefuncb>
+|       <math fefuncg>
+|       <math fefuncr>
+|       <math fegaussianblur>
+|       <math feimage>
+|       <math femerge>
+|       <math femergenode>
+|       <math femorphology>
+|       <math feoffset>
+|       <math fepointlight>
+|       <math fespecularlighting>
+|       <math fespotlight>
+|       <math fetile>
+|       <math feturbulence>
+|       <math foreignobject>
+|       <math glyphref>
+|       <math lineargradient>
+|       <math radialgradient>
+|       <math textpath>
+
+#data
+<!DOCTYPE html><body><svg><solidColor /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg solidcolor>
diff --git a/internal/html/testdata/webkit/tests12.dat b/internal/html/testdata/webkit/tests12.dat
new file mode 100644 (file)
index 0000000..63107d2
--- /dev/null
@@ -0,0 +1,62 @@
+#data
+<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|       <math math>
+|         <math mtext>
+|           <i>
+|             "baz"
+|         <math annotation-xml>
+|           <svg svg>
+|             <svg desc>
+|               <b>
+|                 "eggs"
+|             <svg g>
+|               <svg foreignObject>
+|                 <p>
+|                   "spam"
+|                 <table>
+|                   <tbody>
+|                     <tr>
+|                       <td>
+|                         <img>
+|             <svg g>
+|               "quux"
+|       "bar"
+
+#data
+<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <math math>
+|       <math mtext>
+|         <i>
+|           "baz"
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg desc>
+|             <b>
+|               "eggs"
+|           <svg g>
+|             <svg foreignObject>
+|               <p>
+|                 "spam"
+|               <table>
+|                 <tbody>
+|                   <tr>
+|                     <td>
+|                       <img>
+|           <svg g>
+|             "quux"
+|     "bar"
diff --git a/internal/html/testdata/webkit/tests14.dat b/internal/html/testdata/webkit/tests14.dat
new file mode 100644 (file)
index 0000000..a08b764
--- /dev/null
@@ -0,0 +1,75 @@
+#data
+<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xyz:abc>
+
+#data
+<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xyz:abc>
+|     <span>
+
+#data
+<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
+#errors
+(1,38): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   abc:def="gh"
+|   <head>
+|   <body>
+|     <xyz:abc>
+
+#data
+<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
+#errors
+(1,53): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   xml:lang="bar"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html 123=456>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   123="456"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html 123=456><html 789=012>
+#errors
+(1,43): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   123="456"
+|   789="012"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html><body 789=012>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     789="012"
diff --git a/internal/html/testdata/webkit/tests15.dat b/internal/html/testdata/webkit/tests15.dat
new file mode 100644 (file)
index 0000000..93d06a8
--- /dev/null
@@ -0,0 +1,216 @@
+#data
+<!DOCTYPE html><p><b><i><u></p> <p>X
+#errors
+(1,31): unexpected-end-tag
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         <i>
+|           <u>
+|     <b>
+|       <i>
+|         <u>
+|           " "
+|           <p>
+|             "X"
+
+#data
+<p><b><i><u></p>
+<p>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,16): unexpected-end-tag
+(2,4): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         <i>
+|           <u>
+|     <b>
+|       <i>
+|         <u>
+|           "
+"
+|           <p>
+|             "X"
+
+#data
+<!doctype html></html> <head>
+#errors
+(1,29): expected-eof-but-got-start-tag
+(1,29): unexpected-start-tag-ignored
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " "
+
+#data
+<!doctype html></body><meta>
+#errors
+(1,28): unexpected-start-tag-after-body
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+
+#data
+<html></html><!-- foo -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  foo  -->
+
+#data
+<!doctype html></body><title>X</title>
+#errors
+(1,29): unexpected-start-tag-after-body
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+
+#data
+<!doctype html><table> X<meta></table>
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,30): foster-parenting-start-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " X"
+|     <meta>
+|     <table>
+
+#data
+<!doctype html><table> x</table>
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x"
+|     <table>
+
+#data
+<!doctype html><table> x </table>
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,25): foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x "
+|     <table>
+
+#data
+<!doctype html><table><tr> x</table>
+#errors
+(1,27): foster-parenting-character
+(1,28): foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table>X<style> <tr>x </style> </table>
+#errors
+(1,23): foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <style>
+|         " <tr>x "
+|       " "
+
+#data
+<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
+#errors
+(1,30): foster-parenting-start-tag
+(1,31): foster-parenting-character
+(1,32): foster-parenting-character
+(1,33): foster-parenting-character
+(1,37): foster-parenting-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         "foo"
+|       <table>
+|         " "
+|         <tbody>
+|           <tr>
+|             <td>
+|               "bar"
+|             " "
+
+#data
+<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,7): unexpected-start-tag-ignored
+(1,15): unexpected-end-tag
+(1,23): unexpected-end-tag
+(1,33): unexpected-start-tag
+(1,99): expected-named-closing-tag-but-got-eof
+(1,99): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+|     <frameset>
+|       <frame>
+|     <noframes>
+|       "</frameset><noframes>"
+
+#data
+<!DOCTYPE html><object></html>
+#errors
+(1,30): expected-body-in-scope
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <object>
diff --git a/internal/html/testdata/webkit/tests16.dat b/internal/html/testdata/webkit/tests16.dat
new file mode 100644 (file)
index 0000000..cea7340
--- /dev/null
@@ -0,0 +1,2604 @@
+#data
+<!doctype html><script>
+#errors
+(1,23): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script>a
+#errors
+(1,24): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "a"
+|   <body>
+
+#data
+<!doctype html><script><
+#errors
+(1,24): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<"
+|   <body>
+
+#data
+<!doctype html><script></
+#errors
+(1,25): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</"
+|   <body>
+
+#data
+<!doctype html><script></S
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</S"
+|   <body>
+
+#data
+<!doctype html><script></SC
+#errors
+(1,27): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SC"
+|   <body>
+
+#data
+<!doctype html><script></SCR
+#errors
+(1,28): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCR"
+|   <body>
+
+#data
+<!doctype html><script></SCRI
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRI"
+|   <body>
+
+#data
+<!doctype html><script></SCRIP
+#errors
+(1,30): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIP"
+|   <body>
+
+#data
+<!doctype html><script></SCRIPT
+#errors
+(1,31): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIPT"
+|   <body>
+
+#data
+<!doctype html><script></SCRIPT 
+#errors
+(1,32): expected-attribute-name-but-got-eof
+(1,32): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:33) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script></s
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</s"
+|   <body>
+
+#data
+<!doctype html><script></sc
+#errors
+(1,27): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</sc"
+|   <body>
+
+#data
+<!doctype html><script></scr
+#errors
+(1,28): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scr"
+|   <body>
+
+#data
+<!doctype html><script></scri
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scri"
+|   <body>
+
+#data
+<!doctype html><script></scrip
+#errors
+(1,30): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scrip"
+|   <body>
+
+#data
+<!doctype html><script></script
+#errors
+(1,31): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</script"
+|   <body>
+
+#data
+<!doctype html><script></script 
+#errors
+(1,32): expected-attribute-name-but-got-eof
+(1,32): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:33) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script><!
+#errors
+(1,25): expected-script-data-but-got-eof
+(1,25): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!"
+|   <body>
+
+#data
+<!doctype html><script><!a
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!a"
+|   <body>
+
+#data
+<!doctype html><script><!-
+#errors
+(1,26): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!-"
+|   <body>
+
+#data
+<!doctype html><script><!-a
+#errors
+(1,27): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!-a"
+|   <body>
+
+#data
+<!doctype html><script><!--
+#errors
+(1,27): expected-named-closing-tag-but-got-eof
+(1,27): unexpected-eof-in-text-mode
+#new-errors
+(1:28) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<!doctype html><script><!--a
+#errors
+(1,28): expected-named-closing-tag-but-got-eof
+(1,28): unexpected-eof-in-text-mode
+#new-errors
+(1:29) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--a"
+|   <body>
+
+#data
+<!doctype html><script><!--<
+#errors
+(1,28): expected-named-closing-tag-but-got-eof
+(1,28): unexpected-eof-in-text-mode
+#new-errors
+(1:29) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<"
+|   <body>
+
+#data
+<!doctype html><script><!--<a
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<a"
+|   <body>
+
+#data
+<!doctype html><script><!--</
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--</"
+|   <body>
+
+#data
+<!doctype html><script><!--</script
+#errors
+(1,35): expected-named-closing-tag-but-got-eof
+(1,35): unexpected-eof-in-text-mode
+#new-errors
+(1:36) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--</script"
+|   <body>
+
+#data
+<!doctype html><script><!--</script 
+#errors
+(1,36): expected-attribute-name-but-got-eof
+(1,36): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:37) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<!doctype html><script><!--<s
+#errors
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<s"
+|   <body>
+
+#data
+<!doctype html><script><!--<script
+#errors
+(1,34): expected-named-closing-tag-but-got-eof
+(1,34): unexpected-eof-in-text-mode
+#new-errors
+(1:35) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script 
+#errors
+(1,35): eof-in-script-in-script
+(1,35): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:36) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script <
+#errors
+(1,36): eof-in-script-in-script
+(1,36): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:37) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <"
+|   <body>
+
+#data
+<!doctype html><script><!--<script <a
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </s
+#errors
+(1,38): eof-in-script-in-script
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </s"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script
+#errors
+(1,43): eof-in-script-in-script
+(1,43): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:44) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </scripta
+#errors
+(1,44): eof-in-script-in-script
+(1,44): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:45) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </scripta"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script 
+#errors
+(1,44): expected-named-closing-tag-but-got-eof
+(1,44): unexpected-eof-in-text-mode
+#new-errors
+(1:45) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script>
+#errors
+(1,44): expected-named-closing-tag-but-got-eof
+(1,44): unexpected-eof-in-text-mode
+#new-errors
+(1:45) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script>"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script/
+#errors
+(1,44): expected-named-closing-tag-but-got-eof
+(1,44): unexpected-eof-in-text-mode
+#new-errors
+(1:45) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script/"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script <
+#errors
+(1,45): expected-named-closing-tag-but-got-eof
+(1,45): unexpected-eof-in-text-mode
+#new-errors
+(1:46) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script <a
+#errors
+(1,46): expected-named-closing-tag-but-got-eof
+(1,46): unexpected-eof-in-text-mode
+#new-errors
+(1:47) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </
+#errors
+(1,46): expected-named-closing-tag-but-got-eof
+(1,46): unexpected-eof-in-text-mode
+#new-errors
+(1:47) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script
+#errors
+(1,52): expected-named-closing-tag-but-got-eof
+(1,52): unexpected-eof-in-text-mode
+#new-errors
+(1:53) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script 
+#errors
+(1,53): expected-attribute-name-but-got-eof
+(1,53): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:54) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script/
+#errors
+(1,53): unexpected-EOF-after-solidus-in-tag
+(1,53): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:54) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script -
+#errors
+(1,36): eof-in-script-in-script
+(1,36): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:37) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -a
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -<
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -<"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --
+#errors
+(1,37): eof-in-script-in-script
+(1,37): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --a
+#errors
+(1,38): eof-in-script-in-script
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --<
+#errors
+(1,38): eof-in-script-in-script
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --<"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -->
+#errors
+(1,38): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --><
+#errors
+(1,39): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --><"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></
+#errors
+(1,40): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script
+#errors
+(1,46): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script 
+#errors
+(1,47): expected-attribute-name-but-got-eof
+(1,47): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:48) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script/
+#errors
+(1,47): unexpected-EOF-after-solidus-in-tag
+(1,47): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:48) eof-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script><\/script>--></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script><\/script>-->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></scr'+'ipt>--></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt>-->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>--><!--</script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>--><!--"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>-- ></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>-- >"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>- -></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- ->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>- - ></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- - >"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>-></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script>--!></script>X
+#errors
+(1,49): expected-named-closing-tag-but-got-eof
+(1,49): unexpected-EOF-in-text-mode
+#new-errors
+(1:50) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script>--!></script>X"
+|   <body>
+
+#data
+<!doctype html><script><!--<scr'+'ipt></script>--></script>
+#errors
+(1,59): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<scr'+'ipt>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><script><!--<script></scr'+'ipt></script>X
+#errors
+(1,57): expected-named-closing-tag-but-got-eof
+(1,57): unexpected-eof-in-text-mode
+#new-errors
+(1:58) eof-in-script-html-comment-like-text
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt></script>X"
+|   <body>
+
+#data
+<!doctype html><style><!--<style></style>--></style>
+#errors
+(1,52): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--<style>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><style><!--</style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><style><!--...</style>...--></style>
+#errors
+(1,51): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|   <body>
+|     "...-->"
+
+#data
+<!doctype html><style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><style><!--...<style><!--...--!></style>--></style>
+#errors
+(1,66): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--...<style><!--...--!>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><style><!--...</style><!-- --><style>@import ...</style>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|     <!--   -->
+|     <style>
+|       "@import ..."
+|   <body>
+
+#data
+<!doctype html><style>...<style><!--...</style><!-- --></style>
+#errors
+(1,63): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "...<style><!--..."
+|     <!--   -->
+|   <body>
+
+#data
+<!doctype html><style>...<!--[if IE]><style>...</style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "...<!--[if IE]><style>..."
+|   <body>
+|     "X"
+
+#data
+<!doctype html><title><!--<title></title>--></title>
+#errors
+(1,52): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "<!--<title>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><title>&lt;/title></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "</title>"
+|   <body>
+
+#data
+<!doctype html><title>foo/title><link></head><body>X
+#errors
+(1,52): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "foo/title><link></head><body>X"
+|   <body>
+
+#data
+<!doctype html><noscript><!--<noscript></noscript>--></noscript>
+#errors
+(1,64): unexpected-end-tag
+#script-on
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--<noscript>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><noscript><!--<noscript></noscript>--></noscript>
+#errors
+#script-off
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- <noscript></noscript> -->
+|   <body>
+
+#data
+<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+#script-on
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "X"
+|     <noscript>
+|       "-->"
+
+#data
+<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+#script-off
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- </noscript>X<noscript> -->
+|   <body>
+
+#data
+<!doctype html><noscript><iframe></noscript>X
+#errors
+#script-on
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<iframe>"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><noscript><iframe></noscript>X
+#errors
+ * (1,34) unexpected token in head noscript
+ * (1,46) unexpected EOF
+#script-off
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <iframe>
+|       "</noscript>X"
+
+#data
+<!doctype html><noframes><!--<noframes></noframes>--></noframes>
+#errors
+(1,64): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noframes>
+|       "<!--<noframes>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><noframes><body><script><!--...</script></body></noframes></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noframes>
+|       "<body><script><!--...</script></body>"
+|   <body>
+
+#data
+<!doctype html><textarea><!--<textarea></textarea>--></textarea>
+#errors
+(1,64): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--<textarea>"
+|     "-->"
+
+#data
+<!doctype html><textarea>&lt;/textarea></textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "</textarea>"
+
+#data
+<!doctype html><textarea>&lt;</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<"
+
+#data
+<!doctype html><textarea>a&lt;b</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "a<b"
+
+#data
+<!doctype html><iframe><!--<iframe></iframe>--></iframe>
+#errors
+(1,56): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "<!--<iframe>"
+|     "-->"
+
+#data
+<!doctype html><iframe>...<!--X->...<!--/X->...</iframe>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "...<!--X->...<!--/X->..."
+
+#data
+<!doctype html><xmp><!--<xmp></xmp>--></xmp>
+#errors
+(1,44): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       "<!--<xmp>"
+|     "-->"
+
+#data
+<!doctype html><noembed><!--<noembed></noembed>--></noembed>
+#errors
+(1,60): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <noembed>
+|       "<!--<noembed>"
+|     "-->"
+
+#data
+<script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,8): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script>a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,9): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "a"
+|   <body>
+
+#data
+<script><
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,9): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<"
+|   <body>
+
+#data
+<script></
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,10): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</"
+|   <body>
+
+#data
+<script></S
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</S"
+|   <body>
+
+#data
+<script></SC
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,12): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SC"
+|   <body>
+
+#data
+<script></SCR
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,13): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCR"
+|   <body>
+
+#data
+<script></SCRI
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRI"
+|   <body>
+
+#data
+<script></SCRIP
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,15): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIP"
+|   <body>
+
+#data
+<script></SCRIPT
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,16): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIPT"
+|   <body>
+
+#data
+<script></SCRIPT 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,17): expected-attribute-name-but-got-eof
+(1,17): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script></s
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</s"
+|   <body>
+
+#data
+<script></sc
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,12): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</sc"
+|   <body>
+
+#data
+<script></scr
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,13): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scr"
+|   <body>
+
+#data
+<script></scri
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scri"
+|   <body>
+
+#data
+<script></scrip
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,15): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scrip"
+|   <body>
+
+#data
+<script></script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,16): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</script"
+|   <body>
+
+#data
+<script></script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,17): expected-attribute-name-but-got-eof
+(1,17): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script><!
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,10): expected-script-data-but-got-eof
+(1,10): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!"
+|   <body>
+
+#data
+<script><!a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!a"
+|   <body>
+
+#data
+<script><!-
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!-"
+|   <body>
+
+#data
+<script><!-a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,12): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!-a"
+|   <body>
+
+#data
+<script><!--
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,12): expected-named-closing-tag-but-got-eof
+(1,12): unexpected-eof-in-text-mode
+#new-errors
+(1:13) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<script><!--a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,13): expected-named-closing-tag-but-got-eof
+(1,13): unexpected-eof-in-text-mode
+#new-errors
+(1:14) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--a"
+|   <body>
+
+#data
+<script><!--<
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,13): expected-named-closing-tag-but-got-eof
+(1,13): unexpected-eof-in-text-mode
+#new-errors
+(1:14) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<"
+|   <body>
+
+#data
+<script><!--<a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+(1,14): unexpected-eof-in-text-mode
+#new-errors
+(1:15) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<a"
+|   <body>
+
+#data
+<script><!--</
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+(1,14): unexpected-eof-in-text-mode
+#new-errors
+(1:15) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--</"
+|   <body>
+
+#data
+<script><!--</script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,20): expected-named-closing-tag-but-got-eof
+(1,20): unexpected-eof-in-text-mode
+#new-errors
+(1:21) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--</script"
+|   <body>
+
+#data
+<script><!--</script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,21): expected-attribute-name-but-got-eof
+(1,21): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:22) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<script><!--<s
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,14): expected-named-closing-tag-but-got-eof
+(1,14): unexpected-eof-in-text-mode
+#new-errors
+(1:15) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<s"
+|   <body>
+
+#data
+<script><!--<script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,19): expected-named-closing-tag-but-got-eof
+(1,19): unexpected-eof-in-text-mode
+#new-errors
+(1:20) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script"
+|   <body>
+
+#data
+<script><!--<script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,20): eof-in-script-in-script
+(1,20): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:21) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script "
+|   <body>
+
+#data
+<script><!--<script <
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,21): eof-in-script-in-script
+(1,21): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:22) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <"
+|   <body>
+
+#data
+<script><!--<script <a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): eof-in-script-in-script
+(1,22): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:23) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <a"
+|   <body>
+
+#data
+<script><!--<script </
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): eof-in-script-in-script
+(1,22): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:23) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </"
+|   <body>
+
+#data
+<script><!--<script </s
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): eof-in-script-in-script
+(1,23): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:24) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </s"
+|   <body>
+
+#data
+<script><!--<script </script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,28): eof-in-script-in-script
+(1,28): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:29) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script"
+|   <body>
+
+#data
+<script><!--<script </scripta
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,29): eof-in-script-in-script
+(1,29): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </scripta"
+|   <body>
+
+#data
+<script><!--<script </script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script>"
+|   <body>
+
+#data
+<script><!--<script </script/
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,29): expected-named-closing-tag-but-got-eof
+(1,29): unexpected-eof-in-text-mode
+#new-errors
+(1:30) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script/"
+|   <body>
+
+#data
+<script><!--<script </script <
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,30): expected-named-closing-tag-but-got-eof
+(1,30): unexpected-eof-in-text-mode
+#new-errors
+(1:31) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <"
+|   <body>
+
+#data
+<script><!--<script </script <a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,31): expected-named-closing-tag-but-got-eof
+(1,31): unexpected-eof-in-text-mode
+#new-errors
+(1:32) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <a"
+|   <body>
+
+#data
+<script><!--<script </script </
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,31): expected-named-closing-tag-but-got-eof
+(1,31): unexpected-eof-in-text-mode
+#new-errors
+(1:32) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </"
+|   <body>
+
+#data
+<script><!--<script </script </script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,37): expected-named-closing-tag-but-got-eof
+(1,37): unexpected-eof-in-text-mode
+#new-errors
+(1:38) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </script"
+|   <body>
+
+#data
+<script><!--<script </script </script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,38): expected-attribute-name-but-got-eof
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script </script/
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,38): unexpected-EOF-after-solidus-in-tag
+(1,38): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:39) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script </script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script -
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,21): eof-in-script-in-script
+(1,21): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:22) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -"
+|   <body>
+
+#data
+<script><!--<script -a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): eof-in-script-in-script
+(1,22): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:23) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -a"
+|   <body>
+
+#data
+<script><!--<script --
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): eof-in-script-in-script
+(1,22): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:23) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --"
+|   <body>
+
+#data
+<script><!--<script --a
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): eof-in-script-in-script
+(1,23): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:24) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --a"
+|   <body>
+
+#data
+<script><!--<script -->
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,23): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --><
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,24): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --><"
+|   <body>
+
+#data
+<script><!--<script --></
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,25): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></"
+|   <body>
+
+#data
+<script><!--<script --></script
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,31): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></script"
+|   <body>
+
+#data
+<script><!--<script --></script 
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,32): expected-attribute-name-but-got-eof
+(1,32): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:33) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --></script/
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,32): unexpected-EOF-after-solidus-in-tag
+(1,32): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:33) eof-in-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script><\/script>--></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script><\/script>-->"
+|   <body>
+
+#data
+<script><!--<script></scr'+'ipt>--></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt>-->"
+|   <body>
+
+#data
+<script><!--<script></script><script></script></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>--><!--</script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>--><!--"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>-- ></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>-- >"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>- -></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- ->"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>- - ></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- - >"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>-></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>->"
+|   <body>
+
+#data
+<script><!--<script>--!></script>X
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,34): expected-named-closing-tag-but-got-eof
+(1,34): unexpected-eof-in-text-mode
+#new-errors
+(1:35) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script>--!></script>X"
+|   <body>
+
+#data
+<script><!--<scr'+'ipt></script>--></script>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,44): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<scr'+'ipt>"
+|   <body>
+|     "-->"
+
+#data
+<script><!--<script></scr'+'ipt></script>X
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,42): expected-named-closing-tag-but-got-eof
+(1,42): unexpected-eof-in-text-mode
+#new-errors
+(1:43) eof-in-script-html-comment-like-text
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt></script>X"
+|   <body>
+
+#data
+<style><!--<style></style>--></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,37): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--<style>"
+|   <body>
+|     "-->"
+
+#data
+<style><!--</style>X
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "X"
+
+#data
+<style><!--...</style>...--></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,36): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|   <body>
+|     "...-->"
+
+#data
+<style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
+|   <body>
+|     "X"
+
+#data
+<style><!--...<style><!--...--!></style>--></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,51): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--...<style><!--...--!>"
+|   <body>
+|     "-->"
+
+#data
+<style><!--...</style><!-- --><style>@import ...</style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|     <!--   -->
+|     <style>
+|       "@import ..."
+|   <body>
+
+#data
+<style>...<style><!--...</style><!-- --></style>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,48): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "...<style><!--..."
+|     <!--   -->
+|   <body>
+
+#data
+<style>...<!--[if IE]><style>...</style>X
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       "...<!--[if IE]><style>..."
+|   <body>
+|     "X"
+
+#data
+<title><!--<title></title>--></title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,37): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--<title>"
+|   <body>
+|     "-->"
+
+#data
+<title>&lt;/title></title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "</title>"
+|   <body>
+
+#data
+<title>foo/title><link></head><body>X
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,37): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <title>
+|       "foo/title><link></head><body>X"
+|   <body>
+
+#data
+<noscript><!--<noscript></noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,49): unexpected-end-tag
+#script-on
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--<noscript>"
+|   <body>
+|     "-->"
+
+#data
+<noscript><!--<noscript></noscript>--></noscript>
+#errors
+ * (1,11) missing DOCTYPE
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- <noscript></noscript> -->
+|   <body>
+
+#data
+<noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#script-on
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "X"
+|     <noscript>
+|       "-->"
+
+#data
+<noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- </noscript>X<noscript> -->
+|   <body>
+
+#data
+<noscript><iframe></noscript>X
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#script-on
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<iframe>"
+|   <body>
+|     "X"
+
+#data
+<noscript><iframe></noscript>X
+#errors
+ * (1,11) missing DOCTYPE
+ * (1,19) unexpected token in head noscript
+ * (1,31) unexpected EOF
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <iframe>
+|       "</noscript>X"
+
+#data
+<noframes><!--<noframes></noframes>--></noframes>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,49): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <noframes>
+|       "<!--<noframes>"
+|   <body>
+|     "-->"
+
+#data
+<noframes><body><script><!--...</script></body></noframes></html>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <noframes>
+|       "<body><script><!--...</script></body>"
+|   <body>
+
+#data
+<textarea><!--<textarea></textarea>--></textarea>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,49): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--<textarea>"
+|     "-->"
+
+#data
+<textarea>&lt;/textarea></textarea>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "</textarea>"
+
+#data
+<iframe><!--<iframe></iframe>--></iframe>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,41): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "<!--<iframe>"
+|     "-->"
+
+#data
+<iframe>...<!--X->...<!--/X->...</iframe>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "...<!--X->...<!--/X->..."
+
+#data
+<xmp><!--<xmp></xmp>--></xmp>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,29): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       "<!--<xmp>"
+|     "-->"
+
+#data
+<noembed><!--<noembed></noembed>--></noembed>
+#errors
+(1,9): expected-doctype-but-got-start-tag
+(1,45): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <noembed>
+|       "<!--<noembed>"
+|     "-->"
+
+#data
+<!doctype html><table>
+
+#errors
+(2,0): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "
+"
+
+#data
+<!doctype html><table><td><span><font></span><span>
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,45): unexpected-end-tag
+(1,51): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <span>
+|               <font>
+|             <font>
+|               <span>
+
+#data
+<!doctype html><form><table></form><form></table></form>
+#errors
+(1,35): unexpected-end-tag-implies-table-voodoo
+(1,35): unexpected-end-tag
+(1,41): unexpected-form-in-table
+(1,56): unexpected-end-tag
+(1,56): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <table>
+|         <form>
diff --git a/internal/html/testdata/webkit/tests17.dat b/internal/html/testdata/webkit/tests17.dat
new file mode 100644 (file)
index 0000000..e49bcf0
--- /dev/null
@@ -0,0 +1,179 @@
+#data
+<!doctype html><table><tbody><select><tr>
+#errors
+(1,37): unexpected-start-tag-implies-table-voodoo
+(1,41): unexpected-table-element-start-tag-in-select-in-table
+(1,41): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><tr><select><td>
+#errors
+(1,34): unexpected-start-tag-implies-table-voodoo
+(1,38): unexpected-table-element-start-tag-in-select-in-table
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<!doctype html><table><tr><td><select><td>
+#errors
+(1,42): unexpected-table-element-start-tag-in-select-in-table
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|           <td>
+
+#data
+<!doctype html><table><tr><th><select><td>
+#errors
+(1,42): unexpected-table-element-start-tag-in-select-in-table
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <th>
+|             <select>
+|           <td>
+
+#data
+<!doctype html><table><caption><select><tr>
+#errors
+(1,43): unexpected-table-element-start-tag-in-select-in-table
+(1,43): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <select>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><select><tr>
+#errors
+(1,27): unexpected-start-tag-in-select
+(1,27): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><td>
+#errors
+(1,27): unexpected-start-tag-in-select
+(1,27): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><th>
+#errors
+(1,27): unexpected-start-tag-in-select
+(1,27): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><tbody>
+#errors
+(1,30): unexpected-start-tag-in-select
+(1,30): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><thead>
+#errors
+(1,30): unexpected-start-tag-in-select
+(1,30): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><tfoot>
+#errors
+(1,30): unexpected-start-tag-in-select
+(1,30): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><caption>
+#errors
+(1,32): unexpected-start-tag-in-select
+(1,32): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><table><tr></table>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|     "a"
diff --git a/internal/html/testdata/webkit/tests18.dat b/internal/html/testdata/webkit/tests18.dat
new file mode 100644 (file)
index 0000000..05363b3
--- /dev/null
@@ -0,0 +1,534 @@
+#data
+<plaintext></plaintext>
+#errors
+11: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+23: End of file seen and there were open elements.
+11: Unclosed element “plaintext”.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><plaintext></plaintext>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><html><plaintext></plaintext>
+#errors
+44: End of file seen and there were open elements.
+32: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><head><plaintext></plaintext>
+#errors
+44: End of file seen and there were open elements.
+32: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><html><noscript><plaintext></plaintext>
+#errors
+42: Bad start tag in “plaintext” in “head”.
+54: End of file seen and there were open elements.
+42: Unclosed element “plaintext”.
+#script-off
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html></head><plaintext></plaintext>
+#errors
+45: End of file seen and there were open elements.
+33: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><body><plaintext></plaintext>
+#errors
+44: End of file seen and there were open elements.
+32: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><table><plaintext></plaintext>
+#errors
+(1,33): foster-parenting-start-tag
+(1,45): foster-parenting-character
+(1,45): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+
+#data
+<!doctype html><table><tbody><plaintext></plaintext>
+#errors
+(1,40): foster-parenting-start-tag
+(1,41): foster-parenting-character
+(1,52): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+
+#data
+<!doctype html><table><tbody><tr><plaintext></plaintext>
+#errors
+(1,44): foster-parenting-start-tag
+(1,56): foster-parenting-character
+(1,56): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><td><plaintext></plaintext>
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,49): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <plaintext>
+|               "</plaintext>"
+
+#data
+<!doctype html><table><caption><plaintext></plaintext>
+#errors
+(1,54): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <plaintext>
+|           "</plaintext>"
+
+#data
+<!doctype html><table><colgroup><plaintext></plaintext>
+#errors
+43: Start tag “plaintext” seen in “table”.
+55: Misplaced non-space characters inside a table.
+55: End of file seen and there were open elements.
+43: Unclosed element “plaintext”.
+22: Unclosed element “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <colgroup>
+
+#data
+<!doctype html><select><plaintext></plaintext>X
+#errors
+34: Stray start tag “plaintext”.
+46: Stray end tag “plaintext”.
+47: End of file seen and there were open elements.
+23: Unclosed element “select”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+
+#data
+<!doctype html><table><select><plaintext>a<caption>b
+#errors
+30: Start tag “select” seen in “table”.
+41: Stray start tag “plaintext”.
+51: “caption” start tag with “select” open.
+52: End of file seen and there were open elements.
+51: Unclosed element “caption”.
+22: Unclosed element “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><template><plaintext>a</template>b
+#errors
+49: End of file seen and there were open elements.
+36: Unclosed element “plaintext”.
+25: Unclosed element “template”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <plaintext>
+|           "a</template>b"
+|   <body>
+
+#data
+<!doctype html><body></body><plaintext></plaintext>
+#errors
+39: Stray start tag “plaintext”.
+51: End of file seen and there were open elements.
+39: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><frameset><plaintext></plaintext>
+#errors
+36: Stray start tag “plaintext”.
+48: Stray end tag “plaintext”.
+48: End of file seen and there were open elements.
+25: Unclosed element “frameset”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><frameset></frameset><plaintext></plaintext>
+#errors
+47: Stray start tag “plaintext”.
+59: Stray end tag “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><body></body></html><plaintext></plaintext>
+#errors
+46: Stray start tag “plaintext”.
+58: End of file seen and there were open elements.
+46: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><frameset></frameset></html><plaintext></plaintext>
+#errors
+54: Stray start tag “plaintext”.
+66: Stray end tag “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><svg><plaintext>a</plaintext>b
+#errors
+45: End of file seen and there were open elements.
+20: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg plaintext>
+|         "a"
+|       "b"
+
+#data
+<!doctype html><svg><title><plaintext>a</plaintext>b
+#errors
+52: End of file seen and there were open elements.
+38: Unclosed element “plaintext”.
+27: Unclosed element “title”.
+20: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <plaintext>
+|           "a</plaintext>b"
+
+#data
+<!doctype html><table><tr><style></script></style>abc
+#errors
+(1,51): foster-parenting-character
+(1,52): foster-parenting-character
+(1,53): foster-parenting-character
+(1,53): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <style>
+|             "</script>"
+
+#data
+<!doctype html><table><tr><script></style></script>abc
+#errors
+(1,52): foster-parenting-character
+(1,53): foster-parenting-character
+(1,54): foster-parenting-character
+(1,54): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <script>
+|             "</style>"
+
+#data
+<!doctype html><table><caption><style></script></style>abc
+#errors
+(1,58): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <style>
+|           "</script>"
+|         "abc"
+
+#data
+<!doctype html><table><td><style></script></style>abc
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,53): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <style>
+|               "</script>"
+|             "abc"
+
+#data
+<!doctype html><select><script></style></script>abc
+#errors
+(1,51): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+
+#data
+<!doctype html><table><select><script></style></script>abc
+#errors
+(1,30): unexpected-start-tag-implies-table-voodoo
+(1,58): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+|     <table>
+
+#data
+<!doctype html><table><tr><select><script></style></script>abc
+#errors
+(1,34): unexpected-start-tag-implies-table-voodoo
+(1,62): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><frameset></frameset><noframes>abc
+#errors
+(1,49): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+
+#data
+<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+|   <!-- abc -->
+
+#data
+<!doctype html><frameset></frameset></html><noframes>abc
+#errors
+(1,56): expected-named-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+
+#data
+<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+| <!-- abc -->
+
+#data
+<!doctype html><table><tr></tbody><tfoot>
+#errors
+(1,41): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|       <tfoot>
+
+#data
+<!doctype html><table><td><svg></svg>abc<td>
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,44): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|             "abc"
+|           <td>
diff --git a/internal/html/testdata/webkit/tests19.dat b/internal/html/testdata/webkit/tests19.dat
new file mode 100644 (file)
index 0000000..a189777
--- /dev/null
@@ -0,0 +1,1454 @@
+#data
+<!doctype html><math><mn DefinitionUrl="foo">
+#errors
+(1,45): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         definitionURL="foo"
+
+#data
+<!doctype html><html></p><!--foo-->
+#errors
+(1,25): end-tag-after-implied-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   <!-- foo -->
+|   <head>
+|   <body>
+
+#data
+<!doctype html><head></head></p><!--foo-->
+#errors
+(1,32): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <!-- foo -->
+|   <body>
+
+#data
+<!doctype html><body><p><pre>
+#errors
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <pre>
+
+#data
+<!doctype html><body><p><listing>
+#errors
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <listing>
+
+#data
+<!doctype html><p><plaintext>
+#errors
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <plaintext>
+
+#data
+<!doctype html><p><h1>
+#errors
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <h1>
+
+#data
+<!doctype html><isindex type="hidden">
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <isindex>
+|       type="hidden"
+
+#data
+<!doctype html><ruby><p><rp>
+#errors
+(1,28): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <p>
+|       <rp>
+
+#data
+<!doctype html><ruby><div><span><rp>
+#errors
+(1,36): XXX-undefined-error
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <span>
+|           <rp>
+
+#data
+<!doctype html><ruby><div><p><rp>
+#errors
+(1,33): XXX-undefined-error
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <p>
+|         <rp>
+
+#data
+<!doctype html><ruby><p><rt>
+#errors
+(1,28): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <p>
+|       <rt>
+
+#data
+<!doctype html><ruby><div><span><rt>
+#errors
+(1,36): XXX-undefined-error
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <span>
+|           <rt>
+
+#data
+<!doctype html><ruby><div><p><rt>
+#errors
+(1,33): XXX-undefined-error
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <p>
+|         <rt>
+
+#data
+<html><ruby>a<rb>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rb>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rp>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rp>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rt>b<rt></ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rt>
+|         "b"
+|       <rt>
+
+#data
+<html><ruby>a<rtc>b<rt>c<rb>d</ruby></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       "a"
+|       <rtc>
+|         "b"
+|         <rt>
+|           "c"
+|       <rb>
+|         "d"
+
+#data
+<!doctype html><math/><foo>
+#errors
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <foo>
+
+#data
+<!doctype html><svg/><foo>
+#errors
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <foo>
+
+#data
+<!doctype html><div></body><!--foo-->
+#errors
+(1,27): expected-one-end-tag-but-got-another
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|   <!-- foo -->
+
+#data
+<!doctype html><h1><div><h3><span></h1>foo
+#errors
+(1,39): end-tag-too-early
+(1,42): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       <div>
+|         <h3>
+|           <span>
+|         "foo"
+
+#data
+<!doctype html><p></h3>foo
+#errors
+(1,23): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+
+#data
+<!doctype html><h3><li>abc</h2>foo
+#errors
+(1,31): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h3>
+|       <li>
+|         "abc"
+|     "foo"
+
+#data
+<!doctype html><table>abc<!--foo-->
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,25): foster-parenting-character
+(1,35): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <!-- foo -->
+
+#data
+<!doctype html><table>  <!--foo-->
+#errors
+(1,34): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <!-- foo -->
+
+#data
+<!doctype html><table> b <!--foo-->
+#errors
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,25): foster-parenting-character
+(1,35): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " b "
+|     <table>
+|       <!-- foo -->
+
+#data
+<!doctype html><select><option><option>
+#errors
+(1,39): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <option>
+
+#data
+<!doctype html><select><option></optgroup>
+#errors
+(1,42): unexpected-end-tag-in-select
+(1,42): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!doctype html><select><option></optgroup>
+#errors
+(1,42): unexpected-end-tag-in-select
+(1,42): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!doctype html><dd><optgroup><dd>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|       <optgroup>
+|     <dd>
+
+#data
+<!doctype html><p><math><mi><p><h1>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mi>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><p><math><mo><p><h1>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mo>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><p><math><mn><p><h1>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mn>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><p><math><ms><p><h1>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math ms>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><p><math><mtext><p><h1>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mtext>
+|           <p>
+|           <h1>
+
+#data
+<!doctype html><frameset></noframes>
+#errors
+(1,36): unexpected-end-tag-in-frameset
+(1,36): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html c=d><body></html><html a=b>
+#errors
+(1,48): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <body>
+
+#data
+<!doctype html><html c=d><frameset></frameset></html><html a=b>
+#errors
+(1,63): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+| <!-- foo -->
+
+#data
+<!doctype html><html><frameset></frameset></html>  
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   "  "
+
+#data
+<!doctype html><html><frameset></frameset></html>abc
+#errors
+(1,50): expected-eof-but-got-char
+(1,51): expected-eof-but-got-char
+(1,52): expected-eof-but-got-char
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html><p>
+#errors
+(1,52): expected-eof-but-got-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html></p>
+#errors
+(1,53): expected-eof-but-got-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<html><frameset></frameset></html><!doctype html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,49): unexpected-doctype
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><body><frameset>
+#errors
+(1,31): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html><p><frameset><frame>
+#errors
+(1,28): unexpected-start-tag
+(1,35): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><p>a<frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "a"
+
+#data
+<!doctype html><p> <frameset><frame>
+#errors
+(1,29): unexpected-start-tag
+(1,36): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><pre><frameset>
+#errors
+(1,30): unexpected-start-tag
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+
+#data
+<!doctype html><listing><frameset>
+#errors
+(1,34): unexpected-start-tag
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+
+#data
+<!doctype html><li><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+
+#data
+<!doctype html><dd><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+
+#data
+<!doctype html><dt><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dt>
+
+#data
+<!doctype html><button><frameset>
+#errors
+(1,33): unexpected-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <button>
+
+#data
+<!doctype html><applet><frameset>
+#errors
+(1,33): unexpected-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <applet>
+
+#data
+<!doctype html><marquee><frameset>
+#errors
+(1,34): unexpected-start-tag
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <marquee>
+
+#data
+<!doctype html><object><frameset>
+#errors
+(1,33): unexpected-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <object>
+
+#data
+<!doctype html><table><frameset>
+#errors
+(1,32): unexpected-start-tag-implies-table-voodoo
+(1,32): unexpected-start-tag
+(1,32): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+
+#data
+<!doctype html><area><frameset>
+#errors
+(1,31): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <area>
+
+#data
+<!doctype html><basefont><frameset>
+#errors
+(1,35): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <basefont>
+|   <frameset>
+
+#data
+<!doctype html><bgsound><frameset>
+#errors
+(1,34): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <bgsound>
+|   <frameset>
+
+#data
+<!doctype html><br><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <br>
+
+#data
+<!doctype html><embed><frameset>
+#errors
+(1,32): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <embed>
+
+#data
+<!doctype html><img><frameset>
+#errors
+(1,30): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+
+#data
+<!doctype html><input><frameset>
+#errors
+(1,32): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+
+#data
+<!doctype html><keygen><frameset>
+#errors
+(1,33): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+
+#data
+<!doctype html><wbr><frameset>
+#errors
+(1,30): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+
+#data
+<!doctype html><hr><frameset>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hr>
+
+#data
+<!doctype html><textarea></textarea><frameset>
+#errors
+(1,46): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+
+#data
+<!doctype html><xmp></xmp><frameset>
+#errors
+(1,36): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+
+#data
+<!doctype html><iframe></iframe><frameset>
+#errors
+(1,42): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+
+#data
+<!doctype html><select></select><frameset>
+#errors
+(1,42): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><svg></svg><frameset><frame>
+#errors
+(1,36): unexpected-start-tag
+(1,43): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><math></math><frameset><frame>
+#errors
+(1,38): unexpected-start-tag
+(1,45): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><svg><foreignObject><div> <frameset><frame>
+#errors
+(1,51): unexpected-start-tag
+(1,58): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><svg>a</svg><frameset><frame>
+#errors
+(1,37): unexpected-start-tag
+(1,44): unexpected-start-tag-ignored
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "a"
+
+#data
+<!doctype html><svg> </svg><frameset><frame>
+#errors
+(1,37): unexpected-start-tag
+(1,44): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<html>aaa<frameset></frameset>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,19): unexpected-start-tag
+(1,30): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     "aaa"
+
+#data
+<html> a <frameset></frameset>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,19): unexpected-start-tag
+(1,30): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     "a "
+
+#data
+<!doctype html><div><frameset>
+#errors
+(1,30): unexpected-start-tag
+(1,30): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><div><body><frameset>
+#errors
+(1,26): unexpected-start-tag
+(1,36): unexpected-start-tag
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<!doctype html><p><math></p>a
+#errors
+(1,28): unexpected-end-tag
+(1,28): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|     "a"
+
+#data
+<!doctype html><p><math><mn><span></p>a
+#errors
+(1,38): unexpected-end-tag
+(1,39): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <math math>
+|         <math mn>
+|           <span>
+|             <p>
+|             "a"
+
+#data
+<!doctype html><math></html>
+#errors
+(1,28): unexpected-end-tag
+(1,28): expected-one-end-tag-but-got-another
+(1,28): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!doctype html><meta charset="ascii">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|       charset="ascii"
+|   <body>
+
+#data
+<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|       content="text/html;charset=ascii"
+|       http-equiv="content-type"
+|   <body>
+
+#data
+<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa--><meta charset="utf8">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <!-- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -->
+|     <meta>
+|       charset="utf8"
+|   <body>
+
+#data
+<!doctype html><html a=b><head></head><html c=d>
+#errors
+(1,48): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <body>
+
+#data
+<!doctype html><image/>
+#errors
+(1,23): image-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+
+#data
+<!doctype html>a<i>b<table>c<b>d</i>e</b>f
+#errors
+(1,28): foster-parenting-character
+(1,31): foster-parenting-start-tag
+(1,32): foster-parenting-character
+(1,36): foster-parenting-end-tag
+(1,36): adoption-agency-1.3
+(1,37): foster-parenting-character
+(1,41): foster-parenting-end-tag
+(1,42): foster-parenting-character
+(1,42): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "a"
+|     <i>
+|       "bc"
+|       <b>
+|         "de"
+|       "f"
+|       <table>
+
+#data
+<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+(1,25): foster-parenting-start-tag
+(1,26): foster-parenting-character
+(1,29): foster-parenting-start-tag
+(1,30): foster-parenting-character
+(1,35): foster-parenting-start-tag
+(1,36): foster-parenting-character
+(1,39): foster-parenting-start-tag
+(1,40): foster-parenting-character
+(1,44): foster-parenting-end-tag
+(1,44): adoption-agency-1.3
+(1,44): adoption-agency-1.3
+(1,45): foster-parenting-character
+(1,49): foster-parenting-end-tag
+(1,49): adoption-agency-1.3
+(1,49): adoption-agency-1.3
+(1,50): foster-parenting-character
+(1,50): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+|     <table>
+
+#data
+<!doctype html><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+(1,37): adoption-agency-1.3
+(1,37): adoption-agency-1.3
+(1,42): adoption-agency-1.3
+(1,42): adoption-agency-1.3
+(1,43): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+
+#data
+<!doctype html><table><i>a<b>b<div>c</i>
+#errors
+(1,25): foster-parenting-start-tag
+(1,26): foster-parenting-character
+(1,29): foster-parenting-start-tag
+(1,30): foster-parenting-character
+(1,35): foster-parenting-start-tag
+(1,36): foster-parenting-character
+(1,40): foster-parenting-end-tag
+(1,40): adoption-agency-1.3
+(1,40): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|       <div>
+|         <i>
+|           "c"
+|     <table>
+
+#data
+<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+(1,25): foster-parenting-start-tag
+(1,26): foster-parenting-character
+(1,29): foster-parenting-start-tag
+(1,30): foster-parenting-character
+(1,35): foster-parenting-start-tag
+(1,36): foster-parenting-character
+(1,39): foster-parenting-start-tag
+(1,40): foster-parenting-character
+(1,44): foster-parenting-end-tag
+(1,44): adoption-agency-1.3
+(1,44): adoption-agency-1.3
+(1,45): foster-parenting-character
+(1,49): foster-parenting-end-tag
+(1,44): adoption-agency-1.3
+(1,44): adoption-agency-1.3
+(1,50): foster-parenting-character
+(1,50): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+|     <table>
+
+#data
+<!doctype html><table><i>a<div>b<tr>c<b>d</i>e
+#errors
+(1,25): foster-parenting-start-tag
+(1,26): foster-parenting-character
+(1,31): foster-parenting-start-tag
+(1,32): foster-parenting-character
+(1,37): foster-parenting-character
+(1,40): foster-parenting-start-tag
+(1,41): foster-parenting-character
+(1,45): foster-parenting-end-tag
+(1,45): adoption-agency-1.3
+(1,46): foster-parenting-character
+(1,46): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <div>
+|         "b"
+|     <i>
+|       "c"
+|       <b>
+|         "d"
+|     <b>
+|       "e"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><td><table><i>a<div>b<b>c</i>d
+#errors
+(1,26): unexpected-cell-in-table-body
+(1,36): foster-parenting-start-tag
+(1,37): foster-parenting-character
+(1,42): foster-parenting-start-tag
+(1,43): foster-parenting-character
+(1,46): foster-parenting-start-tag
+(1,47): foster-parenting-character
+(1,51): foster-parenting-end-tag
+(1,51): adoption-agency-1.3
+(1,51): adoption-agency-1.3
+(1,52): foster-parenting-character
+(1,52): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <i>
+|               "a"
+|             <div>
+|               <i>
+|                 "b"
+|                 <b>
+|                   "c"
+|               <b>
+|                 "d"
+|             <table>
+
+#data
+<!doctype html><body><bgsound>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <bgsound>
+
+#data
+<!doctype html><body><basefont>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <basefont>
+
+#data
+<!doctype html><a><b></a><basefont>
+#errors
+(1,25): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <basefont>
+
+#data
+<!doctype html><a><b></a><bgsound>
+#errors
+(1,25): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <bgsound>
+
+#data
+<!doctype html><figcaption><article></figcaption>a
+#errors
+(1,49): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <figcaption>
+|       <article>
+|     "a"
+
+#data
+<!doctype html><summary><article></summary>a
+#errors
+(1,43): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <summary>
+|       <article>
+|     "a"
+
+#data
+<!doctype html><p><a><plaintext>b
+#errors
+(1,32): unexpected-end-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <a>
+|     <plaintext>
+|       <a>
+|         "b"
+
+#data
+<!DOCTYPE html><div>a<a></div>b<p>c</p>d
+#errors
+(1,30): end-tag-too-early
+(1,40): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "a"
+|       <a>
+|     <a>
+|       "b"
+|       <p>
+|         "c"
+|       "d"
diff --git a/internal/html/testdata/webkit/tests2.dat b/internal/html/testdata/webkit/tests2.dat
new file mode 100644 (file)
index 0000000..b44fec4
--- /dev/null
@@ -0,0 +1,821 @@
+#data
+<!DOCTYPE html>Test
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<textarea>test</div>test
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,24): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "test</div>test"
+
+#data
+<table><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,11): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><td>test</tbody></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "test"
+
+#data
+<frame>test
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,7): unexpected-start-tag-ignored
+#document
+| <html>
+|   <head>
+|   <body>
+|     "test"
+
+#data
+<!DOCTYPE html><frameset>test
+#errors
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset> te st
+#errors
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): unexpected-char-in-frameset
+(1,29): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     "  "
+
+#data
+<!DOCTYPE html><frameset></frameset> te st
+#errors
+(1,29): unexpected-char-after-frameset
+(1,29): unexpected-char-after-frameset
+(1,29): unexpected-char-after-frameset
+(1,29): unexpected-char-after-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   "  "
+
+#data
+<!DOCTYPE html><frameset><!DOCTYPE html>
+#errors
+(1,40): unexpected-doctype
+(1,40): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><font><p><b>test</font>
+#errors
+(1,38): adoption-agency-1.3
+(1,38): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|     <p>
+|       <font>
+|         <b>
+|           "test"
+
+#data
+<!DOCTYPE html><dt><div><dd>
+#errors
+(1,28): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dt>
+|       <div>
+|     <dd>
+
+#data
+<script></x
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,11): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</x"
+|   <body>
+
+#data
+<table><plaintext><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,18): unexpected-start-tag-implies-table-voodoo
+(1,22): foster-parenting-character-in-table
+(1,22): foster-parenting-character-in-table
+(1,22): foster-parenting-character-in-table
+(1,22): foster-parenting-character-in-table
+(1,22): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "<td>"
+|     <table>
+
+#data
+<plaintext></plaintext>
+#errors
+(1,11): expected-doctype-but-got-start-tag
+(1,23): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!DOCTYPE html><table><tr>TEST
+#errors
+(1,30): foster-parenting-character-in-table
+(1,30): foster-parenting-character-in-table
+(1,30): foster-parenting-character-in-table
+(1,30): foster-parenting-character-in-table
+(1,30): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "TEST"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
+#errors
+(1,37): unexpected-start-tag
+(1,53): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     t1="1"
+|     t2="2"
+|     t3="3"
+|     t4="4"
+
+#data
+</b test
+#errors
+(1,8): eof-in-attribute-name
+(1,8): expected-doctype-but-got-eof
+#new-errors
+(1:9) eof-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html></b test<b &=&amp>X
+#errors
+(1,24): invalid-character-in-attribute-name
+(1,32): named-entity-without-semicolon
+(1,33): attributes-in-end-tag
+(1,33): unexpected-end-tag-before-html
+#new-errors
+(1:24) unexpected-character-in-attribute-name
+(1:33) missing-semicolon-after-character-reference
+(1:33) end-tag-with-attributes
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+
+#data
+<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
+#errors
+(1,9): need-space-after-doctype
+(1,54): expected-named-closing-tag-but-got-eof
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       type="text/x-foobar;baz"
+|       "X</SCRipt"
+|   <body>
+
+#data
+&
+#errors
+(1,1): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&"
+
+#data
+&#
+#errors
+(1,2): expected-numeric-entity
+(1,2): expected-doctype-but-got-chars
+#new-errors
+(1:3) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#"
+
+#data
+&#X
+#errors
+(1,3): expected-numeric-entity
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:4) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#X"
+
+#data
+&#x
+#errors
+(1,3): expected-numeric-entity
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:4) absence-of-digits-in-numeric-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#x"
+
+#data
+&#45
+#errors
+(1,4): numeric-entity-without-semicolon
+(1,4): expected-doctype-but-got-chars
+#new-errors
+(1:5) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "-"
+
+#data
+&x-test
+#errors
+(1,2): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&x-test"
+
+#data
+<!doctypehtml><p><li>
+#errors
+(1,9): need-space-after-doctype
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <li>
+
+#data
+<!doctypehtml><p><dt>
+#errors
+(1,9): need-space-after-doctype
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <dt>
+
+#data
+<!doctypehtml><p><dd>
+#errors
+(1,9): need-space-after-doctype
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <dd>
+
+#data
+<!doctypehtml><p><form>
+#errors
+(1,9): need-space-after-doctype
+(1,23): expected-closing-tag-but-got-eof
+#new-errors
+(1:10) missing-whitespace-before-doctype-name
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <form>
+
+#data
+<!DOCTYPE html><p></P>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     "X"
+
+#data
+&AMP
+#errors
+(1,4): named-entity-without-semicolon
+(1,4): expected-doctype-but-got-chars
+#new-errors
+(1:5) missing-semicolon-after-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&"
+
+#data
+&AMp;
+#errors
+(1,3): expected-named-entity
+(1,3): expected-doctype-but-got-chars
+#new-errors
+(1:5) unknown-named-character-reference
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&AMp;"
+
+#data
+<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
+#errors
+(1,110): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
+
+#data
+<!DOCTYPE html>X</body>X
+#errors
+(1,24): unexpected-char-after-body
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "XX"
+
+#data
+<!DOCTYPE html><!-- X
+#errors
+(1,21): eof-in-comment
+#new-errors
+(1:22) eof-in-comment
+#document
+| <!DOCTYPE html>
+| <!--  X -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><table><caption>test TEST</caption><td>test
+#errors
+(1,54): unexpected-cell-in-table-body
+(1,58): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         "test TEST"
+|       <tbody>
+|         <tr>
+|           <td>
+|             "test"
+
+#data
+<!DOCTYPE html><select><option><optgroup>
+#errors
+(1,41): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <optgroup>
+
+#data
+<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
+#errors
+(1,68): unexpected-select-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|         <option>
+|       <option>
+|     <option>
+
+#data
+<!DOCTYPE html><select><optgroup><option><optgroup>
+#errors
+(1,51): eof-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|         <option>
+|       <optgroup>
+
+#data
+<!DOCTYPE html><datalist><option>foo</datalist>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <datalist>
+|       <option>
+|         "foo"
+|     "bar"
+
+#data
+<!DOCTYPE html><font><input><input></font>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <input>
+|       <input>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX -->
+#errors
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX
+#errors
+(1,29): eof-in-comment
+#new-errors
+(1:30) eof-in-comment
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX - XXX -->
+#errors
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX - XXX  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+test
+test
+#errors
+(2,4): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "test
+test"
+
+#data
+<!DOCTYPE html><body><title>test</body></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "test</body>"
+
+#data
+<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
+x { content:"</style" } </style>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+|     <meta>
+|       name="z"
+|     <link>
+|       rel="foo"
+|     <style>
+|       "
+x { content:"</style" } "
+
+#data
+<!DOCTYPE html><select><optgroup></optgroup></select>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+
+#data
+#errors
+(2,1): expected-doctype-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>  <html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><script>
+</script>  <title>x</title>  </head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "
+"
+|     "  "
+|     <title>
+|       "x"
+|     "  "
+|   <body>
+
+#data
+<!DOCTYPE html><html><body><html id=x>
+#errors
+(1,38): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>X</body><html id="x">
+#errors
+(1,36): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+|     "X"
+
+#data
+<!DOCTYPE html><head><html id=x>
+#errors
+(1,32): non-html-root
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>X</html>X
+#errors
+(1,24): expected-eof-but-got-char
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "XX"
+
+#data
+<!DOCTYPE html>X</html> 
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X "
+
+#data
+<!DOCTYPE html>X</html><p>X
+#errors
+(1,26): expected-eof-but-got-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <p>
+|       "X"
+
+#data
+<!DOCTYPE html>X<p/x/y/z>
+#errors
+(1,19): unexpected-character-after-solidus-in-tag
+(1,21): unexpected-character-after-solidus-in-tag
+(1,23): unexpected-character-after-solidus-in-tag
+#new-errors
+(1:20) unexpected-solidus-in-tag
+(1:22) unexpected-solidus-in-tag
+(1:24) unexpected-solidus-in-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <p>
+|       x=""
+|       y=""
+|       z=""
+
+#data
+<!DOCTYPE html><!--x--
+#errors
+(1,22): eof-in-comment-double-dash
+#new-errors
+(1:23) eof-in-comment
+#document
+| <!DOCTYPE html>
+| <!-- x -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><table><tr><td></p></table>
+#errors
+(1,34): unexpected-end-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <p>
+
+#data
+<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
+#errors
+(1,20): expected-space-or-right-bracket-in-doctype
+(1,25): unknown-doctype
+(1,35): unexpected-char-in-comment
+#new-errors
+(1:21) invalid-character-sequence-after-doctype-name
+(1:35) nested-comment
+#document
+| <!DOCTYPE <!doctype>
+| <html>
+|   <head>
+|   <body>
+|     ">"
+|     <!-- <!--x -->
+|     "-->"
+
+#data
+<!doctype html><div><form></form><div></div></div>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <form>
+|       <div>
diff --git a/internal/html/testdata/webkit/tests20.dat b/internal/html/testdata/webkit/tests20.dat
new file mode 100644 (file)
index 0000000..afdae74
--- /dev/null
@@ -0,0 +1,582 @@
+#data
+<!doctype html><p><button><button>
+#errors
+(1,34): unexpected-start-tag-implies-end-tag
+(1,34): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|       <button>
+
+#data
+<!doctype html><p><button><address>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <address>
+
+#data
+<!doctype html><p><button><blockquote>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <blockquote>
+
+#data
+<!doctype html><p><button><menu>
+#errors
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <menu>
+
+#data
+<!doctype html><p><button><p>
+#errors
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <p>
+
+#data
+<!doctype html><p><button><ul>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <ul>
+
+#data
+<!doctype html><p><button><h1>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <h1>
+
+#data
+<!doctype html><p><button><h6>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <h6>
+
+#data
+<!doctype html><p><button><listing>
+#errors
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <listing>
+
+#data
+<!doctype html><p><button><pre>
+#errors
+(1,31): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <pre>
+
+#data
+<!doctype html><p><button><form>
+#errors
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <form>
+
+#data
+<!doctype html><p><button><li>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <li>
+
+#data
+<!doctype html><p><button><dd>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <dd>
+
+#data
+<!doctype html><p><button><dt>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <dt>
+
+#data
+<!doctype html><p><button><plaintext>
+#errors
+(1,37): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <plaintext>
+
+#data
+<!doctype html><p><button><table>
+#errors
+(1,33): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <table>
+
+#data
+<!doctype html><p><button><hr>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <hr>
+
+#data
+<!doctype html><p><button><xmp>
+#errors
+(1,31): expected-named-closing-tag-but-got-eof
+(1,31): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <xmp>
+
+#data
+<!doctype html><p><button></p>
+#errors
+(1,30): unexpected-end-tag
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <button>
+|         <p>
+
+#data
+<!doctype html><address><button></address>a
+#errors
+(1,42): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <button>
+|     "a"
+
+#data
+<!doctype html><address><button></address>a
+#errors
+(1,42): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <button>
+|     "a"
+
+#data
+<p><table></p>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,14): unexpected-end-tag-implies-table-voodoo
+(1,14): unexpected-end-tag
+(1,14): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <p>
+|       <table>
+
+#data
+<!doctype html><svg>
+#errors
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!doctype html><p><figcaption>
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <figcaption>
+
+#data
+<!doctype html><p><summary>
+#errors
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <summary>
+
+#data
+<!doctype html><form><table><form>
+#errors
+(1,34): unexpected-form-in-table
+(1,34): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <table>
+
+#data
+<!doctype html><table><form><form>
+#errors
+(1,28): unexpected-form-in-table
+(1,34): unexpected-form-in-table
+(1,34): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <form>
+
+#data
+<!doctype html><table><form></table><form>
+#errors
+(1,28): unexpected-form-in-table
+(1,42): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <form>
+
+#data
+<!doctype html><svg><foreignObject><p>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <p>
+
+#data
+<!doctype html><svg><title>abc
+#errors
+(1,30): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         "abc"
+
+#data
+<option><span><option>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|       <span>
+|         <option>
+
+#data
+<option><option>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|     <option>
+
+#data
+<math><annotation-xml><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): unexpected-html-element-in-foreign-content
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|     <div>
+
+#data
+<math><annotation-xml encoding="application/svg+xml"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,58): unexpected-html-element-in-foreign-content
+(1,58): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="application/svg+xml"
+|     <div>
+
+#data
+<math><annotation-xml encoding="application/xhtml+xml"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,60): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="application/xhtml+xml"
+|         <div>
+
+#data
+<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,60): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="aPPlication/xhtmL+xMl"
+|         <div>
+
+#data
+<math><annotation-xml encoding="text/html"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,48): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="text/html"
+|         <div>
+
+#data
+<math><annotation-xml encoding="Text/htmL"><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,48): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="Text/htmL"
+|         <div>
+
+#data
+<math><annotation-xml encoding=" text/html "><div>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,50): unexpected-html-element-in-foreign-content
+(1,50): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding=" text/html "
+|     <div>
+
+#data
+<math><annotation-xml> </annotation-xml>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,40): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         " "
+
+#data
+<math><annotation-xml>c</annotation-xml>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,40): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         "c"
+
+#data
+<math><annotation-xml><!--foo-->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <!-- foo -->
+
+#data
+<math><annotation-xml></svg>x
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,28): unexpected-end-tag
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         "x"
+
+#data
+<math><annotation-xml><svg>x
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,28): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           "x"
diff --git a/internal/html/testdata/webkit/tests21.dat b/internal/html/testdata/webkit/tests21.dat
new file mode 100644 (file)
index 0000000..1e2af7c
--- /dev/null
@@ -0,0 +1,325 @@
+#data
+<svg><![CDATA[foo]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<math><![CDATA[foo]]>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       "foo"
+
+#data
+<div><![CDATA[foo]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,7): expected-dashes-or-doctype
+(1,20): expected-closing-tag-but-got-eof
+#new-errors
+(1:14) cdata-in-html-content
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <!-- [CDATA[foo]] -->
+
+#data
+<svg><![CDATA[foo
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<svg><![CDATA[foo
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<svg><![CDATA[
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,14): expected-closing-tag-but-got-eof
+#new-errors
+(1:15) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<svg><![CDATA[]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<svg><![CDATA[]] >]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]] >"
+
+#data
+<svg><![CDATA[]] >]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]] >"
+
+#data
+<svg><![CDATA[]]
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#new-errors
+(1:17) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]]"
+
+#data
+<svg><![CDATA[]
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,15): expected-closing-tag-but-got-eof
+#new-errors
+(1:16) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]"
+
+#data
+<svg><![CDATA[]>a
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#new-errors
+(1:18) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]>a"
+
+#data
+<!DOCTYPE html><svg><![CDATA[foo]]]>
+#errors
+(1,36): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo]"
+
+#data
+<!DOCTYPE html><svg><![CDATA[foo]]]]>
+#errors
+(1,37): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo]]"
+
+#data
+<!DOCTYPE html><svg><![CDATA[foo]]]]]>
+#errors
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo]]]"
+
+#data
+<svg><foreignObject><div><![CDATA[foo]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,27): expected-dashes-or-doctype
+(1,40): expected-closing-tag-but-got-eof
+#new-errors
+(1:34) cdata-in-html-content
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <div>
+|           <!-- [CDATA[foo]] -->
+
+#data
+<svg><![CDATA[<svg>]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+
+#data
+<svg><![CDATA[</svg>a]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,24): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "</svg>a"
+
+#data
+<svg><![CDATA[<svg>a
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,20): expected-closing-tag-but-got-eof
+#new-errors
+(1:21) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>a"
+
+#data
+<svg><![CDATA[</svg>a
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#new-errors
+(1:22) eof-in-cdata
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "</svg>a"
+
+#data
+<svg><![CDATA[<svg>]]><path>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,28): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+|       <svg path>
+
+#data
+<svg><![CDATA[<svg>]]></path>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,29): unexpected-end-tag
+(1,29): unexpected-end-tag
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+
+#data
+<svg><![CDATA[<svg>]]><!--path-->
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+|       <!-- path -->
+
+#data
+<svg><![CDATA[<svg>]]>path
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>path"
+
+#data
+<svg><![CDATA[<!--svg-->]]>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<!--svg-->"
diff --git a/internal/html/testdata/webkit/tests22.dat b/internal/html/testdata/webkit/tests22.dat
new file mode 100644 (file)
index 0000000..31e6d9e
--- /dev/null
@@ -0,0 +1,190 @@
+#data
+<a><b><big><em><strong><div>X</a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,33): adoption-agency-1.3
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|         <big>
+|           <em>
+|             <strong>
+|     <big>
+|       <em>
+|         <strong>
+|           <div>
+|             <a>
+|               "X"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): adoption-agency-1.3
+(1,91): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         "A"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): adoption-agency-1.3
+(1,101): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         <div>
+|                           id="9"
+|                           "A"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): adoption-agency-1.3
+(1,112): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         <div>
+|                           id="9"
+|                           <div>
+|                             id="10"
+|                             "A"
+
+#data
+<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,46): adoption-agency-1.3
+(1,50): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <cite>
+|       <b>
+|         <cite>
+|           <i>
+|             <cite>
+|               <i>
+|                 <cite>
+|                   <i>
+|       <i>
+|         <i>
+|           <div>
+|             <b>
+|               "X"
+|             "TEST"
diff --git a/internal/html/testdata/webkit/tests23.dat b/internal/html/testdata/webkit/tests23.dat
new file mode 100644 (file)
index 0000000..49e4a4a
--- /dev/null
@@ -0,0 +1,168 @@
+#data
+<p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,116): unexpected-end-tag
+(1,117): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           color="red"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               <font>
+|                 size="4"
+|                 <font>
+|                   size="4"
+|                   <font>
+|                     size="4"
+|                     <font>
+|                       color="red"
+|     <p>
+|       <font>
+|         color="red"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               <font>
+|                 color="red"
+|                 "X"
+
+#data
+<p><font size=4><font size=4><font size=4><font size=4><p>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,58): unexpected-end-tag
+(1,59): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             "X"
+
+#data
+<p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,73): unexpected-end-tag
+(1,74): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="5"
+|               <font>
+|                 size="4"
+|     <p>
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="5"
+|             <font>
+|               size="4"
+|               "X"
+
+#data
+<p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,68): unexpected-end-tag
+(1,69): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <font>
+|         id="a"
+|         size="4"
+|         <font>
+|           id="b"
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|     <p>
+|       <font>
+|         id="a"
+|         size="4"
+|         <font>
+|           id="b"
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               "X"
+
+#data
+<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,64): end-tag-too-early
+(1,67): unexpected-end-tag
+(1,68): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <b>
+|         id="a"
+|         <b>
+|           id="a"
+|           <b>
+|             id="a"
+|             <b>
+|               <object>
+|                 <b>
+|                   id="a"
+|                   <b>
+|                     id="a"
+|                     "X"
+|     <p>
+|       <b>
+|         id="a"
+|         <b>
+|           id="a"
+|           <b>
+|             id="a"
+|             <b>
+|               "Y"
diff --git a/internal/html/testdata/webkit/tests24.dat b/internal/html/testdata/webkit/tests24.dat
new file mode 100644 (file)
index 0000000..f6dc7eb
--- /dev/null
@@ -0,0 +1,79 @@
+#data
+<!DOCTYPE html>&NotEqualTilde;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "≂̸"
+
+#data
+<!DOCTYPE html>&NotEqualTilde;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "≂̸A"
+
+#data
+<!DOCTYPE html>&ThickSpace;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "  "
+
+#data
+<!DOCTYPE html>&ThickSpace;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "  A"
+
+#data
+<!DOCTYPE html>&NotSubset;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "⊂⃒"
+
+#data
+<!DOCTYPE html>&NotSubset;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "⊂⃒A"
+
+#data
+<!DOCTYPE html>&Gopf;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "𝔾"
+
+#data
+<!DOCTYPE html>&Gopf;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "𝔾A"
diff --git a/internal/html/testdata/webkit/tests25.dat b/internal/html/testdata/webkit/tests25.dat
new file mode 100644 (file)
index 0000000..5bb369b
--- /dev/null
@@ -0,0 +1,288 @@
+#data
+<!DOCTYPE html><body><foo>A
+#errors
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "A"
+
+#data
+<!DOCTYPE html><body><area>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <area>
+|     "A"
+
+#data
+<!DOCTYPE html><body><base>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <base>
+|     "A"
+
+#data
+<!DOCTYPE html><body><basefont>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <basefont>
+|     "A"
+
+#data
+<!DOCTYPE html><body><bgsound>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <bgsound>
+|     "A"
+
+#data
+<!DOCTYPE html><body><br>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <br>
+|     "A"
+
+#data
+<!DOCTYPE html><body><col>A
+#errors
+(1,26): unexpected-start-tag-ignored
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><command>A
+#errors
+eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <command>
+|       "A"
+
+#data
+<!DOCTYPE html><body><embed>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <embed>
+|     "A"
+
+#data
+<!DOCTYPE html><body><frame>A
+#errors
+(1,28): unexpected-start-tag-ignored
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><hr>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hr>
+|     "A"
+
+#data
+<!DOCTYPE html><body><img>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+|     "A"
+
+#data
+<!DOCTYPE html><body><input>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     "A"
+
+#data
+<!DOCTYPE html><body><keygen>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+|     "A"
+
+#data
+<!DOCTYPE html><keygen>A</keygen>B
+#errors
+33: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+|     "AB"
+
+#data
+</keygen>A
+#errors
+9: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+9: Stray end tag “keygen”.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html></keygen>A
+#errors
+24: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><head></keygen>A
+#errors
+30: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><head></head></keygen>A
+#errors
+30: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body></keygen>A
+#errors
+30: Stray end tag “keygen”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><link>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <link>
+|     "A"
+
+#data
+<!DOCTYPE html><body><meta>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     "A"
+
+#data
+<!DOCTYPE html><body><param>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <param>
+|     "A"
+
+#data
+<!DOCTYPE html><body><source>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <source>
+|     "A"
+
+#data
+<!DOCTYPE html><body><track>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <track>
+|     "A"
+
+#data
+<!DOCTYPE html><body><wbr>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+|     "A"
diff --git a/internal/html/testdata/webkit/tests26.dat b/internal/html/testdata/webkit/tests26.dat
new file mode 100644 (file)
index 0000000..de453b9
--- /dev/null
@@ -0,0 +1,393 @@
+#data
+<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a>
+#errors
+(1,47): unexpected-start-tag-implies-end-tag
+(1,51): adoption-agency-1.3
+(1,74): unexpected-start-tag-implies-end-tag
+(1,74): adoption-agency-1.3
+(1,81): unexpected-start-tag-implies-end-tag
+(1,85): adoption-agency-1.3
+(1,108): unexpected-start-tag-implies-end-tag
+(1,108): adoption-agency-1.3
+(1,115): unexpected-start-tag-implies-end-tag
+(1,119): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="#1"
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <nobr>
+|       <br>
+|       <a>
+|         href="#2"
+|     <a>
+|       href="#2"
+|       <nobr>
+|         "2"
+|       <nobr>
+|     <nobr>
+|       <br>
+|       <a>
+|         href="#3"
+|     <a>
+|       href="#3"
+|       <nobr>
+|         "3"
+|       <nobr>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
+#errors
+(1,37): unexpected-start-tag-implies-end-tag
+(1,41): adoption-agency-1.3
+(1,50): unexpected-start-tag-implies-end-tag
+(1,50): adoption-agency-1.3
+(1,57): unexpected-start-tag-implies-end-tag
+(1,61): adoption-agency-1.3
+(1,62): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+|         "2"
+|       <nobr>
+|     <nobr>
+|       "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
+#errors
+(1,44): foster-parenting-start-tag
+(1,48): foster-parenting-end-tag
+(1,48): adoption-agency-1.3
+(1,51): foster-parenting-start-tag
+(1,57): foster-parenting-start-tag
+(1,57): nobr-already-in-scope
+(1,57): adoption-agency-1.2
+(1,58): foster-parenting-character
+(1,64): foster-parenting-start-tag
+(1,64): nobr-already-in-scope
+(1,68): foster-parenting-end-tag
+(1,68): adoption-agency-1.2
+(1,69): foster-parenting-character
+(1,69): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <nobr>
+|           <i>
+|         <i>
+|           <nobr>
+|             "2"
+|           <nobr>
+|         <nobr>
+|           "3"
+|         <table>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
+#errors
+(1,56): unexpected-end-tag
+(1,65): unexpected-start-tag-implies-end-tag
+(1,65): adoption-agency-1.3
+(1,72): unexpected-start-tag-implies-end-tag
+(1,76): adoption-agency-1.3
+(1,77): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <table>
+|           <tbody>
+|             <tr>
+|               <td>
+|                 <nobr>
+|                   <i>
+|                 <i>
+|                   <nobr>
+|                     "2"
+|                   <nobr>
+|                 <nobr>
+|                   "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
+#errors
+(1,42): unexpected-start-tag-implies-end-tag
+(1,42): adoption-agency-1.3
+(1,46): adoption-agency-1.3
+(1,46): adoption-agency-1.3
+(1,55): unexpected-start-tag-implies-end-tag
+(1,55): adoption-agency-1.3
+(1,62): unexpected-start-tag-implies-end-tag
+(1,66): adoption-agency-1.3
+(1,67): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|     <div>
+|       <b>
+|         <nobr>
+|         <nobr>
+|       <nobr>
+|         <i>
+|       <i>
+|         <nobr>
+|           "2"
+|         <nobr>
+|       <nobr>
+|         "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
+#errors
+(1,37): unexpected-start-tag-implies-end-tag
+(1,41): adoption-agency-1.3
+(1,55): unexpected-start-tag-implies-end-tag
+(1,55): adoption-agency-1.3
+(1,62): unexpected-start-tag-implies-end-tag
+(1,66): adoption-agency-1.3
+(1,67): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <div>
+|       <nobr>
+|         <i>
+|       <i>
+|         <nobr>
+|           "2"
+|         <nobr>
+|       <nobr>
+|         "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
+#errors
+(1,37): unexpected-start-tag-implies-end-tag
+(1,46): adoption-agency-1.3
+(1,55): unexpected-start-tag-implies-end-tag
+(1,55): adoption-agency-1.3
+(1,55): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|         <ins>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
+#errors
+(1,42): unexpected-start-tag-implies-end-tag
+(1,42): adoption-agency-1.3
+(1,46): adoption-agency-1.3
+(1,50): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <ins>
+|       <nobr>
+|     <nobr>
+|       <i>
+|         "2"
+
+#data
+<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
+#errors
+(1,35): adoption-agency-1.3
+(1,44): unexpected-start-tag-implies-end-tag
+(1,44): adoption-agency-1.3
+(1,49): adoption-agency-1.3
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "1"
+|       <nobr>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+|         "2"
+
+#data
+<p><code x</code></p>
+
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,11): invalid-character-in-attribute-name
+(1,12): unexpected-character-after-solidus-in-tag
+(1,21): unexpected-end-tag
+(2,0): expected-closing-tag-but-got-eof
+#new-errors
+(1:11) unexpected-character-in-attribute-name
+(1:13) unexpected-solidus-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <code>
+|         code=""
+|         x<=""
+|     <code>
+|       code=""
+|       x<=""
+|       "
+"
+
+#data
+<!DOCTYPE html><svg><foreignObject><p><i></p>a
+#errors
+(1,45): unexpected-end-tag
+(1,46): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <p>
+|           <i>
+|         <i>
+|           "a"
+
+#data
+<!DOCTYPE html><table><tr><td><svg><foreignObject><p><i></p>a
+#errors
+(1,60): unexpected-end-tag
+(1,61): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg foreignObject>
+|                 <p>
+|                   <i>
+|                 <i>
+|                   "a"
+
+#data
+<!DOCTYPE html><math><mtext><p><i></p>a
+#errors
+(1,38): unexpected-end-tag
+(1,39): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <p>
+|           <i>
+|         <i>
+|           "a"
+
+#data
+<!DOCTYPE html><table><tr><td><math><mtext><p><i></p>a
+#errors
+(1,53): unexpected-end-tag
+(1,54): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mtext>
+|                 <p>
+|                   <i>
+|                 <i>
+|                   "a"
+
+#data
+<!DOCTYPE html><body><div><!/div>a
+#errors
+(1,28): expected-dashes-or-doctype
+(1,34): expected-closing-tag-but-got-eof
+#new-errors
+(1:29) incorrectly-opened-comment
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <!-- /div -->
+|       "a"
+
+#data
+<button><p><button>
+#errors
+Line 1 Col 8 Unexpected start tag (button). Expected DOCTYPE.
+Line 1 Col 19 Unexpected start tag (button) implies end tag (button).
+Line 1 Col 19 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|       <p>
+|     <button>
diff --git a/internal/html/testdata/webkit/tests3.dat b/internal/html/testdata/webkit/tests3.dat
new file mode 100644 (file)
index 0000000..c7583d9
--- /dev/null
@@ -0,0 +1,305 @@
+#data
+<head></head><style></style>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,20): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <style>
+|   <body>
+
+#data
+<head></head><script></script>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,21): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<head></head><!-- --><style></style><!-- --><script></script>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,28): unexpected-start-tag-out-of-my-head
+(1,52): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <style>
+|     <script>
+|   <!--   -->
+|   <!--   -->
+|   <body>
+
+#data
+<head></head><!-- -->x<style></style><!-- --><script></script>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <!--   -->
+|   <body>
+|     "x"
+|     <style>
+|     <!--   -->
+|     <script>
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+
+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+foo
+</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "foo
+"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
+</span></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x"
+|     <span>
+|       "
+"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x
+y</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x
+y"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x<div>
+y</pre></body></html>
+#errors
+(2,7): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x"
+|       <div>
+|         "
+y"
+
+#data
+<!DOCTYPE html><pre>&#x0a;&#x0a;A</pre>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+A"
+
+#data
+<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
+#errors
+(1,33): two-heads-are-not-better-than-one
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|   <body>
+
+#data
+<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
+#errors
+(1,33): two-heads-are-not-better-than-one
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<textarea>foo<span>bar</span><i>baz
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,35): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "foo<span>bar</span><i>baz"
+
+#data
+<title>foo<span>bar</em><i>baz
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,30): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|     <title>
+|       "foo<span>bar</em><i>baz"
+|   <body>
+
+#data
+<!DOCTYPE html><textarea>
+</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+
+#data
+<!DOCTYPE html><textarea>
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "foo"
+
+#data
+<!DOCTYPE html><textarea>
+
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "
+foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
+#errors
+(1,60): end-tag-too-early
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <div>
+|           <p>
+|       <li>
+
+#data
+<!doctype html><nobr><nobr><nobr>
+#errors
+(1,27): unexpected-start-tag-implies-end-tag
+(1,33): unexpected-start-tag-implies-end-tag
+(1,33): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nobr>
+|     <nobr>
+|     <nobr>
+
+#data
+<!doctype html><nobr><nobr></nobr><nobr>
+#errors
+(1,27): unexpected-start-tag-implies-end-tag
+(1,40): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nobr>
+|     <nobr>
+|     <nobr>
+
+#data
+<!doctype html><html><body><p><table></table></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <table>
+
+#data
+<p><table></table>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <table>
diff --git a/internal/html/testdata/webkit/tests4.dat b/internal/html/testdata/webkit/tests4.dat
new file mode 100644 (file)
index 0000000..0a6174c
--- /dev/null
@@ -0,0 +1,58 @@
+#data
+direct div content
+#errors
+#document-fragment
+div
+#document
+| "direct div content"
+
+#data
+direct textarea content
+#errors
+#document-fragment
+textarea
+#document
+| "direct textarea content"
+
+#data
+textarea content with <em>pseudo</em> <foo>markup
+#errors
+#document-fragment
+textarea
+#document
+| "textarea content with <em>pseudo</em> <foo>markup"
+
+#data
+this is &#x0043;DATA inside a <style> element
+#errors
+#document-fragment
+style
+#document
+| "this is &#x0043;DATA inside a <style> element"
+
+#data
+</plaintext>
+#errors
+#document-fragment
+plaintext
+#document
+| "</plaintext>"
+
+#data
+setting html's innerHTML
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   "setting html's innerHTML"
+
+#data
+<title>setting head's innerHTML</title>
+#errors
+#document-fragment
+head
+#document
+| <title>
+|   "setting head's innerHTML"
diff --git a/internal/html/testdata/webkit/tests5.dat b/internal/html/testdata/webkit/tests5.dat
new file mode 100644 (file)
index 0000000..1ef8cae
--- /dev/null
@@ -0,0 +1,210 @@
+#data
+<style> <!-- </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!-- "
+|   <body>
+|     "x"
+
+#data
+<style> <!-- </style> --> </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<style> <!--> </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!--> "
+|   <body>
+|     "x"
+
+#data
+<style> <!---> </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!---> "
+|   <body>
+|     "x"
+
+#data
+<iframe> <!---> </iframe>x
+#errors
+(1,8): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       " <!---> "
+|     "x"
+
+#data
+<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,36): unexpected-end-tag
+(1,50): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       " <!--- "
+|     "->x --> x"
+
+#data
+<script> <!-- </script> --> </script>x
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,37): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <script>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<title> <!-- </title> --> </title>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,34): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,42): unexpected-end-tag
+(1,58): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       " <!--- "
+|     "->x --> x"
+
+#data
+<style> <!</-- </style>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!</-- "
+|   <body>
+|     "x"
+
+#data
+<p><xmp></xmp>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|     <xmp>
+
+#data
+<xmp> <!-- > --> </xmp>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       " <!-- > --> "
+
+#data
+<title>&amp;</title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<title><!--&amp;--></title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--&-->"
+|   <body>
+
+#data
+<title><!--</title>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--"
+|   <body>
+
+#data
+<noscript><!--</noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,39): unexpected-end-tag
+#script-on
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<noscript><!--</noscript>--></noscript>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#script-off
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       <!-- </noscript> -->
+|   <body>
diff --git a/internal/html/testdata/webkit/tests6.dat b/internal/html/testdata/webkit/tests6.dat
new file mode 100644 (file)
index 0000000..f399123
--- /dev/null
@@ -0,0 +1,663 @@
+#data
+<!doctype html></head> <head>
+#errors
+(1,29): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   " "
+|   <body>
+
+#data
+<!doctype html><form><div></form><div>
+#errors
+(1,33): end-tag-too-early-ignored
+(1,38): expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <div>
+|         <div>
+
+#data
+<!doctype html><title>&amp;</title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<!doctype html><title><!--&amp;--></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "<!--&-->"
+|   <body>
+
+#data
+<!doctype>
+#errors
+(1,9): need-space-after-doctype
+(1,10): expected-doctype-name-but-got-right-bracket
+(1,10): unknown-doctype
+#new-errors
+(1:10) missing-doctype-name
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!---x
+#errors
+(1,6): eof-in-comment
+(1,6): expected-doctype-but-got-eof
+#new-errors
+(1:7) eof-in-comment
+#document
+| <!-- -x -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>
+<div>
+#errors
+(1,6): unexpected-start-tag
+(2,5): expected-closing-tag-but-got-eof
+#document-fragment
+div
+#document
+| "
+"
+| <div>
+
+#data
+<frameset></frameset>
+foo
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(2,1): unexpected-char-after-frameset
+(2,2): unexpected-char-after-frameset
+(2,3): unexpected-char-after-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+<noframes>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(2,10): expected-named-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+|   <noframes>
+
+#data
+<frameset></frameset>
+<div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(2,5): unexpected-start-tag-after-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+</html>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+</div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(2,6): unexpected-end-tag-after-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<form><form>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,12): unexpected-start-tag
+(1,12): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+
+#data
+<button><button>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,16): unexpected-start-tag-implies-end-tag
+(1,16): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|     <button>
+
+#data
+<table><tr><td></th>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): unexpected-end-tag
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><caption><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): unexpected-cell-in-table-body
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><caption><div>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+</caption><div>
+#errors
+(1,10): XXX-undefined-error
+(1,15): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <div>
+
+#data
+<table><caption><div></caption>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,31): expected-one-end-tag-but-got-another
+(1,31): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+<table><caption></table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+
+#data
+</table><div>
+#errors
+(1,8): unexpected-end-tag
+(1,13): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <div>
+
+#data
+<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,23): unexpected-end-tag
+(1,29): unexpected-end-tag
+(1,40): unexpected-end-tag
+(1,47): unexpected-end-tag
+(1,55): unexpected-end-tag
+(1,60): unexpected-end-tag
+(1,68): unexpected-end-tag
+(1,73): unexpected-end-tag
+(1,81): unexpected-end-tag
+(1,86): unexpected-end-tag
+(1,86): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+
+#data
+<table><caption><div></div>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+<table><tr><td></body></caption></col></colgroup></html>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,22): unexpected-end-tag
+(1,32): unexpected-end-tag
+(1,38): unexpected-end-tag
+(1,49): unexpected-end-tag
+(1,56): unexpected-end-tag
+(1,56): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+</table></tbody></tfoot></thead></tr><div>
+#errors
+(1,8): unexpected-end-tag
+(1,16): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,32): unexpected-end-tag
+(1,37): unexpected-end-tag
+(1,42): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <div>
+
+#data
+<table><colgroup>foo
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,18): foster-parenting-character-in-table
+(1,19): foster-parenting-character-in-table
+(1,20): foster-parenting-character-in-table
+(1,20): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <table>
+|       <colgroup>
+
+#data
+foo<col>
+#errors
+(1,1): unexpected-character-in-colgroup
+(1,2): unexpected-character-in-colgroup
+(1,3): unexpected-character-in-colgroup
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<table><colgroup></col>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,23): no-end-tag
+(1,23): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+
+#data
+<frameset><div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,15): unexpected-start-tag-in-frameset
+(1,15): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</frameset><frame>
+#errors
+(1,11): unexpected-frameset-in-frameset-innerhtml
+#document-fragment
+frameset
+#document
+| <frame>
+
+#data
+<frameset></div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+(1,16): unexpected-end-tag-in-frameset
+(1,16): eof-in-frameset
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</body><div>
+#errors
+(1,7): unexpected-close-tag
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <div>
+
+#data
+<table><tr><div>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,16): unexpected-start-tag-implies-table-voodoo
+(1,16): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+</tr><td>
+#errors
+(1,5): unexpected-end-tag
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+</tbody></tfoot></thead><td>
+#errors
+(1,8): unexpected-end-tag
+(1,16): unexpected-end-tag
+(1,24): unexpected-end-tag
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<table><tr><div><td>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,16): foster-parenting-start-tag
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<caption><col><colgroup><tbody><tfoot><thead><tr>
+#errors
+(1,9): unexpected-start-tag
+(1,14): unexpected-start-tag
+(1,24): unexpected-start-tag
+(1,31): unexpected-start-tag
+(1,38): unexpected-start-tag
+(1,45): unexpected-start-tag
+#document-fragment
+tbody
+#document
+| <tr>
+
+#data
+<table><tbody></thead>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,22): unexpected-end-tag-in-table-body
+(1,22): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+</table><tr>
+#errors
+(1,8): unexpected-end-tag
+#document-fragment
+tbody
+#document
+| <tr>
+
+#data
+<table><tbody></body></caption></col></colgroup></html></td></th></tr>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,21): unexpected-end-tag-in-table-body
+(1,31): unexpected-end-tag-in-table-body
+(1,37): unexpected-end-tag-in-table-body
+(1,48): unexpected-end-tag-in-table-body
+(1,55): unexpected-end-tag-in-table-body
+(1,60): unexpected-end-tag-in-table-body
+(1,65): unexpected-end-tag-in-table-body
+(1,70): unexpected-end-tag-in-table-body
+(1,70): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+<table><tbody></div>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,20): unexpected-end-tag-implies-table-voodoo
+(1,20): end-tag-too-early
+(1,20): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+<table><table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,14): unexpected-start-tag-implies-end-tag
+(1,14): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|     <table>
+
+#data
+<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,14): unexpected-end-tag
+(1,24): unexpected-end-tag
+(1,30): unexpected-end-tag
+(1,41): unexpected-end-tag
+(1,48): unexpected-end-tag
+(1,56): unexpected-end-tag
+(1,61): unexpected-end-tag
+(1,69): unexpected-end-tag
+(1,74): unexpected-end-tag
+(1,82): unexpected-end-tag
+(1,87): unexpected-end-tag
+(1,87): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+
+#data
+</table><tr>
+#errors
+(1,8): unexpected-end-tag
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+<body></body></html>
+#errors
+(1,20): unexpected-end-tag-after-body-innerhtml
+#document-fragment
+html
+#document
+| <head>
+| <body>
+
+#data
+<html><frameset></frameset></html> 
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   " "
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<param><frameset></frameset>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,17): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<source><frameset></frameset>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,18): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<track><frameset></frameset>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,17): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</html><frameset></frameset>
+#errors
+(1,7): expected-doctype-but-got-end-tag
+(1,17): expected-eof-but-got-start-tag
+(1,17): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</body><frameset></frameset>
+#errors
+(1,7): expected-doctype-but-got-end-tag
+(1,17): unexpected-start-tag-after-body
+(1,17): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
diff --git a/internal/html/testdata/webkit/tests7.dat b/internal/html/testdata/webkit/tests7.dat
new file mode 100644 (file)
index 0000000..395dc72
--- /dev/null
@@ -0,0 +1,418 @@
+#data
+<!doctype html><body><title>X</title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+
+#data
+<!doctype html><table><title>X</title></table>
+#errors
+(1,29): unexpected-start-tag-implies-table-voodoo
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+|     <table>
+
+#data
+<!doctype html><head></head><title>X</title>
+#errors
+(1,35): unexpected-start-tag-out-of-my-head
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "X"
+|   <body>
+
+#data
+<!doctype html></head><title>X</title>
+#errors
+(1,29): unexpected-start-tag-out-of-my-head
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "X"
+|   <body>
+
+#data
+<!doctype html><table><meta></table>
+#errors
+(1,28): unexpected-start-tag-implies-table-voodoo
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     <table>
+
+#data
+<!doctype html><table>X<tr><td><table> <meta></table></table>
+#errors
+unexpected text in table
+(1,45): unexpected-start-tag-implies-table-voodoo
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <meta>
+|             <table>
+|               " "
+
+#data
+<!doctype html><html> <head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html> <head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html><table><style> <tr>x </style> </table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <style>
+|         " <tr>x "
+|       " "
+
+#data
+<!doctype html><table><TBODY><script> <tr>x </script> </table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <script>
+|           " <tr>x "
+|         " "
+
+#data
+<!doctype html><p><applet><p>X</p></applet>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <applet>
+|         <p>
+|           "X"
+
+#data
+<!doctype html><p><object type="application/x-non-existant-plugin"><p>X</p></object>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <object>
+|         type="application/x-non-existant-plugin"
+|         <p>
+|           "X"
+
+#data
+<!doctype html><listing>
+X</listing>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+|       "X"
+
+#data
+<!doctype html><select><input>X
+#errors
+(1,30): unexpected-input-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <input>
+|     "X"
+
+#data
+<!doctype html><select><select>X
+#errors
+(1,31): unexpected-select-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     "X"
+
+#data
+<!doctype html><table><input type=hidDEN></table>
+#errors
+(1,41): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>X<input type=hidDEN></table>
+#errors
+(1,23): foster-parenting-character
+(1,42): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>  <input type=hidDEN></table>
+#errors
+(1,43): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>  <input type='hidDEN'></table>
+#errors
+(1,45): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
+#errors
+(1,44): unexpected-start-tag-implies-table-voodoo
+(1,63): unexpected-hidden-input-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|       type=" hidden"
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table><select>X<tr>
+#errors
+(1,30): unexpected-start-tag-implies-table-voodoo
+(1,35): unexpected-table-element-start-tag-in-select-in-table
+(1,35): eof-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><select>X</select>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+
+#data
+<!DOCTYPE hTmL><html></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML><html></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>X</body></body>
+#errors
+(1,21): unexpected-end-tag-after-body
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   "X"
+
+#data
+<div><p>a</x> b
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,13): unexpected-end-tag
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <p>
+|         "a b"
+
+#data
+<table><tr><td><code></code> </table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <code>
+|             " "
+
+#data
+<table><b><tr><td>aaa</td></tr>bbb</table>ccc
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,10): foster-parenting-start-tag
+(1,32): foster-parenting-character
+(1,33): foster-parenting-character
+(1,34): foster-parenting-character
+(1,45): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <b>
+|       "bbb"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "aaa"
+|     <b>
+|       "ccc"
+
+#data
+A<table><tr> B</tr> B</table>
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,13): foster-parenting-character
+(1,14): foster-parenting-character
+(1,20): foster-parenting-character
+(1,21): foster-parenting-character
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A B B"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+A<table><tr> B</tr> </em>C</table>
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,13): foster-parenting-character
+(1,14): foster-parenting-character
+(1,20): foster-parenting-character
+(1,25): unexpected-end-tag
+(1,25): unexpected-end-tag-in-special-element
+(1,26): foster-parenting-character
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A BC"
+|     <table>
+|       <tbody>
+|         <tr>
+|         " "
+
+#data
+<select><keygen>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,16): unexpected-input-in-select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <keygen>
diff --git a/internal/html/testdata/webkit/tests8.dat b/internal/html/testdata/webkit/tests8.dat
new file mode 100644 (file)
index 0000000..ba2e63d
--- /dev/null
@@ -0,0 +1,162 @@
+#data
+<div>
+<div></div>
+</span>x
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(3,7): unexpected-end-tag
+(3,8): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "
+"
+|       <div>
+|       "
+x"
+
+#data
+<div>x<div></div>
+</span>x
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(2,7): unexpected-end-tag
+(2,8): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "
+x"
+
+#data
+<div>x<div></div>x</span>x
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,25): unexpected-end-tag
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "xx"
+
+#data
+<div>x<div></div>y</span>z
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,25): unexpected-end-tag
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "yz"
+
+#data
+<table><div>x<div></div>x</span>x
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,12): foster-parenting-start-tag
+(1,13): foster-parenting-character
+(1,18): foster-parenting-start-tag
+(1,24): foster-parenting-end-tag
+(1,25): foster-parenting-start-tag
+(1,32): foster-parenting-end-tag
+(1,32): unexpected-end-tag
+(1,33): foster-parenting-character
+(1,33): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "xx"
+|     <table>
+
+#data
+<table><li><li></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <li>
+|     <li>
+|     <table>
+
+#data
+x<table>x
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,9): foster-parenting-character
+(1,9): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "xx"
+|     <table>
+
+#data
+x<table><table>x
+#errors
+(1,1): expected-doctype-but-got-chars
+(1,15): unexpected-start-tag-implies-end-tag
+(1,16): foster-parenting-character
+(1,16): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <table>
+|     "x"
+|     <table>
+
+#data
+<b>a<div></div><div></b>y
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,24): adoption-agency-1.3
+(1,25): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "a"
+|       <div>
+|     <div>
+|       <b>
+|       "y"
+
+#data
+<a><div><p></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,15): adoption-agency-1.3
+(1,15): adoption-agency-1.3
+(1,15): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <div>
+|       <a>
+|       <p>
+|         <a>
diff --git a/internal/html/testdata/webkit/tests9.dat b/internal/html/testdata/webkit/tests9.dat
new file mode 100644 (file)
index 0000000..f8d04b2
--- /dev/null
@@ -0,0 +1,472 @@
+#data
+<!DOCTYPE html><math></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!DOCTYPE html><body><math></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!DOCTYPE html><math><mi>
+#errors
+(1,25) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+
+#data
+<!DOCTYPE html><math><annotation-xml><svg><u>
+#errors
+(1,45) unexpected-html-element-in-foreign-content
+(1,45) expected-closing-tag-but-got-eof
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|     <u>
+
+#data
+<!DOCTYPE html><body><select><math></math></select>
+#errors
+(1,35) unexpected-start-tag-in-select
+(1,42) unexpected-end-tag-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><body><select><option><math></math></option></select>
+#errors
+(1,43) unexpected-start-tag-in-select
+(1,50) unexpected-end-tag-in-select
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!DOCTYPE html><body><table><math></math></table>
+#errors
+(1,34) unexpected-start-tag-implies-table-voodoo
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
+#errors
+(1,34) foster-parenting-start-token
+(1,39) foster-parenting-character
+(1,40) foster-parenting-character
+(1,41) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
+#errors
+(1,34) foster-parenting-start-tag
+(1,39) foster-parenting-character
+(1,40) foster-parenting-character
+(1,41) foster-parenting-character
+(1,51) foster-parenting-character
+(1,52) foster-parenting-character
+(1,53) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
+#errors
+(1,41) foster-parenting-start-tag
+(1,46) foster-parenting-character
+(1,47) foster-parenting-character
+(1,48) foster-parenting-character
+(1,58) foster-parenting-character
+(1,59) foster-parenting-character
+(1,60) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+|       <tbody>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
+#errors
+(1,45) foster-parenting-start-tag
+(1,50) foster-parenting-character
+(1,51) foster-parenting-character
+(1,52) foster-parenting-character
+(1,62) foster-parenting-character
+(1,63) foster-parenting-character
+(1,64) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mi>
+|                 "foo"
+|               <math mi>
+|                 "bar"
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mi>
+|                 "foo"
+|               <math mi>
+|                 "bar"
+|             <p>
+|               "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|         <p>
+|           "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
+#errors
+(1,70) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|         <p>
+|           "baz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table><p>quux
+#errors
+(1,78) unexpected-end-tag
+(1,78) expected-one-end-tag-but-got-another
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|           "baz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
+#errors
+(1,44) foster-parenting-start-tag
+(1,49) foster-parenting-character
+(1,50) foster-parenting-character
+(1,51) foster-parenting-character
+(1,61) foster-parenting-character
+(1,62) foster-parenting-character
+(1,63) foster-parenting-character
+(1,71) unexpected-html-element-in-foreign-content
+(1,71) foster-parenting-start-tag
+(1,63) foster-parenting-character
+(1,63) foster-parenting-character
+(1,63) foster-parenting-character
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <p>
+|       "baz"
+|     <table>
+|       <colgroup>
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
+#errors
+(1,50) unexpected-start-tag-in-select
+(1,54) unexpected-start-tag-in-select
+(1,62) unexpected-end-tag-in-select
+(1,66) unexpected-start-tag-in-select
+(1,74) unexpected-end-tag-in-select
+(1,77) unexpected-start-tag-in-select
+(1,88) unexpected-table-element-end-tag-in-select-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               "foobarbaz"
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
+#errors
+(1,36) unexpected-start-tag-implies-table-voodoo
+(1,42) unexpected-start-tag-in-select
+(1,46) unexpected-start-tag-in-select
+(1,54) unexpected-end-tag-in-select
+(1,58) unexpected-start-tag-in-select
+(1,66) unexpected-end-tag-in-select
+(1,69) unexpected-start-tag-in-select
+(1,80) unexpected-table-element-end-tag-in-select-in-table
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "foobarbaz"
+|     <table>
+|     <p>
+|       "quux"
+
+#data
+<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi><p>baz
+#errors
+(1,41) expected-eof-but-got-start-tag
+(1,68) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <p>
+|       "baz"
+
+#data
+<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi><p>baz
+#errors
+(1,34) unexpected-start-tag-after-body
+(1,61) unexpected-html-element-in-foreign-content
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <p>
+|       "baz"
+
+#data
+<!DOCTYPE html><frameset><math><mi></mi><mi></mi><p><span>
+#errors
+(1,31) unexpected-start-tag-in-frameset
+(1,35) unexpected-start-tag-in-frameset
+(1,40) unexpected-end-tag-in-frameset
+(1,44) unexpected-start-tag-in-frameset
+(1,49) unexpected-end-tag-in-frameset
+(1,52) unexpected-start-tag-in-frameset
+(1,58) unexpected-start-tag-in-frameset
+(1,58) eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><p><span>
+#errors
+(1,42) unexpected-start-tag-after-frameset
+(1,46) unexpected-start-tag-after-frameset
+(1,51) unexpected-end-tag-after-frameset
+(1,55) unexpected-start-tag-after-frameset
+(1,60) unexpected-end-tag-after-frameset
+(1,63) unexpected-start-tag-after-frameset
+(1,69) unexpected-start-tag-after-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     <math math>
+|       xlink href="foo"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+|       "bar"
diff --git a/internal/html/testdata/webkit/tests_innerHTML_1.dat b/internal/html/testdata/webkit/tests_innerHTML_1.dat
new file mode 100644 (file)
index 0000000..54f4368
--- /dev/null
@@ -0,0 +1,887 @@
+#data
+<body><span>
+#errors
+(1,6): unexpected-start-tag
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><body>
+#errors
+(1,12): unexpected-start-tag
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><body>
+#errors
+(1,12): unexpected-start-tag
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+div
+#document
+| <span>
+
+#data
+<body><span>
+#errors
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   <span>
+
+#data
+<frameset><span>
+#errors
+(1,10): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><frameset>
+#errors
+(1,16): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><frameset>
+#errors
+(1,16): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+div
+#document
+| <span>
+
+#data
+<frameset><span>
+#errors
+(1,16): unexpected-start-tag-in-frameset
+(1,16): eof-in-frameset
+#document-fragment
+html
+#document
+| <head>
+| <frameset>
+
+#data
+<table><tr>
+#errors
+(1,7): unexpected-start-tag
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+</table><tr>
+#errors
+(1,8): unexpected-end-tag
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+<a>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,3): eof-in-table
+#document-fragment
+table
+#document
+| <a>
+
+#data
+<a>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,3): eof-in-table
+#document-fragment
+table
+#document
+| <a>
+
+#data
+<a><caption>a
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,13): expected-closing-tag-but-got-eof
+#document-fragment
+table
+#document
+| <a>
+| <caption>
+|   "a"
+
+#data
+<a><colgroup><col>
+#errors
+(1,3): foster-parenting-start-token
+(1,18): expected-closing-tag-but-got-eof
+#document-fragment
+table
+#document
+| <a>
+| <colgroup>
+|   <col>
+
+#data
+<a><tbody><tr>
+#errors
+(1,3): foster-parenting-start-tag
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+
+#data
+<a><tfoot><tr>
+#errors
+(1,3): foster-parenting-start-tag
+#document-fragment
+table
+#document
+| <a>
+| <tfoot>
+|   <tr>
+
+#data
+<a><thead><tr>
+#errors
+(1,3): foster-parenting-start-tag
+#document-fragment
+table
+#document
+| <a>
+| <thead>
+|   <tr>
+
+#data
+<a><tr>
+#errors
+(1,3): foster-parenting-start-tag
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+
+#data
+<a><th>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+|     <th>
+
+#data
+<a><td>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+|     <td>
+
+#data
+<table></table><tbody>
+#errors
+(1,22): unexpected-start-tag
+#document-fragment
+caption
+#document
+| <table>
+
+#data
+</table><span>
+#errors
+(1,8): unexpected-end-tag
+(1,14): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+<span></table>
+#errors
+(1,14): unexpected-end-tag
+(1,14): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+</caption><span>
+#errors
+(1,10): XXX-undefined-error
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+<span></caption><span>
+#errors
+(1,16): XXX-undefined-error
+(1,22): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><caption><span>
+#errors
+(1,15): unexpected-start-tag
+(1,21): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><col><span>
+#errors
+(1,11): unexpected-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><colgroup><span>
+#errors
+(1,16): unexpected-start-tag
+(1,22): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><html><span>
+#errors
+(1,12): non-html-root
+(1,18): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tbody><span>
+#errors
+(1,13): unexpected-start-tag
+(1,19): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><td><span>
+#errors
+(1,10): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tfoot><span>
+#errors
+(1,13): unexpected-start-tag
+(1,19): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><thead><span>
+#errors
+(1,13): unexpected-start-tag
+(1,19): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><th><span>
+#errors
+(1,10): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tr><span>
+#errors
+(1,10): unexpected-start-tag
+(1,16): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span></table><span>
+#errors
+(1,14): unexpected-end-tag
+(1,20): expected-closing-tag-but-got-eof
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+</colgroup><col>
+#errors
+(1,11): XXX-undefined-error
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<a><col>
+#errors
+(1,3): XXX-undefined-error
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<caption><a>
+#errors
+(1,9): XXX-undefined-error
+(1,12): unexpected-start-tag-implies-table-voodoo
+(1,12): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<col><a>
+#errors
+(1,5): XXX-undefined-error
+(1,8): unexpected-start-tag-implies-table-voodoo
+(1,8): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<colgroup><a>
+#errors
+(1,10): XXX-undefined-error
+(1,13): unexpected-start-tag-implies-table-voodoo
+(1,13): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<tbody><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,10): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<tfoot><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,10): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<thead><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): unexpected-start-tag-implies-table-voodoo
+(1,10): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+</table><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): unexpected-start-tag-implies-table-voodoo
+(1,11): eof-in-table
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<a><tr>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+
+#data
+<a><td>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<a><td>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<a><td>
+#errors
+(1,3): unexpected-start-tag-implies-table-voodoo
+(1,7): unexpected-cell-in-table-body
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<td><table><tbody><a><tr>
+#errors
+(1,4): unexpected-cell-in-table-body
+(1,21): unexpected-start-tag-implies-table-voodoo
+(1,25): eof-in-table
+#document-fragment
+tbody
+#document
+| <tr>
+|   <td>
+|     <a>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+</tr><td>
+#errors
+(1,5): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<td><table><a><tr></tr><tr>
+#errors
+(1,14): unexpected-start-tag-implies-table-voodoo
+(1,27): eof-in-table
+#document-fragment
+tr
+#document
+| <td>
+|   <a>
+|   <table>
+|     <tbody>
+|       <tr>
+|       <tr>
+
+#data
+<caption><td>
+#errors
+(1,9): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<col><td>
+#errors
+(1,5): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<colgroup><td>
+#errors
+(1,10): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tbody><td>
+#errors
+(1,7): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tfoot><td>
+#errors
+(1,7): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<thead><td>
+#errors
+(1,7): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tr><td>
+#errors
+(1,4): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+</table><td>
+#errors
+(1,8): XXX-undefined-error
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<td><table></table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <table>
+| <td>
+
+#data
+<td><table></table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <table>
+| <td>
+
+#data
+<caption><a>
+#errors
+(1,9): XXX-undefined-error
+(1,12): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<col><a>
+#errors
+(1,5): XXX-undefined-error
+(1,8): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<colgroup><a>
+#errors
+(1,10): XXX-undefined-error
+(1,13): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tbody><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tfoot><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<th><a>
+#errors
+(1,4): XXX-undefined-error
+(1,7): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<thead><a>
+#errors
+(1,7): XXX-undefined-error
+(1,10): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tr><a>
+#errors
+(1,4): XXX-undefined-error
+(1,7): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</table><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tbody><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</td><a>
+#errors
+(1,5): unexpected-end-tag
+(1,8): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tfoot><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</thead><a>
+#errors
+(1,8): XXX-undefined-error
+(1,11): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</th><a>
+#errors
+(1,5): unexpected-end-tag
+(1,8): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tr><a>
+#errors
+(1,5): XXX-undefined-error
+(1,8): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<table><td><td>
+#errors
+(1,11): unexpected-cell-in-table-body
+(1,15): expected-closing-tag-but-got-eof
+#document-fragment
+td
+#document
+| <table>
+|   <tbody>
+|     <tr>
+|       <td>
+|       <td>
+
+#data
+</select><option>
+#errors
+(1,9): XXX-undefined-error
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<input><option>
+#errors
+(1,7): unexpected-input-in-select
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<keygen><option>
+#errors
+(1,8): unexpected-input-in-select
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<textarea><option>
+#errors
+(1,10): unexpected-input-in-select
+#document-fragment
+select
+#document
+| <option>
+
+#data
+</html><!--abc-->
+#errors
+(1,7): unexpected-end-tag-after-body-innerhtml
+#document-fragment
+html
+#document
+| <head>
+| <body>
+| <!-- abc -->
+
+#data
+</frameset><frame>
+#errors
+(1,11): unexpected-frameset-in-frameset-innerhtml
+#document-fragment
+frameset
+#document
+| <frame>
+
+#data
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
diff --git a/internal/html/testdata/webkit/tricky01.dat b/internal/html/testdata/webkit/tricky01.dat
new file mode 100644 (file)
index 0000000..753502a
--- /dev/null
@@ -0,0 +1,336 @@
+#data
+<b><p>Bold </b> Not bold</p>
+Also not bold.
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,15): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <p>
+|       <b>
+|         "Bold "
+|       " Not bold"
+|     "
+Also not bold."
+
+#data
+<html>
+<font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain
+<p>I should not be red. <font color=red>Red. <i>Italic and red.</p>
+<p>Italic and red. </i> Red.</font> I should not be red.</p>
+<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(2,58): adoption-agency-1.3
+(3,67): unexpected-end-tag
+(4,23): adoption-agency-1.3
+(4,35): adoption-agency-1.3
+(5,30): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       color="red"
+|       <i>
+|         "Italic and Red"
+|     <i>
+|       <p>
+|         <font>
+|           color="red"
+|           "Italic and Red "
+|         " Just italic."
+|       " Italic only."
+|     " Plain
+"
+|     <p>
+|       "I should not be red. "
+|       <font>
+|         color="red"
+|         "Red. "
+|         <i>
+|           "Italic and red."
+|     <font>
+|       color="red"
+|       <i>
+|         "
+"
+|     <p>
+|       <font>
+|         color="red"
+|         <i>
+|           "Italic and red. "
+|         " Red."
+|       " I should not be red."
+|     "
+"
+|     <b>
+|       "Bold "
+|       <i>
+|         "Bold and italic"
+|     <i>
+|       " Only Italic "
+|     " Plain"
+
+#data
+<html><body>
+<p><font size="7">First paragraph.</p>
+<p>Second paragraph.</p></font>
+<b><p><i>Bold and Italic</b> Italic</p>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(2,38): unexpected-end-tag
+(4,28): adoption-agency-1.3
+(4,28): adoption-agency-1.3
+(4,39): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <p>
+|       <font>
+|         size="7"
+|         "First paragraph."
+|     <font>
+|       size="7"
+|       "
+"
+|       <p>
+|         "Second paragraph."
+|     "
+"
+|     <b>
+|     <p>
+|       <b>
+|         <i>
+|           "Bold and Italic"
+|       <i>
+|         " Italic"
+
+#data
+<html>
+<dl>
+<dt><b>Boo
+<dd>Goo?
+</dl>
+</html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(4,4): end-tag-too-early
+(5,5): end-tag-too-early
+(6,7): expected-one-end-tag-but-got-another
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dl>
+|       "
+"
+|       <dt>
+|         <b>
+|           "Boo
+"
+|       <dd>
+|         <b>
+|           "Goo?
+"
+|     <b>
+|       "
+"
+
+#data
+<html><body>
+<label><a><div>Hello<div>World</div></a></label>  
+</body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(2,40): adoption-agency-1.3
+(2,48): unexpected-end-tag
+(3,7): expected-one-end-tag-but-got-another
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <label>
+|       <a>
+|       <div>
+|         <a>
+|           "Hello"
+|           <div>
+|             "World"
+|         "  
+"
+
+#data
+<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,15): foster-parenting-start-tag
+(1,16): foster-parenting-character
+(1,22): foster-parenting-start-tag
+(1,23): foster-parenting-character
+(1,32): foster-parenting-end-tag
+(1,32): end-tag-too-early
+(1,33): foster-parenting-character
+(1,38): foster-parenting-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       " "
+|       <font>
+|         "a"
+|     <font>
+|       <img>
+|       " "
+|     <table>
+|       " "
+|       <tbody>
+|         <tr>
+|           <td>
+|             " "
+|           " "
+|         " "
+
+#data
+<table><tr><p><a><p>You should see this text.
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,14): unexpected-start-tag-implies-table-voodoo
+(1,17): unexpected-start-tag-implies-table-voodoo
+(1,20): unexpected-start-tag-implies-table-voodoo
+(1,20): closing-non-current-p-element
+(1,21): foster-parenting-character
+(1,22): foster-parenting-character
+(1,23): foster-parenting-character
+(1,24): foster-parenting-character
+(1,25): foster-parenting-character
+(1,26): foster-parenting-character
+(1,27): foster-parenting-character
+(1,28): foster-parenting-character
+(1,29): foster-parenting-character
+(1,30): foster-parenting-character
+(1,31): foster-parenting-character
+(1,32): foster-parenting-character
+(1,33): foster-parenting-character
+(1,34): foster-parenting-character
+(1,35): foster-parenting-character
+(1,36): foster-parenting-character
+(1,37): foster-parenting-character
+(1,38): foster-parenting-character
+(1,39): foster-parenting-character
+(1,40): foster-parenting-character
+(1,41): foster-parenting-character
+(1,42): foster-parenting-character
+(1,43): foster-parenting-character
+(1,44): foster-parenting-character
+(1,45): foster-parenting-character
+(1,45): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       <a>
+|     <p>
+|       <a>
+|         "You should see this text."
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<TABLE>
+<TR>
+<CENTER><CENTER><TD></TD></TR><TR>
+<FONT>
+<TABLE><tr></tr></TABLE>
+</P>
+<a></font><font></a>
+This page contains an insanely badly-nested tag sequence.
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(3,8): unexpected-start-tag-implies-table-voodoo
+(3,16): unexpected-start-tag-implies-table-voodoo
+(4,6): unexpected-start-tag-implies-table-voodoo
+(4,6): unexpected character token in table (the newline)
+(5,7): unexpected-start-tag-implies-end-tag
+(6,4): unexpected p end tag
+(7,10): adoption-agency-1.3
+(7,20): adoption-agency-1.3
+(8,57): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       <center>
+|     <font>
+|       "
+"
+|     <table>
+|       "
+"
+|       <tbody>
+|         <tr>
+|           "
+"
+|           <td>
+|         <tr>
+|           "
+"
+|     <table>
+|       <tbody>
+|         <tr>
+|     <font>
+|       "
+"
+|       <p>
+|       "
+"
+|       <a>
+|     <a>
+|       <font>
+|     <font>
+|       "
+This page contains an insanely badly-nested tag sequence."
+
+#data
+<html>
+<body>
+<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
+nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
+</body>
+</html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(3,56): adoption-agency-1.3
+(4,58): adoption-agency-1.3
+(5,7): expected-one-end-tag-but-got-another
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <b>
+|       <nobr>
+|     <div>
+|       <b>
+|         <nobr>
+|           "This text is in a div inside a nobr"
+|         "More text that should not be in the nobr, i.e., the
+nobr should have closed the div inside it implicitly. "
+|       <pre>
+|         "A pre tag outside everything else."
+|       "
+
+"
diff --git a/internal/html/testdata/webkit/webkit01.dat b/internal/html/testdata/webkit/webkit01.dat
new file mode 100644 (file)
index 0000000..2127cfe
--- /dev/null
@@ -0,0 +1,755 @@
+#data
+Test
+#errors
+(1,4): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<div></div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<div>Test</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Test"
+
+#data
+<di
+#errors
+(1,3): eof-in-tag-name
+(1,3): expected-doctype-but-got-eof
+#new-errors
+(1:4) eof-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<div>Hello</div>
+<script>
+console.log("PASS");
+</script>
+<div>Bye</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Hello"
+|     "
+"
+|     <script>
+|       "
+console.log("PASS");
+"
+|     "
+"
+|     <div>
+|       "Bye"
+
+#data
+<div foo="bar">Hello</div>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo="bar"
+|       "Hello"
+
+#data
+<div>Hello</div>
+<script>
+console.log("FOO<span>BAR</span>BAZ");
+</script>
+<div>Bye</div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Hello"
+|     "
+"
+|     <script>
+|       "
+console.log("FOO<span>BAR</span>BAZ");
+"
+|     "
+"
+|     <div>
+|       "Bye"
+
+#data
+<foo bar="baz"></foo><potato quack="duck"></potato>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="baz"
+|     <potato>
+|       quack="duck"
+
+#data
+<foo bar="baz"><potato quack="duck"></potato></foo>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="baz"
+|       <potato>
+|         quack="duck"
+
+#data
+<foo></foo bar="baz"><potato></potato quack="duck">
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,21): attributes-in-end-tag
+(1,51): attributes-in-end-tag
+#new-errors
+(1:21) end-tag-with-attributes
+(1:51) end-tag-with-attributes
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|     <potato>
+
+#data
+</ tttt>
+#errors
+(1,2): expected-closing-tag-but-got-char
+(1,8): expected-doctype-but-got-eof
+#new-errors
+(1:3) invalid-first-character-of-tag-name
+#document
+| <!--  tttt -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<div FOO ><img><img></div>
+#errors
+(1,10): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo=""
+|       <img>
+|       <img>
+
+#data
+<p>Test</p<p>Test2</p>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,13): unexpected-end-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "TestTest2"
+
+#data
+<rdar://problem/6869687>
+#errors
+(1,7): unexpected-character-after-solidus-in-tag
+(1,8): unexpected-character-after-solidus-in-tag
+(1,16): unexpected-character-after-solidus-in-tag
+(1,24): expected-doctype-but-got-start-tag
+(1,24): expected-closing-tag-but-got-eof
+#new-errors
+(1:8) unexpected-solidus-in-tag
+(1:9) unexpected-solidus-in-tag
+(1:17) unexpected-solidus-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <rdar:>
+|       6869687=""
+|       problem=""
+
+#data
+<A>test< /A>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,8): expected-tag-name
+(1,12): expected-closing-tag-but-got-eof
+#new-errors
+(1:9) invalid-first-character-of-tag-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "test< /A>"
+
+#data
+&lt;
+#errors
+(1,4): expected-doctype-but-got-chars
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<"
+
+#data
+<body foo='bar'><body foo='baz' yo='mama'>
+#errors
+(1,16): expected-doctype-but-got-start-tag
+(1,42): unexpected-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     foo="bar"
+|     yo="mama"
+
+#data
+<body></br foo="bar"></body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,21): attributes-in-end-tag
+(1,21): unexpected-end-tag-treated-as
+#new-errors
+(1:21) end-tag-with-attributes
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+
+#data
+<bdy><br foo="bar"></body>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,26): expected-one-end-tag-but-got-another
+#document
+| <html>
+|   <head>
+|   <body>
+|     <bdy>
+|       <br>
+|         foo="bar"
+
+#data
+<body></body></br foo="bar">
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,28): attributes-in-end-tag
+(1,28): unexpected-end-tag-after-body
+(1,28): unexpected-end-tag-treated-as
+#new-errors
+(1:28) end-tag-with-attributes
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+
+#data
+<bdy></body><br foo="bar">
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,12): expected-one-end-tag-but-got-another
+(1,26): unexpected-start-tag-after-body
+(1,26): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <bdy>
+|       <br>
+|         foo="bar"
+
+#data
+<html><body></body></html><!-- Hi there -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  Hi there  -->
+
+#data
+<html><body></body></html><!-- Comment A --><!-- Comment B --><!-- Comment C --><!-- Comment D --><!-- Comment E -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  Comment A  -->
+| <!--  Comment B  -->
+| <!--  Comment C  -->
+| <!--  Comment D  -->
+| <!--  Comment E  -->
+
+#data
+<html><body></body></html>x<!-- Hi there -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): expected-eof-but-got-char
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+
+#data
+<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): expected-eof-but-got-char
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+| <!--  Again  -->
+
+#data
+<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): expected-eof-but-got-char
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+| <!--  Again  -->
+
+#data
+<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): XXX-undefined-error
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <rp>
+|           "xx"
+
+#data
+<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,27): XXX-undefined-error
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <rt>
+|           "xx"
+
+#data
+<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <!-- 1 -->
+|     <noframes>
+|       "A"
+|     <!-- 2 -->
+|   <!-- 3 -->
+|   <noframes>
+|     "B"
+|   <!-- 4 -->
+|   <noframes>
+|     "C"
+| <!-- 5 -->
+| <!-- 6 -->
+
+#data
+<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
+#errors
+(1,8): expected-doctype-but-got-start-tag
+(1,25): unexpected-select-in-select
+(1,59): unexpected-select-in-select
+(1,93): unexpected-select-in-select
+(1,127): unexpected-select-in-select
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "A"
+|     <option>
+|       "B"
+|       <select>
+|         <option>
+|           "C"
+|     <option>
+|       "D"
+|       <select>
+|         <option>
+|           "E"
+|     <option>
+|       "F"
+|       <select>
+|         <option>
+|           "G"
+
+#data
+<dd><dd><dt><dt><dd><li><li>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|     <dd>
+|     <dt>
+|     <dt>
+|     <dd>
+|       <li>
+|       <li>
+
+#data
+<div><b></div><div><nobr>a<nobr>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,14): end-tag-too-early
+(1,32): unexpected-start-tag-implies-end-tag
+(1,32): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <b>
+|     <div>
+|       <b>
+|         <nobr>
+|           "a"
+|         <nobr>
+
+#data
+<head></head>
+<body></body>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   "
+"
+|   <body>
+
+#data
+<head></head> <style></style>ddd
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,21): unexpected-start-tag-out-of-my-head
+#document
+| <html>
+|   <head>
+|     <style>
+|   " "
+|   <body>
+|     "ddd"
+
+#data
+<kbd><table></kbd><col><select><tr>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,18): unexpected-end-tag-implies-table-voodoo
+(1,18): unexpected-end-tag
+(1,31): unexpected-start-tag-implies-table-voodoo
+(1,35): unexpected-table-element-start-tag-in-select-in-table
+(1,35): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     <kbd>
+|       <select>
+|       <table>
+|         <colgroup>
+|           <col>
+|         <tbody>
+|           <tr>
+
+#data
+<kbd><table></kbd><col><select><tr></table><div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,18): unexpected-end-tag-implies-table-voodoo
+(1,18): unexpected-end-tag
+(1,31): unexpected-start-tag-implies-table-voodoo
+(1,35): unexpected-table-element-start-tag-in-select-in-table
+(1,48): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <kbd>
+|       <select>
+|       <table>
+|         <colgroup>
+|           <col>
+|         <tbody>
+|           <tr>
+|       <div>
+
+#data
+<a><li><style></style><title></title></a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,41): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <li>
+|       <a>
+|         <style>
+|         <title>
+
+#data
+<font></p><p><meta><title></title></font>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,10): unexpected-end-tag
+(1,41): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <p>
+|     <p>
+|       <font>
+|         <meta>
+|         <title>
+
+#data
+<a><center><title></title><a>
+#errors
+(1,3): expected-doctype-but-got-start-tag
+(1,29): unexpected-start-tag-implies-end-tag
+(1,29): adoption-agency-1.3
+(1,29): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <center>
+|       <a>
+|         <title>
+|       <a>
+
+#data
+<svg><title><div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <div>
+
+#data
+<svg><title><rect><div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,23): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <rect>
+|           <div>
+
+#data
+<svg><title><svg><div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,22): unexpected-html-element-in-foreign-content
+(1,22): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <svg svg>
+|         <div>
+
+#data
+<img <="" FAIL>
+#errors
+(1,6): invalid-character-in-attribute-name
+(1,15): expected-doctype-but-got-start-tag
+#new-errors
+(1:6) unexpected-character-in-attribute-name
+#document
+| <html>
+|   <head>
+|   <body>
+|     <img>
+|       <=""
+|       fail=""
+
+#data
+<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
+#errors
+(1,4): expected-doctype-but-got-start-tag
+(1,23): non-void-element-with-trailing-solidus
+(1,29): end-tag-too-early
+#new-errors
+(1:9-1:24) non-void-html-element-start-tag-with-trailing-solidus
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <div>
+|           id="foo"
+|           "A"
+|       <li>
+|         "B"
+|         <div>
+|           "C"
+
+#data
+<svg><em><desc></em>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,9): unexpected-html-element-in-foreign-content
+(1,20): adoption-agency-1.3
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <em>
+|       <desc>
+
+#data
+<table><tr><td><svg><desc><td></desc><circle>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
+|             <circle>
+
+#data
+<svg><tfoot></mi><td>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+(1,17): unexpected-end-tag
+(1,17): unexpected-end-tag
+(1,21): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg tfoot>
+|         <svg td>
+
+#data
+<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mrow>
+|         <math mrow>
+|           <math mn>
+|             "1"
+|         <math mi>
+|           "a"
+
+#data
+<!doctype html><input type="hidden"><frameset>
+#errors
+(1,46): unexpected-start-tag
+(1,46): eof-in-frameset
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><input type="button"><frameset>
+#errors
+(1,46): unexpected-start-tag
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|       type="button"
diff --git a/internal/html/testdata/webkit/webkit02.dat b/internal/html/testdata/webkit/webkit02.dat
new file mode 100644 (file)
index 0000000..791991d
--- /dev/null
@@ -0,0 +1,303 @@
+#data
+<foo bar=qux/>
+#errors
+(1,14): expected-doctype-but-got-start-tag
+(1,14): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="qux/"
+
+#data
+<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#script-on
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       id="status"
+|       <noscript>
+|         "<strong>A</strong>"
+|       <span>
+|         "B"
+
+#data
+<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
+#errors
+(1,15): expected-doctype-but-got-start-tag
+#script-off
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       id="status"
+|       <noscript>
+|         <strong>
+|           "A"
+|       <span>
+|         "B"
+
+#data
+<div><sarcasm><div></div></sarcasm></div>
+#errors
+(1,5): expected-doctype-but-got-start-tag
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <sarcasm>
+|         <div>
+
+#data
+<html><body><img src="" border="0" alt="><div>A</div></body></html>
+#errors
+(1,6): expected-doctype-but-got-start-tag
+(1,67): eof-in-attribute-value-double-quote
+#new-errors
+(1:68) eof-in-tag
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<table><td></tbody>A
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,20): foster-parenting-character
+(1,20): eof-in-table
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><td></thead>A
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,19): XXX-undefined-error
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<table><td></tfoot>A
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,11): unexpected-cell-in-table-body
+(1,19): XXX-undefined-error
+(1,20): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<table><thead><td></tbody>A
+#errors
+(1,7): expected-doctype-but-got-start-tag
+(1,18): unexpected-cell-in-table-body
+(1,26): XXX-undefined-error
+(1,27): expected-closing-tag-but-got-eof
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<legend>test</legend>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <legend>
+|       "test"
+
+#data
+<table><input>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     <table>
+
+#data
+<b><em><foo><foo><aside></b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <em>
+|         <foo>
+|           <foo>
+|     <em>
+|       <aside>
+|         <b>
+
+#data
+<b><em><foo><foo><aside></b></em>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <em>
+|         <foo>
+|           <foo>
+|     <em>
+|     <aside>
+|       <em>
+|         <b>
+
+#data
+<b><em><foo><foo><foo><aside></b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <em>
+|         <foo>
+|           <foo>
+|             <foo>
+|     <aside>
+|       <b>
+
+#data
+<b><em><foo><foo><foo><aside></b></em>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <em>
+|         <foo>
+|           <foo>
+|             <foo>
+|     <aside>
+|       <b>
+
+#data
+<b><em><foo><foo><foo><foo><foo><foo><foo><foo><foo><foo><aside></b></em>
+#errors
+#document-fragment
+div
+#document
+| <b>
+|   <em>
+|     <foo>
+|       <foo>
+|         <foo>
+|           <foo>
+|             <foo>
+|               <foo>
+|                 <foo>
+|                   <foo>
+|                     <foo>
+|                       <foo>
+| <aside>
+|   <b>
+
+#data
+<b><em><foo><foob><foob><foob><foob><fooc><fooc><fooc><fooc><food><aside></b></em>
+#errors
+#document-fragment
+div
+#document
+| <b>
+|   <em>
+|     <foo>
+|       <foob>
+|         <foob>
+|           <foob>
+|             <foob>
+|               <fooc>
+|                 <fooc>
+|                   <fooc>
+|                     <fooc>
+|                       <food>
+| <aside>
+|   <b>
+
+#data
+<option><XH<optgroup></optgroup>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<svg><foreignObject><div>foo</div><plaintext></foreignObject></svg><div>bar</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <div>
+|           "foo"
+|         <plaintext>
+|           "</foreignObject></svg><div>bar</div>"
+
+#data
+<svg><foreignObject></foreignObject><title></svg>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|       <svg title>
+|     "foo"
+
+#data
+</foreignObject><plaintext><div>foo</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "<div>foo</div>"
diff --git a/internal/html/token.go b/internal/html/token.go
new file mode 100644 (file)
index 0000000..a4861bd
--- /dev/null
@@ -0,0 +1,1272 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "errors"
+       "io"
+       "strconv"
+       "strings"
+
+       "git.earlybird.gay/today-engine/internal/html/atom"
+)
+
+// A TokenType is the type of a Token.
+// Its String method returns a short name for each of the constants below.
+type TokenType uint32
+
+const (
+       // ErrorToken means that an error occurred during tokenization.
+       // It is the zero value of TokenType.
+       ErrorToken TokenType = iota
+       // TextToken means a text node.
+       TextToken
+       // A StartTagToken looks like <a>.
+       StartTagToken
+       // An EndTagToken looks like </a>.
+       EndTagToken
+       // A SelfClosingTagToken tag looks like <br/>.
+       SelfClosingTagToken
+       // A CommentToken looks like <!--x-->.
+       CommentToken
+       // A DoctypeToken looks like <!DOCTYPE x>.
+       DoctypeToken
+)
+
+// ErrBufferExceeded means that the buffering limit was exceeded.
+// readByte stores it in the Tokenizer's err field once the raw bytes of the
+// current token reach a non-zero maxBuf.
+var ErrBufferExceeded = errors.New("max buffer exceeded")
+
+// String returns a string representation of the TokenType.
+func (t TokenType) String() string {
+       switch t {
+       case ErrorToken:
+               return "Error"
+       case TextToken:
+               return "Text"
+       case StartTagToken:
+               return "StartTag"
+       case EndTagToken:
+               return "EndTag"
+       case SelfClosingTagToken:
+               return "SelfClosingTag"
+       case CommentToken:
+               return "Comment"
+       case DoctypeToken:
+               return "Doctype"
+       }
+       return "Invalid(" + strconv.Itoa(int(t)) + ")"
+}
+
+// An Attribute is a namespace-key-value triple describing one attribute of a
+// tag.
+//
+// Namespace is only used by the parser, not the tokenizer.
+type Attribute struct {
+       // Namespace is non-empty for foreign attributes like xlink.
+       Namespace string
+       // Key is alphabetic, and hence does not contain escapable characters
+       // like '&', '<' or '>'.
+       Key string
+       // Val is unescaped: it looks like "a<b" rather than "a&lt;b".
+       Val string
+}
+
+// A Token consists of a TokenType and some Data (tag name for start and end
+// tags, content for text, comments and doctypes). A tag Token may also contain
+// a slice of Attributes. Data is unescaped for all Tokens (it looks like "a<b"
+// rather than "a&lt;b"). For tag Tokens, DataAtom is the atom for Data, or
+// zero if Data is not a known tag name.
+type Token struct {
+       // Type is the kind of token; it selects how String renders the Token.
+       Type     TokenType
+       // DataAtom is the atom for Data in tag Tokens, or zero if unknown.
+       DataAtom atom.Atom
+       // Data is the unescaped tag name or text/comment/doctype content.
+       Data     string
+       // Attr holds the attributes of a tag Token.
+       Attr     []Attribute
+}
+
+// tagString renders a tag Token's Data followed by each of its attributes in
+// the form key="value", with every value passed through escape. A Token
+// without attributes yields Data alone.
+func (t Token) tagString() string {
+       if len(t.Attr) == 0 {
+               return t.Data
+       }
+       var out bytes.Buffer
+       out.WriteString(t.Data)
+       for i := range t.Attr {
+               out.WriteByte(' ')
+               out.WriteString(t.Attr[i].Key)
+               out.WriteString(`="`)
+               escape(&out, t.Attr[i].Val)
+               out.WriteByte('"')
+       }
+       return out.String()
+}
+
+// String returns a string representation of the Token.
+func (t Token) String() string {
+       switch t.Type {
+       case ErrorToken:
+               return ""
+       case TextToken:
+               return EscapeString(t.Data)
+       case StartTagToken:
+               return "<" + t.tagString() + ">"
+       case EndTagToken:
+               return "</" + t.tagString() + ">"
+       case SelfClosingTagToken:
+               return "<" + t.tagString() + "/>"
+       case CommentToken:
+               return "<!--" + escapeCommentString(t.Data) + "-->"
+       case DoctypeToken:
+               return "<!DOCTYPE " + EscapeString(t.Data) + ">"
+       }
+       return "Invalid(" + strconv.Itoa(int(t.Type)) + ")"
+}
+
+// span is a half-open range of bytes in a Tokenizer's buffer.
+type span struct {
+       // start is the index of the first byte in the range (inclusive).
+       start int
+       // end is the index just past the last byte in the range (exclusive).
+       end int
+}
+
+// A Tokenizer returns a stream of HTML Tokens.
+//
+// All spans stored in a Tokenizer index into buf; readByte rewrites them
+// whenever it moves the current token's bytes to the start of the buffer.
+type Tokenizer struct {
+       // r is the source of the HTML text.
+       r io.Reader
+       // tt is the TokenType of the current token.
+       tt TokenType
+       // err is the first error encountered during tokenization. It is possible
+       // for tt != ErrorToken && err != nil to hold: this means that Next returned a
+       // valid token but the subsequent Next call will return an error token.
+       // For example, if the HTML text input was just "plain", then the first
+       // Next call would set z.err to io.EOF but return a TextToken, and all
+       // subsequent Next calls would return an ErrorToken.
+       // err is never reset. Once it becomes non-nil, it stays non-nil.
+       err error
+       // readErr is the error returned by the io.Reader r. It is separate from
+       // err because it is valid for an io.Reader to return (n int, err1 error)
+       // such that n > 0 && err1 != nil, and callers should always process the
+       // n > 0 bytes before considering the error err1.
+       readErr error
+       // buf[raw.start:raw.end] holds the raw bytes of the current token.
+       // buf[raw.end:] is buffered input that will yield future tokens.
+       raw span
+       buf []byte
+       // maxBuf limits the data buffered in buf. A value of 0 means unlimited.
+       // readByte enforces it against the raw length of the current token and
+       // sets err to ErrBufferExceeded when the limit is reached.
+       maxBuf int
+       // buf[data.start:data.end] holds the raw bytes of the current token's data:
+       // a text token's text, a tag token's tag name, etc.
+       data span
+       // pendingAttr is the attribute key and value currently being tokenized.
+       // When complete, pendingAttr is pushed onto attr. nAttrReturned is
+       // incremented on each call to TagAttr.
+       pendingAttr   [2]span
+       attr          [][2]span
+       nAttrReturned int
+       // rawTag is the "script" in "</script>" that closes the next token. If
+       // non-empty, the subsequent call to Next will return a raw or RCDATA text
+       // token: one that treats "<p>" as text instead of an element.
+       // rawTag's contents are lower-cased.
+       rawTag string
+       // textIsRaw is whether the current text token's data is not escaped.
+       textIsRaw bool
+       // convertNUL is whether NUL bytes in the current token's data should
+       // be converted into \ufffd replacement characters.
+       convertNUL bool
+       // allowCDATA is whether CDATA sections are allowed in the current context.
+       // It is set through the AllowCDATA method.
+       allowCDATA bool
+}
+
+// AllowCDATA sets whether or not the tokenizer recognizes <![CDATA[foo]]> as
+// the text "foo". The default value is false, which means to recognize it as
+// a bogus comment "<!-- [CDATA[foo]] -->" instead.
+//
+// Strictly speaking, an HTML5 compliant tokenizer should allow CDATA if and
+// only if tokenizing foreign content, such as MathML and SVG. However,
+// tracking foreign-contentness is difficult to do purely in the tokenizer,
+// as opposed to the parser, due to HTML integration points: an <svg> element
+// can contain a <foreignObject> that is foreign-to-SVG but not foreign-to-
+// HTML. For strict compliance with the HTML5 tokenization algorithm, it is the
+// responsibility of the user of a tokenizer to call AllowCDATA as appropriate.
+// In practice, if using the tokenizer without caring whether MathML or SVG
+// CDATA is text or comments, such as tokenizing HTML to find all the anchor
+// text, it is acceptable to ignore this responsibility.
+func (z *Tokenizer) AllowCDATA(allowCDATA bool) {
+       // The flag is consulted by readMarkupDeclaration before it attempts to
+       // read a CDATA section.
+       z.allowCDATA = allowCDATA
+}
+
+// NextIsNotRawText instructs the tokenizer that the next token should not be
+// considered as 'raw text'. Some elements, such as script and title elements,
+// normally require the next token after the opening tag to be 'raw text' that
+// has no child elements. For example, tokenizing "<title>a<b>c</b>d</title>"
+// yields a start tag token for "<title>", a text token for "a<b>c</b>d", and
+// an end tag token for "</title>". There are no distinct start tag or end tag
+// tokens for the "<b>" and "</b>".
+//
+// This tokenizer implementation will generally look for raw text at the right
+// times. Strictly speaking, an HTML5 compliant tokenizer should not look for
+// raw text if in foreign content: <title> generally needs raw text, but a
+// <title> inside an <svg> does not. Another example is that a <textarea>
+// generally needs raw text, but a <textarea> is not allowed as an immediate
+// child of a <select>; in normal parsing, a <textarea> implies </select>, but
+// one cannot close the implicit element when parsing a <select>'s InnerHTML.
+// Similarly to AllowCDATA, tracking the correct moment to override raw-text-
+// ness is difficult to do purely in the tokenizer, as opposed to the parser.
+// For strict compliance with the HTML5 tokenization algorithm, it is the
+// responsibility of the user of a tokenizer to call NextIsNotRawText as
+// appropriate. In practice, like AllowCDATA, it is acceptable to ignore this
+// responsibility for basic usage.
+//
+// Note that this 'raw text' concept is different from the one offered by the
+// Tokenizer.Raw method.
+func (z *Tokenizer) NextIsNotRawText() {
+       // readStartTag sets rawTag after a raw-text or RCDATA start tag; an empty
+       // rawTag means the next token is tokenized normally.
+       z.rawTag = ""
+}
+
+// Err returns the error associated with the most recent ErrorToken token.
+// This is typically io.EOF, meaning the end of tokenization.
+func (z *Tokenizer) Err() error {
+       if z.tt != ErrorToken {
+               return nil
+       }
+       return z.err
+}
+
+// readByte returns the next byte from the input stream, doing a buffered read
+// from z.r into z.buf if necessary. z.buf[z.raw.start:z.raw.end] remains a contiguous byte
+// slice that holds all the bytes read so far for the current token.
+// It sets z.err if the underlying reader returns an error, or to
+// ErrBufferExceeded if the current token reaches a non-zero z.maxBuf; in
+// both cases the returned byte is 0 and must be ignored.
+// Pre-condition: z.err == nil.
+func (z *Tokenizer) readByte() byte {
+       if z.raw.end >= len(z.buf) {
+               // Our buffer is exhausted and we have to read from z.r. Check if the
+               // previous read resulted in an error.
+               if z.readErr != nil {
+                       z.err = z.readErr
+                       return 0
+               }
+               // We copy z.buf[z.raw.start:z.raw.end] to the beginning of z.buf. If the length
+               // z.raw.end - z.raw.start is more than half the capacity of z.buf, then we
+               // allocate a new buffer before the copy.
+               c := cap(z.buf)
+               d := z.raw.end - z.raw.start
+               var buf1 []byte
+               if 2*d > c {
+                       buf1 = make([]byte, d, 2*c)
+               } else {
+                       buf1 = z.buf[:d]
+               }
+               copy(buf1, z.buf[z.raw.start:z.raw.end])
+               if x := z.raw.start; x != 0 {
+                       // Adjust the data/attr spans to refer to the same contents after the copy.
+                       // Every span is shifted left by x, the old start of the current token.
+                       z.data.start -= x
+                       z.data.end -= x
+                       z.pendingAttr[0].start -= x
+                       z.pendingAttr[0].end -= x
+                       z.pendingAttr[1].start -= x
+                       z.pendingAttr[1].end -= x
+                       for i := range z.attr {
+                               z.attr[i][0].start -= x
+                               z.attr[i][0].end -= x
+                               z.attr[i][1].start -= x
+                               z.attr[i][1].end -= x
+                       }
+               }
+               // The current token now occupies buf1[0:d].
+               z.raw.start, z.raw.end, z.buf = 0, d, buf1[:d]
+               // Now that we have copied the live bytes to the start of the buffer,
+               // we read from z.r into the remainder.
+               var n int
+               n, z.readErr = readAtLeastOneByte(z.r, buf1[d:cap(buf1)])
+               if n == 0 {
+                       // Nothing was read, so the read error is reported immediately.
+                       z.err = z.readErr
+                       return 0
+               }
+               // Some bytes were read; any accompanying read error stays in z.readErr
+               // and is only surfaced once these bytes have been consumed.
+               z.buf = buf1[:d+n]
+       }
+       x := z.buf[z.raw.end]
+       z.raw.end++
+       // Enforce the optional per-token size limit. The byte has already been
+       // consumed (z.raw.end was advanced) when the error is reported.
+       if z.maxBuf > 0 && z.raw.end-z.raw.start >= z.maxBuf {
+               z.err = ErrBufferExceeded
+               return 0
+       }
+       return x
+}
+
+// Buffered returns a slice containing data buffered but not yet tokenized.
+// The result is a sub-slice of the Tokenizer's internal buffer, not a copy.
+func (z *Tokenizer) Buffered() []byte {
+       return z.buf[z.raw.end:]
+}
+
+// readAtLeastOneByte reads from r into b, retrying while r.Read reports
+// (0, nil) so that the caller never observes that result. After 100
+// consecutive empty reads it gives up and returns io.ErrNoProgress.
+func readAtLeastOneByte(r io.Reader, b []byte) (int, error) {
+       const maxAttempts = 100
+       for attempt := 0; attempt < maxAttempts; attempt++ {
+               n, err := r.Read(b)
+               if n == 0 && err == nil {
+                       // No data and no error: try again.
+                       continue
+               }
+               return n, err
+       }
+       return 0, io.ErrNoProgress
+}
+
+// skipWhiteSpace skips past any white space.
+func (z *Tokenizer) skipWhiteSpace() {
+       if z.err != nil {
+               return
+       }
+       for {
+               c := z.readByte()
+               if z.err != nil {
+                       return
+               }
+               switch c {
+               case ' ', '\n', '\r', '\t', '\f':
+                       // No-op.
+               default:
+                       z.raw.end--
+                       return
+               }
+       }
+}
+
+// readRawOrRCDATA reads until the next "</foo>", where "foo" is z.rawTag and
+// is typically something like "script" or "textarea".
+func (z *Tokenizer) readRawOrRCDATA() {
+       if z.rawTag == "script" {
+               z.readScript()
+               z.textIsRaw = true
+               z.rawTag = ""
+               return
+       }
+loop:
+       for {
+               c := z.readByte()
+               if z.err != nil {
+                       break loop
+               }
+               if c != '<' {
+                       continue loop
+               }
+               c = z.readByte()
+               if z.err != nil {
+                       break loop
+               }
+               if c != '/' {
+                       z.raw.end--
+                       continue loop
+               }
+               if z.readRawEndTag() || z.err != nil {
+                       break loop
+               }
+       }
+       z.data.end = z.raw.end
+       // A textarea's or title's RCDATA can contain escaped entities.
+       z.textIsRaw = z.rawTag != "textarea" && z.rawTag != "title"
+       z.rawTag = ""
+}
+
+// readRawEndTag attempts to read a tag like "</foo>", where "foo" is z.rawTag.
+// If it succeeds, it backs up the input position to reconsume the tag and
+// returns true. Otherwise it returns false. The opening "</" has already been
+// consumed.
+func (z *Tokenizer) readRawEndTag() bool {
+       for i := 0; i < len(z.rawTag); i++ {
+               c := z.readByte()
+               if z.err != nil {
+                       return false
+               }
+               if c != z.rawTag[i] && c != z.rawTag[i]-('a'-'A') {
+                       z.raw.end--
+                       return false
+               }
+       }
+       c := z.readByte()
+       if z.err != nil {
+               return false
+       }
+       switch c {
+       case ' ', '\n', '\r', '\t', '\f', '/', '>':
+               // The 3 is 2 for the leading "</" plus 1 for the trailing character c.
+               z.raw.end -= 3 + len(z.rawTag)
+               return true
+       }
+       z.raw.end--
+       return false
+}
+
+// readScript reads until the next </script> tag, following the byzantine
+// rules for escaping/hiding the closing tag.
+//
+// It is written as a goto-driven state machine: each label below is one
+// state, and every state starts by reading one byte (or delegating to
+// readRawEndTag). The function returns when a read fails or when a closing
+// tag for z.rawTag is found; in either case the deferred function sets
+// z.data.end to the current input position.
+func (z *Tokenizer) readScript() {
+       defer func() {
+               z.data.end = z.raw.end
+       }()
+       var c byte
+
+       // Plain script text: consume bytes until a '<' is seen.
+scriptData:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '<' {
+               goto scriptDataLessThanSign
+       }
+       goto scriptData
+
+       // A '<' was read: '/' may begin an end tag and '!' may begin "<!--".
+       // Any other byte is pushed back and scanning resumes.
+scriptDataLessThanSign:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '/':
+               goto scriptDataEndTagOpen
+       case '!':
+               goto scriptDataEscapeStart
+       }
+       z.raw.end--
+       goto scriptData
+
+       // "</" was read: stop if it opens the closing tag (or on error).
+scriptDataEndTagOpen:
+       if z.readRawEndTag() || z.err != nil {
+               return
+       }
+       goto scriptData
+
+       // "<!" was read: a '-' continues towards "<!--".
+scriptDataEscapeStart:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '-' {
+               goto scriptDataEscapeStartDash
+       }
+       z.raw.end--
+       goto scriptData
+
+       // "<!-" was read: a second '-' completes "<!--".
+scriptDataEscapeStartDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '-' {
+               goto scriptDataEscapedDashDash
+       }
+       z.raw.end--
+       goto scriptData
+
+       // Inside a "<!--" section: watch for '-' and '<'.
+scriptDataEscaped:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataEscapedDash
+       case '<':
+               goto scriptDataEscapedLessThanSign
+       }
+       goto scriptDataEscaped
+
+       // Inside a "<!--" section, one '-' was just read.
+scriptDataEscapedDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataEscapedDashDash
+       case '<':
+               goto scriptDataEscapedLessThanSign
+       }
+       goto scriptDataEscaped
+
+       // Inside a "<!--" section, "--" was just read: a '>' completes "-->"
+       // and returns to plain script text.
+scriptDataEscapedDashDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataEscapedDashDash
+       case '<':
+               goto scriptDataEscapedLessThanSign
+       case '>':
+               goto scriptData
+       }
+       goto scriptDataEscaped
+
+       // Inside a "<!--" section, a '<' was read: '/' may begin an end tag and
+       // a letter may begin a nested "<script".
+       // NOTE(review): for any other byte this falls back to scriptData, whereas
+       // the WHATWG "script data escaped less-than sign state" reconsumes in the
+       // escaped state — confirm whether the difference is intended.
+scriptDataEscapedLessThanSign:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '/' {
+               goto scriptDataEscapedEndTagOpen
+       }
+       if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
+               goto scriptDataDoubleEscapeStart
+       }
+       z.raw.end--
+       goto scriptData
+
+       // Inside a "<!--" section, "</" was read: stop if it opens the closing
+       // tag (or on error).
+scriptDataEscapedEndTagOpen:
+       if z.readRawEndTag() || z.err != nil {
+               return
+       }
+       goto scriptDataEscaped
+
+       // Inside a "<!--" section, '<' plus a letter was read. Push the letter
+       // back and check, case-insensitively, for "script" followed by white
+       // space, '/' or '>'.
+scriptDataDoubleEscapeStart:
+       z.raw.end--
+       for i := 0; i < len("script"); i++ {
+               c = z.readByte()
+               if z.err != nil {
+                       return
+               }
+               if c != "script"[i] && c != "SCRIPT"[i] {
+                       z.raw.end--
+                       goto scriptDataEscaped
+               }
+       }
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case ' ', '\n', '\r', '\t', '\f', '/', '>':
+               goto scriptDataDoubleEscaped
+       }
+       z.raw.end--
+       goto scriptDataEscaped
+
+       // Inside a nested "<script" within a "<!--" section: a "</script>" here
+       // does not end the token (see scriptDataDoubleEscapeEnd).
+scriptDataDoubleEscaped:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataDoubleEscapedDash
+       case '<':
+               goto scriptDataDoubleEscapedLessThanSign
+       }
+       goto scriptDataDoubleEscaped
+
+       // Double-escaped, one '-' was just read.
+scriptDataDoubleEscapedDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataDoubleEscapedDashDash
+       case '<':
+               goto scriptDataDoubleEscapedLessThanSign
+       }
+       goto scriptDataDoubleEscaped
+
+       // Double-escaped, "--" was just read: a '>' completes "-->" and returns
+       // to plain script text.
+scriptDataDoubleEscapedDashDash:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch c {
+       case '-':
+               goto scriptDataDoubleEscapedDashDash
+       case '<':
+               goto scriptDataDoubleEscapedLessThanSign
+       case '>':
+               goto scriptData
+       }
+       goto scriptDataDoubleEscaped
+
+       // Double-escaped, a '<' was read: only '/' is of interest.
+scriptDataDoubleEscapedLessThanSign:
+       c = z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '/' {
+               goto scriptDataDoubleEscapeEnd
+       }
+       z.raw.end--
+       goto scriptDataDoubleEscaped
+
+       // Double-escaped, "</" was read. A matching end tag closes the nested
+       // script rather than the token: readRawEndTag backed the position up to
+       // the "<", so it is moved forward again past the tag name and the byte
+       // that followed it, and scanning continues in the escaped state.
+scriptDataDoubleEscapeEnd:
+       if z.readRawEndTag() {
+               z.raw.end += len("</script>")
+               goto scriptDataEscaped
+       }
+       if z.err != nil {
+               return
+       }
+       goto scriptDataDoubleEscaped
+}
+
+// readComment reads the next comment token starting with "<!--". The opening
+// "<!--" has already been consumed.
+//
+// On return z.data spans the comment's content, excluding the terminator
+// ("-->", "--!>", or the shortened ">" accepted right after the opening). If
+// the input ends first, calculateAbruptCommentDataEnd decides where the
+// content ends.
+func (z *Tokenizer) readComment() {
+       // When modifying this function, consider manually increasing the
+       // maxSuffixLen constant in func TestComments, from 6 to e.g. 9 or more.
+       // That increase should only be temporary, not committed, as it
+       // exponentially affects the test running time.
+
+       z.data.start = z.raw.end
+       defer func() {
+               if z.data.end < z.data.start {
+                       // It's a comment with no data, like <!-->.
+                       // Subtracting len("-->") below moved the end before the start.
+                       z.data.end = z.data.start
+               }
+       }()
+
+       // dashCount is the number of consecutive '-' bytes just read. beginning
+       // stays true until a byte other than '-' has been seen.
+       var dashCount int
+       beginning := true
+       for {
+               c := z.readByte()
+               if z.err != nil {
+                       z.data.end = z.calculateAbruptCommentDataEnd()
+                       return
+               }
+               switch c {
+               case '-':
+                       dashCount++
+                       continue
+               case '>':
+                       // "-->" ends the comment; so does a '>' that directly follows
+                       // the opening "<!--" and any dashes after it.
+                       if dashCount >= 2 || beginning {
+                               z.data.end = z.raw.end - len("-->")
+                               return
+                       }
+               case '!':
+                       if dashCount >= 2 {
+                               // "--!" was read; "--!>" also ends the comment.
+                               c = z.readByte()
+                               if z.err != nil {
+                                       z.data.end = z.calculateAbruptCommentDataEnd()
+                                       return
+                               } else if c == '>' {
+                                       z.data.end = z.raw.end - len("--!>")
+                                       return
+                               } else if c == '-' {
+                                       // The '-' after "--!" starts a new run of dashes.
+                                       dashCount = 1
+                                       beginning = false
+                                       continue
+                               }
+                       }
+               }
+               // Any other byte breaks the run of dashes.
+               dashCount = 0
+               beginning = false
+       }
+}
+
+// calculateAbruptCommentDataEnd returns the index at which the data of a
+// comment ends when the input stops before the comment is closed. A trailing
+// "--!", "--" or "-" after the opening "<!--" is treated as part of an
+// unfinished terminator and excluded from the data.
+func (z *Tokenizer) calculateAbruptCommentDataEnd() int {
+       const prefixLen = len("<!--")
+       raw := z.Raw()
+       if len(raw) < prefixLen {
+               return z.raw.end
+       }
+       body := raw[prefixLen:]
+       switch {
+       case hasSuffix(body, "--!"):
+               return z.raw.end - 3
+       case hasSuffix(body, "--"):
+               return z.raw.end - 2
+       case hasSuffix(body, "-"):
+               return z.raw.end - 1
+       }
+       return z.raw.end
+}
+
+func hasSuffix(b []byte, suffix string) bool {
+       if len(b) < len(suffix) {
+               return false
+       }
+       b = b[len(b)-len(suffix):]
+       for i := range b {
+               if b[i] != suffix[i] {
+                       return false
+               }
+       }
+       return true
+}
+
+// readUntilCloseAngle reads until the next ">".
+func (z *Tokenizer) readUntilCloseAngle() {
+       z.data.start = z.raw.end
+       for {
+               c := z.readByte()
+               if z.err != nil {
+                       z.data.end = z.raw.end
+                       return
+               }
+               if c == '>' {
+                       z.data.end = z.raw.end - len(">")
+                       return
+               }
+       }
+}
+
+// readMarkupDeclaration reads the next token starting with "<!". It might be
+// a "<!--comment-->", a "<!DOCTYPE foo>", a "<![CDATA[section]]>" or
+// "<!a bogus comment". The opening "<!" has already been consumed.
+func (z *Tokenizer) readMarkupDeclaration() TokenType {
+       z.data.start = z.raw.end
+       var c [2]byte
+       for i := 0; i < 2; i++ {
+               c[i] = z.readByte()
+               if z.err != nil {
+                       z.data.end = z.raw.end
+                       return CommentToken
+               }
+       }
+       if c[0] == '-' && c[1] == '-' {
+               z.readComment()
+               return CommentToken
+       }
+       z.raw.end -= 2
+       if z.readDoctype() {
+               return DoctypeToken
+       }
+       if z.allowCDATA && z.readCDATA() {
+               z.convertNUL = true
+               return TextToken
+       }
+       // It's a bogus comment.
+       z.readUntilCloseAngle()
+       return CommentToken
+}
+
+// readDoctype tries to read a doctype declaration after an already-consumed
+// "<!" and reports whether it found one. The keyword "DOCTYPE" is matched in
+// either case, byte by byte. On a mismatch the input position is reset to
+// z.data.start so the bytes can be read again.
+func (z *Tokenizer) readDoctype() bool {
+       const keyword = "DOCTYPE"
+       for i := 0; i < len(keyword); i++ {
+               c := z.readByte()
+               if z.err != nil {
+                       z.data.end = z.raw.end
+                       return false
+               }
+               upper := keyword[i]
+               if c == upper || c == upper+('a'-'A') {
+                       continue
+               }
+               // Back up to read the fragment of "DOCTYPE" again.
+               z.raw.end = z.data.start
+               return false
+       }
+       z.skipWhiteSpace()
+       if z.err != nil {
+               // Input ended after the keyword: the doctype has no data.
+               z.data.start = z.raw.end
+               z.data.end = z.raw.end
+               return true
+       }
+       z.readUntilCloseAngle()
+       return true
+}
+
+// readCDATA tries to read a CDATA section after an already-consumed "<!" and
+// reports whether it found one. The marker "[CDATA[" must match exactly; on a
+// mismatch the input position is reset to z.data.start. On success z.data
+// spans the section's content up to the closing "]]>", or up to the current
+// input position if a read fails first.
+func (z *Tokenizer) readCDATA() bool {
+       const marker = "[CDATA["
+       for i := 0; i < len(marker); i++ {
+               c := z.readByte()
+               if z.err != nil {
+                       z.data.end = z.raw.end
+                       return false
+               }
+               if c == marker[i] {
+                       continue
+               }
+               // Back up to read the fragment of "[CDATA[" again.
+               z.raw.end = z.data.start
+               return false
+       }
+       z.data.start = z.raw.end
+       // closers counts the consecutive ']' bytes just read.
+       closers := 0
+       for {
+               c := z.readByte()
+               if z.err != nil {
+                       z.data.end = z.raw.end
+                       return true
+               }
+               if c == ']' {
+                       closers++
+                       continue
+               }
+               if c == '>' && closers >= 2 {
+                       z.data.end = z.raw.end - len("]]>")
+                       return true
+               }
+               closers = 0
+       }
+}
+
+// startTagIn returns whether the start tag in z.buf[z.data.start:z.data.end]
+// case-insensitively matches any element of ss.
+func (z *Tokenizer) startTagIn(ss ...string) bool {
+loop:
+       for _, s := range ss {
+               if z.data.end-z.data.start != len(s) {
+                       continue loop
+               }
+               for i := 0; i < len(s); i++ {
+                       c := z.buf[z.data.start+i]
+                       if 'A' <= c && c <= 'Z' {
+                               c += 'a' - 'A'
+                       }
+                       if c != s[i] {
+                               continue loop
+                       }
+               }
+               return true
+       }
+       return false
+}
+
+// readStartTag reads the next start tag token. The opening "<a" has already
+// been consumed, where 'a' means anything in [A-Za-z].
+func (z *Tokenizer) readStartTag() TokenType {
+       z.readTag(true)
+       if z.err != nil {
+               return ErrorToken
+       }
+       // Several tags flag the tokenizer's next token as raw.
+       c, raw := z.buf[z.data.start], false
+       if 'A' <= c && c <= 'Z' {
+               c += 'a' - 'A'
+       }
+       switch c {
+       case 'i':
+               raw = z.startTagIn("iframe")
+       case 'n':
+               raw = z.startTagIn("noembed", "noframes", "noscript")
+       case 'p':
+               raw = z.startTagIn("plaintext")
+       case 's':
+               raw = z.startTagIn("script", "style")
+       case 't':
+               raw = z.startTagIn("textarea", "title")
+       case 'x':
+               raw = z.startTagIn("xmp")
+       }
+       if raw {
+               z.rawTag = strings.ToLower(string(z.buf[z.data.start:z.data.end]))
+       }
+       // Look for a self-closing token like "<br/>".
+       if z.err == nil && z.buf[z.raw.end-2] == '/' {
+               return SelfClosingTagToken
+       }
+       return StartTagToken
+}
+
+// readTag reads the next tag token and its attributes. If saveAttr, those
+// attributes are saved in z.attr, otherwise z.attr is set to an empty slice.
+// The opening "<a" or "</a" has already been consumed, where 'a' means anything
+// in [A-Za-z].
+func (z *Tokenizer) readTag(saveAttr bool) {
+       z.attr = z.attr[:0]
+       z.nAttrReturned = 0
+       // Read the tag name and attribute key/value pairs.
+       z.readTagName()
+       if z.skipWhiteSpace(); z.err != nil {
+               return
+       }
+       for {
+               c := z.readByte()
+               if z.err != nil || c == '>' {
+                       break
+               }
+               z.raw.end--
+               z.readTagAttrKey()
+               z.readTagAttrVal()
+               // Save pendingAttr if saveAttr and that attribute has a non-empty key.
+               if saveAttr && z.pendingAttr[0].start != z.pendingAttr[0].end {
+                       z.attr = append(z.attr, z.pendingAttr)
+               }
+               if z.skipWhiteSpace(); z.err != nil {
+                       break
+               }
+       }
+}
+
+// readTagName sets z.data to the "div" in "<div k=v>". The reader (z.raw.end)
+// is positioned such that the first byte of the tag name (the "d" in "<div")
+// has already been consumed.
+func (z *Tokenizer) readTagName() {
+       z.data.start = z.raw.end - 1
+       for {
+               c := z.readByte()
+               if z.err != nil {
+                       z.data.end = z.raw.end
+                       return
+               }
+               switch c {
+               case ' ', '\n', '\r', '\t', '\f':
+                       z.data.end = z.raw.end - 1
+                       return
+               case '/', '>':
+                       z.raw.end--
+                       z.data.end = z.raw.end
+                       return
+               }
+       }
+}
+
+// readTagAttrKey sets z.pendingAttr[0] to the "k" in "<div k=v>".
+// Precondition: z.err == nil.
+func (z *Tokenizer) readTagAttrKey() {
+       z.pendingAttr[0].start = z.raw.end
+       for {
+               c := z.readByte()
+               if z.err != nil {
+                       z.pendingAttr[0].end = z.raw.end
+                       return
+               }
+               switch c {
+               case '=':
+                       if z.pendingAttr[0].start+1 == z.raw.end {
+                               // WHATWG 13.2.5.32, if we see an equals sign before the attribute name
+                               // begins, we treat it as a character in the attribute name and continue.
+                               continue
+                       }
+                       fallthrough
+               case ' ', '\n', '\r', '\t', '\f', '/', '>':
+                       // WHATWG 13.2.5.33 Attribute name state
+                       // We need to reconsume the char in the after attribute name state to support the / character
+                       z.raw.end--
+                       z.pendingAttr[0].end = z.raw.end
+                       return
+               }
+       }
+}
+
+// readTagAttrVal sets z.pendingAttr[1] to the "v" in "<div k=v>".
+//
+// The value span is left empty when no '=' follows the key or when '>' comes
+// directly after the '='. Surrounding quotes are not part of the span. The
+// function returns as soon as skipWhiteSpace or readByte leaves z.err set.
+func (z *Tokenizer) readTagAttrVal() {
+       // Default to an empty value located at the current read position.
+       z.pendingAttr[1].start = z.raw.end
+       z.pendingAttr[1].end = z.raw.end
+       if z.skipWhiteSpace(); z.err != nil {
+               return
+       }
+       c := z.readByte()
+       if z.err != nil {
+               return
+       }
+       if c == '/' {
+               // WHATWG 13.2.5.34 After attribute name state
+               // U+002F SOLIDUS (/) - Switch to the self-closing start tag state.
+               return
+       }
+       if c != '=' {
+               // No value follows the key: un-read the byte and keep the empty span.
+               z.raw.end--
+               return
+       }
+       if z.skipWhiteSpace(); z.err != nil {
+               return
+       }
+       // The first byte after '=' decides whether the value is quoted, unquoted
+       // or missing.
+       quote := z.readByte()
+       if z.err != nil {
+               return
+       }
+       switch quote {
+       case '>':
+               // "k=>": the value is empty; un-read the '>' that closes the tag.
+               z.raw.end--
+               return
+
+       case '\'', '"':
+               // Quoted value: it starts after the opening quote and runs up to, but
+               // not including, the matching closing quote.
+               z.pendingAttr[1].start = z.raw.end
+               for {
+                       c := z.readByte()
+                       if z.err != nil {
+                               z.pendingAttr[1].end = z.raw.end
+                               return
+                       }
+                       if c == quote {
+                               z.pendingAttr[1].end = z.raw.end - 1
+                               return
+                       }
+               }
+
+       default:
+               // Unquoted value: the byte just read is already its first byte.
+               z.pendingAttr[1].start = z.raw.end - 1
+               for {
+                       c := z.readByte()
+                       if z.err != nil {
+                               z.pendingAttr[1].end = z.raw.end
+                               return
+                       }
+                       switch c {
+                       case ' ', '\n', '\r', '\t', '\f':
+                               // Whitespace ends the value and stays consumed.
+                               z.pendingAttr[1].end = z.raw.end - 1
+                               return
+                       case '>':
+                               // '>' ends the value and is un-read so it is seen again.
+                               z.raw.end--
+                               z.pendingAttr[1].end = z.raw.end
+                               return
+                       }
+               }
+       }
+}
+
+// Next scans the next token and returns its type.
+//
+// The returned type is also stored in z.tt. Once z.err is set, every call
+// returns ErrorToken.
+func (z *Tokenizer) Next() TokenType {
+       // The new token starts where the previous one ended, with empty data.
+       z.raw.start = z.raw.end
+       z.data.start = z.raw.end
+       z.data.end = z.raw.end
+       if z.err != nil {
+               z.tt = ErrorToken
+               return z.tt
+       }
+       // A non-empty z.rawTag means the upcoming bytes are read as text instead
+       // of being scanned for tags.
+       if z.rawTag != "" {
+               if z.rawTag == "plaintext" {
+                       // Read everything up to EOF.
+                       for z.err == nil {
+                               z.readByte()
+                       }
+                       z.data.end = z.raw.end
+                       z.textIsRaw = true
+               } else {
+                       z.readRawOrRCDATA()
+               }
+               // Only emit a text token if some text was actually collected;
+               // otherwise fall through to ordinary tokenization.
+               if z.data.end > z.data.start {
+                       z.tt = TextToken
+                       z.convertNUL = true
+                       return z.tt
+               }
+       }
+       z.textIsRaw = false
+       z.convertNUL = false
+
+loop:
+       for {
+               c := z.readByte()
+               if z.err != nil {
+                       break loop
+               }
+               if c != '<' {
+                       continue loop
+               }
+
+               // Check if the '<' we have just read is part of a tag, comment
+               // or doctype. If not, it's part of the accumulated text token.
+               c = z.readByte()
+               if z.err != nil {
+                       break loop
+               }
+               var tokenType TokenType
+               switch {
+               case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z':
+                       tokenType = StartTagToken
+               case c == '/':
+                       tokenType = EndTagToken
+               case c == '!' || c == '?':
+                       // We use CommentToken to mean any of "<!--actual comments-->",
+                       // "<!DOCTYPE declarations>" and "<?xml processing instructions?>".
+                       tokenType = CommentToken
+               default:
+                       // Reconsume the current character.
+                       z.raw.end--
+                       continue
+               }
+
+               // We have a non-text token, but we might have accumulated some text
+               // before that. If so, we return the text first, and return the non-
+               // text token on the subsequent call to Next.
+               // x is the offset of the '<': two bytes have been read since it.
+               if x := z.raw.end - len("<a"); z.raw.start < x {
+                       z.raw.end = x
+                       z.data.end = x
+                       z.tt = TextToken
+                       return z.tt
+               }
+               switch tokenType {
+               case StartTagToken:
+                       z.tt = z.readStartTag()
+                       return z.tt
+               case EndTagToken:
+                       c = z.readByte()
+                       if z.err != nil {
+                               break loop
+                       }
+                       if c == '>' {
+                               // "</>" does not generate a token at all. Generate an empty comment
+                               // to allow passthrough clients to pick up the data using Raw.
+                               // The data span is still empty here, so the comment has no text.
+                               z.tt = CommentToken
+                               return z.tt
+                       }
+                       if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' {
+                               z.readTag(false)
+                               if z.err != nil {
+                                       z.tt = ErrorToken
+                               } else {
+                                       z.tt = EndTagToken
+                               }
+                               return z.tt
+                       }
+                       // "</" followed by anything else is reported as a comment that
+                       // runs up to the next '>'.
+                       z.raw.end--
+                       z.readUntilCloseAngle()
+                       z.tt = CommentToken
+                       return z.tt
+               case CommentToken:
+                       if c == '!' {
+                               z.tt = z.readMarkupDeclaration()
+                               return z.tt
+                       }
+                       // "<?": un-read the '?' and report everything up to the next '>'
+                       // as a comment.
+                       z.raw.end--
+                       z.readUntilCloseAngle()
+                       z.tt = CommentToken
+                       return z.tt
+               }
+       }
+       // The loop is only left on a read error. Return any bytes read so far as
+       // a final text token; the error itself is reported by the next call.
+       if z.raw.start < z.raw.end {
+               z.data.end = z.raw.end
+               z.tt = TextToken
+               return z.tt
+       }
+       z.tt = ErrorToken
+       return z.tt
+}
+
+// Raw returns the unmodified text of the current token. Calling Next, Token,
+// Text, TagName or TagAttr may change the contents of the returned slice.
+//
+// The token stream's raw bytes partition the byte stream (up until an
+// ErrorToken). There are no overlaps or gaps between two consecutive token's
+// raw bytes. One implication is that the byte offset of the current token is
+// the sum of the lengths of all previous tokens' raw bytes.
+func (z *Tokenizer) Raw() []byte {
+       return z.buf[z.raw.start:z.raw.end]
+}
+
+// convertNewlines converts "\r" and "\r\n" in s to "\n".
+// The conversion happens in place, but the resulting slice may be shorter.
+func convertNewlines(s []byte) []byte {
+       for i, c := range s {
+               if c != '\r' {
+                       continue
+               }
+
+               src := i + 1
+               if src >= len(s) || s[src] != '\n' {
+                       s[i] = '\n'
+                       continue
+               }
+
+               dst := i
+               for src < len(s) {
+                       if s[src] == '\r' {
+                               if src+1 < len(s) && s[src+1] == '\n' {
+                                       src++
+                               }
+                               s[dst] = '\n'
+                       } else {
+                               s[dst] = s[src]
+                       }
+                       src++
+                       dst++
+               }
+               return s[:dst]
+       }
+       return s
+}
+
+// Byte sequences used by Text when it replaces NUL bytes in token text.
+var (
+       // nul is a single NUL byte.
+       nul         = []byte("\x00")
+       // replacement is the UTF-8 encoding of U+FFFD, substituted for each NUL.
+       replacement = []byte("\ufffd")
+)
+
+// Text returns the unescaped text of a text, comment or doctype token. The
+// contents of the returned slice may change on the next call to Next.
+func (z *Tokenizer) Text() []byte {
+       switch z.tt {
+       case TextToken, CommentToken, DoctypeToken:
+               s := z.buf[z.data.start:z.data.end]
+               z.data.start = z.raw.end
+               z.data.end = z.raw.end
+               s = convertNewlines(s)
+               if (z.convertNUL || z.tt == CommentToken) && bytes.Contains(s, nul) {
+                       s = bytes.Replace(s, nul, replacement, -1)
+               }
+               if !z.textIsRaw {
+                       s = unescape(s, false)
+               }
+               return s
+       }
+       return nil
+}
+
+// TagName returns the lower-cased name of a tag token (the `img` out of
+// `<IMG SRC="foo">`) and whether the tag has attributes.
+// The contents of the returned slice may change on the next call to Next.
+func (z *Tokenizer) TagName() (name []byte, hasAttr bool) {
+       if z.data.start < z.data.end {
+               switch z.tt {
+               case StartTagToken, EndTagToken, SelfClosingTagToken:
+                       s := z.buf[z.data.start:z.data.end]
+                       z.data.start = z.raw.end
+                       z.data.end = z.raw.end
+                       return lower(s), z.nAttrReturned < len(z.attr)
+               }
+       }
+       return nil, false
+}
+
+// TagAttr returns the lower-cased key and unescaped value of the next unparsed
+// attribute for the current tag token and whether there are more attributes.
+// The contents of the returned slices may change on the next call to Next.
+func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
+       if z.nAttrReturned < len(z.attr) {
+               switch z.tt {
+               case StartTagToken, SelfClosingTagToken:
+                       x := z.attr[z.nAttrReturned]
+                       z.nAttrReturned++
+                       key = z.buf[x[0].start:x[0].end]
+                       val = z.buf[x[1].start:x[1].end]
+                       return key, unescape(convertNewlines(val), true), z.nAttrReturned < len(z.attr)
+               }
+       }
+       return nil, nil, false
+}
+
+// Token returns the current Token. The result's Data and Attr values remain
+// valid after subsequent Next calls.
+func (z *Tokenizer) Token() Token {
+       t := Token{Type: z.tt}
+       switch z.tt {
+       case TextToken, CommentToken, DoctypeToken:
+               t.Data = string(z.Text())
+       case StartTagToken, SelfClosingTagToken, EndTagToken:
+               name, moreAttr := z.TagName()
+               for moreAttr {
+                       var key, val []byte
+                       key, val, moreAttr = z.TagAttr()
+                       t.Attr = append(t.Attr, Attribute{"", atom.String(key), string(val)})
+               }
+               if a := atom.Lookup(name); a != 0 {
+                       t.DataAtom, t.Data = a, a.String()
+               } else {
+                       t.DataAtom, t.Data = 0, string(name)
+               }
+       }
+       return t
+}
+
+// SetMaxBuf sets a limit on the amount of data buffered during tokenization.
+// A value of 0 means unlimited.
+//
+// The limit is only stored here, in z.maxBuf. TestMaxBuffer shows the
+// observable effect: once the limit is exceeded, Next returns ErrorToken and
+// Err returns ErrBufferExceeded.
+func (z *Tokenizer) SetMaxBuf(n int) {
+       z.maxBuf = n
+}
+
+// NewTokenizer returns a new HTML Tokenizer for the given Reader.
+// The input is assumed to be UTF-8 encoded.
+//
+// It is equivalent to calling NewTokenizerFragment with an empty contextTag.
+func NewTokenizer(r io.Reader) *Tokenizer {
+       return NewTokenizerFragment(r, "")
+}
+
+// NewTokenizerFragment returns a new HTML Tokenizer for the given Reader, for
+// tokenizing an existing element's InnerHTML fragment. contextTag is that
+// element's tag, such as "div" or "iframe".
+//
+// For example, how the InnerHTML "a<b" is tokenized depends on whether it is
+// for a <p> tag or a <script> tag.
+//
+// The input is assumed to be UTF-8 encoded.
+func NewTokenizerFragment(r io.Reader, contextTag string) *Tokenizer {
+       z := &Tokenizer{
+               r:   r,
+               buf: make([]byte, 0, 4096),
+       }
+       if contextTag != "" {
+               switch s := strings.ToLower(contextTag); s {
+               case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "title", "textarea", "xmp":
+                       z.rawTag = s
+               }
+       }
+       return z
+}
diff --git a/internal/html/token_test.go b/internal/html/token_test.go
new file mode 100644 (file)
index 0000000..a36d112
--- /dev/null
@@ -0,0 +1,917 @@
+// Copyright 2010 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package html
+
+import (
+       "bytes"
+       "io"
+       "os"
+       "reflect"
+       "runtime"
+       "strings"
+       "testing"
+)
+
+// issue58246 is a conditional comment ("[if gte mso 12]") wrapping an XML
+// block. tokenTests uses it as input and expects it back unchanged as a
+// single comment token.
+// https://github.com/golang/go/issues/58246
+const issue58246 = `<!--[if gte mso 12]>
+  <xml>
+      <o:OfficeDocumentSettings>
+      <o:AllowPNG/>
+      <o:PixelsPerInch>96</o:PixelsPerInch>
+      </o:OfficeDocumentSettings>
+    </xml>
+<![endif]-->`
+
+// tokenTest is a single tokenizer test case: an HTML input together with the
+// token strings the tokenizer is expected to produce for it.
+type tokenTest struct {
+       // A short description of the test case.
+       desc string
+       // The HTML to parse.
+       html string
+       // The string representations of the expected tokens, joined by '$'.
+       // An empty golden means that no tokens are expected.
+       golden string
+}
+
+var tokenTests = []tokenTest{
+       {
+               "empty",
+               "",
+               "",
+       },
+       // A single text node. The tokenizer should not break text nodes on whitespace,
+       // nor should it normalize whitespace within a text node.
+       {
+               "text",
+               "foo  bar",
+               "foo  bar",
+       },
+       // An entity.
+       {
+               "entity",
+               "one &lt; two",
+               "one &lt; two",
+       },
+       // A start, self-closing and end tag. The tokenizer does not care if the start
+       // and end tokens don't match; that is the job of the parser.
+       {
+               "tags",
+               "<a>b<c/>d</e>",
+               "<a>$b$<c/>$d$</e>",
+       },
+       // Angle brackets that aren't a tag.
+       {
+               "not a tag #0",
+               "<",
+               "&lt;",
+       },
+       {
+               "not a tag #1",
+               "</",
+               "&lt;/",
+       },
+       {
+               "not a tag #2",
+               "</>",
+               "<!---->",
+       },
+       {
+               "not a tag #3",
+               "a</>b",
+               "a$<!---->$b",
+       },
+       {
+               "not a tag #4",
+               "</ >",
+               "<!-- -->",
+       },
+       {
+               "not a tag #5",
+               "</.",
+               "<!--.-->",
+       },
+       {
+               "not a tag #6",
+               "</.>",
+               "<!--.-->",
+       },
+       {
+               "not a tag #7",
+               "a < b",
+               "a &lt; b",
+       },
+       {
+               "not a tag #8",
+               "<.>",
+               "&lt;.&gt;",
+       },
+       {
+               "not a tag #9",
+               "a<<<b>>>c",
+               "a&lt;&lt;$<b>$&gt;&gt;c",
+       },
+       {
+               "not a tag #10",
+               "if x<0 and y < 0 then x*y>0",
+               "if x&lt;0 and y &lt; 0 then x*y&gt;0",
+       },
+       {
+               "not a tag #11",
+               "<<p>",
+               "&lt;$<p>",
+       },
+       // EOF in a tag name.
+       {
+               "tag name eof #0",
+               "<a",
+               "",
+       },
+       {
+               "tag name eof #1",
+               "<a ",
+               "",
+       },
+       {
+               "tag name eof #2",
+               "a<b",
+               "a",
+       },
+       {
+               "tag name eof #3",
+               "<a><b",
+               "<a>",
+       },
+       {
+               "tag name eof #4",
+               `<a x`,
+               ``,
+       },
+       // Some malformed tags that are missing a '>'.
+       {
+               "malformed tag #0",
+               `<p</p>`,
+               `<p< p="">`,
+       },
+       {
+               "malformed tag #1",
+               `<p </p>`,
+               `<p <="" p="">`,
+       },
+       {
+               "malformed tag #2",
+               `<p id`,
+               ``,
+       },
+       {
+               "malformed tag #3",
+               `<p id=`,
+               ``,
+       },
+       {
+               "malformed tag #4",
+               `<p id=>`,
+               `<p id="">`,
+       },
+       {
+               "malformed tag #5",
+               `<p id=0`,
+               ``,
+       },
+       {
+               "malformed tag #6",
+               `<p id=0</p>`,
+               `<p id="0&lt;/p">`,
+       },
+       {
+               "malformed tag #7",
+               `<p id="0</p>`,
+               ``,
+       },
+       {
+               "malformed tag #8",
+               `<p id="0"</p>`,
+               `<p id="0" <="" p="">`,
+       },
+       {
+               "malformed tag #9",
+               `<p></p id`,
+               `<p>`,
+       },
+       // Raw text and RCDATA.
+       {
+               "basic raw text",
+               "<script><a></b></script>",
+               "<script>$&lt;a&gt;&lt;/b&gt;$</script>",
+       },
+       {
+               "unfinished script end tag",
+               "<SCRIPT>a</SCR",
+               "<script>$a&lt;/SCR",
+       },
+       {
+               "broken script end tag",
+               "<SCRIPT>a</SCR ipt>",
+               "<script>$a&lt;/SCR ipt&gt;",
+       },
+       {
+               "EOF in script end tag",
+               "<SCRIPT>a</SCRipt",
+               "<script>$a&lt;/SCRipt",
+       },
+       {
+               "scriptx end tag",
+               "<SCRIPT>a</SCRiptx",
+               "<script>$a&lt;/SCRiptx",
+       },
+       {
+               "' ' completes script end tag",
+               "<SCRIPT>a</SCRipt ",
+               "<script>$a",
+       },
+       {
+               "'>' completes script end tag",
+               "<SCRIPT>a</SCRipt>",
+               "<script>$a$</script>",
+       },
+       {
+               "self-closing script end tag",
+               "<SCRIPT>a</SCRipt/>",
+               "<script>$a$</script>",
+       },
+       {
+               "nested script tag",
+               "<SCRIPT>a</SCRipt<script>",
+               "<script>$a&lt;/SCRipt&lt;script&gt;",
+       },
+       {
+               "script end tag after unfinished",
+               "<SCRIPT>a</SCRipt</script>",
+               "<script>$a&lt;/SCRipt$</script>",
+       },
+       {
+               "script/style mismatched tags",
+               "<script>a</style>",
+               "<script>$a&lt;/style&gt;",
+       },
+       {
+               "style element with entity",
+               "<style>&apos;",
+               "<style>$&amp;apos;",
+       },
+       {
+               "textarea with tag",
+               "<textarea><div></textarea>",
+               "<textarea>$&lt;div&gt;$</textarea>",
+       },
+       {
+               "title with tag and entity",
+               "<title><b>K&amp;R C</b></title>",
+               "<title>$&lt;b&gt;K&amp;R C&lt;/b&gt;$</title>",
+       },
+       {
+               "title with trailing '&lt;' entity",
+               "<title>foobar<</title>",
+               "<title>$foobar&lt;$</title>",
+       },
+       // DOCTYPE tests.
+       {
+               "Proper DOCTYPE",
+               "<!DOCTYPE html>",
+               "<!DOCTYPE html>",
+       },
+       {
+               "DOCTYPE with no space",
+               "<!doctypehtml>",
+               "<!DOCTYPE html>",
+       },
+       {
+               "DOCTYPE with two spaces",
+               "<!doctype  html>",
+               "<!DOCTYPE html>",
+       },
+       {
+               "looks like DOCTYPE but isn't",
+               "<!DOCUMENT html>",
+               "<!--DOCUMENT html-->",
+       },
+       {
+               "DOCTYPE at EOF",
+               "<!DOCtype",
+               "<!DOCTYPE >",
+       },
+       // XML processing instructions.
+       {
+               "XML processing instruction",
+               "<?xml?>",
+               "<!--?xml?-->",
+       },
+       // Comments. See also func TestComments.
+       {
+               "comment0",
+               "abc<b><!-- skipme --></b>def",
+               "abc$<b>$<!-- skipme -->$</b>$def",
+       },
+       {
+               "comment1",
+               "a<!-->z",
+               "a$<!---->$z",
+       },
+       {
+               "comment2",
+               "a<!--->z",
+               "a$<!---->$z",
+       },
+       {
+               "comment3",
+               "a<!--x>-->z",
+               "a$<!--x>-->$z",
+       },
+       {
+               "comment4",
+               "a<!--x->-->z",
+               "a$<!--x-&gt;-->$z",
+       },
+       {
+               "comment5",
+               "a<!>z",
+               "a$<!---->$z",
+       },
+       {
+               "comment6",
+               "a<!->z",
+               "a$<!----->$z",
+       },
+       {
+               "comment7",
+               "a<!---<>z",
+               "a$<!---<>z-->",
+       },
+       {
+               "comment8",
+               "a<!--z",
+               "a$<!--z-->",
+       },
+       {
+               "comment9",
+               "a<!--z-",
+               "a$<!--z-->",
+       },
+       {
+               "comment10",
+               "a<!--z--",
+               "a$<!--z-->",
+       },
+       {
+               "comment11",
+               "a<!--z---",
+               "a$<!--z--->",
+       },
+       {
+               "comment12",
+               "a<!--z----",
+               "a$<!--z---->",
+       },
+       {
+               "comment13",
+               "a<!--x--!>z",
+               "a$<!--x-->$z",
+       },
+       {
+               "comment14",
+               "a<!--!-->z",
+               "a$<!--!-->$z",
+       },
+       {
+               "comment15",
+               "a<!-- !-->z",
+               "a$<!-- !-->$z",
+       },
+       {
+               "comment16",
+               "a<!--i\x00j-->z",
+               "a$<!--i\uFFFDj-->$z",
+       },
+       {
+               "comment17",
+               "a<!--\x00",
+               "a$<!--\uFFFD-->",
+       },
+       {
+               "comment18",
+               "a<!--<!-->z",
+               "a$<!--<!-->$z",
+       },
+       {
+               "comment19",
+               "a<!--<!--",
+               "a$<!--<!-->",
+       },
+       {
+               "comment20",
+               "a<!--ij--kl-->z",
+               "a$<!--ij--kl-->$z",
+       },
+       {
+               "comment21",
+               "a<!--ij--kl--!>z",
+               "a$<!--ij--kl-->$z",
+       },
+       {
+               "comment22",
+               "a<!--!--!<--!-->z",
+               "a$<!--!--!<--!-->$z",
+       },
+       {
+               "comment23",
+               "a<!--&gt;-->z",
+               "a$<!--&gt;-->$z",
+       },
+       {
+               "comment24",
+               "a<!--&gt;>x",
+               "a$<!--&gt;>x-->",
+       },
+       {
+               "comment25",
+               "a<!--&gt;&gt;",
+               "a$<!--&gt;>-->",
+       },
+       {
+               "comment26",
+               "a<!--&gt;&gt;-",
+               "a$<!--&gt;>-->",
+       },
+       {
+               "comment27",
+               "a<!--&gt;&gt;-->z",
+               "a$<!--&gt;>-->$z",
+       },
+       {
+               "comment28",
+               "a<!--&amp;&gt;-->z",
+               "a$<!--&amp;>-->$z",
+       },
+       {
+               "comment29",
+               "a<!--&amp;gt;-->z",
+               "a$<!--&amp;gt;-->$z",
+       },
+       {
+               "comment30",
+               "a<!--&nosuchentity;-->z",
+               "a$<!--&amp;nosuchentity;-->$z",
+       },
+       {
+               "comment31",
+               "a<!--i>>j-->z",
+               "a$<!--i>>j-->$z",
+       },
+       {
+               "comment32",
+               "a<!--i!>>j-->z",
+               "a$<!--i!&gt;>j-->$z",
+       },
+       // https://stackoverflow.design/email/base/mso/#targeting-specific-outlook-versions
+       // says "[For] Windows Outlook 2003 and above... conditional comments allow
+       // us to add bits of HTML that are only read by the Word-based versions of
+       // Outlook". These comments (with angle brackets) should pass through
+       // unchanged (by this Go package) when rendering.
+       //
+       // We should also still escape ">" as "&gt;" when necessary.
+       // https://github.com/golang/go/issues/48237
+       //
+       // The "your code" example below comes from that stackoverflow.design link
+       // above but note that it can contain angle-bracket-rich XML.
+       // https://github.com/golang/go/issues/58246
+       {
+               "issue48237CommentWithAmpgtsemi1",
+               "a<!--<p></p>&lt;!--[video]--&gt;-->z",
+               "a$<!--<p></p><!--[video]--&gt;-->$z",
+       },
+       {
+               "issue48237CommentWithAmpgtsemi2",
+               "a<!--<p></p>&lt;!--[video]--!&gt;-->z",
+               "a$<!--<p></p><!--[video]--!&gt;-->$z",
+       },
+       {
+               "issue58246MicrosoftOutlookComment1",
+               "a<!--[if mso]> your code <![endif]-->z",
+               "a$<!--[if mso]> your code <![endif]-->$z",
+       },
+       {
+               "issue58246MicrosoftOutlookComment2",
+               "a" + issue58246 + "z",
+               "a$" + issue58246 + "$z",
+       },
+       // An attribute with a backslash.
+       {
+               "backslash",
+               `<p id="a\"b">`,
+               `<p id="a\" b"="">`,
+       },
+       // Entities, tag name and attribute key lower-casing, and whitespace
+       // normalization within a tag.
+       {
+               "tricky",
+               "<p \t\n iD=\"a&quot;B\"  foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
+               `<p id="a&#34;B" foo="bar">$<em>$te&lt;&amp;;xt$</em>$</p>`,
+       },
+       // A nonexistent entity. Tokenizing and converting back to a string should
+       // escape the "&" to become "&amp;".
+       {
+               "noSuchEntity",
+               `<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
+               `<a b="c&amp;noSuchEntity;d">$&lt;&amp;alsoDoesntExist;&amp;`,
+       },
+       {
+               "entity without semicolon",
+               `&notit;&notin;<a b="q=z&amp=5&notice=hello&not;=world">`,
+               `¬it;∉$<a b="q=z&amp;amp=5&amp;notice=hello¬=world">`,
+       },
+       {
+               "entity with digits",
+               "&frac12;",
+               "½",
+       },
+       // Attribute tests:
+       // http://dev.w3.org/html5/pf-summary/Overview.html#attributes
+       {
+               "Empty attribute",
+               `<input disabled FOO>`,
+               `<input disabled="" foo="">`,
+       },
+       {
+               "Empty attribute, whitespace",
+               `<input disabled FOO >`,
+               `<input disabled="" foo="">`,
+       },
+       {
+               "Unquoted attribute value",
+               `<input value=yes FOO=BAR>`,
+               `<input value="yes" foo="BAR">`,
+       },
+       {
+               "Unquoted attribute value, spaces",
+               `<input value = yes FOO = BAR>`,
+               `<input value="yes" foo="BAR">`,
+       },
+       {
+               "Unquoted attribute value, trailing space",
+               `<input value=yes FOO=BAR >`,
+               `<input value="yes" foo="BAR">`,
+       },
+       {
+               "Single-quoted attribute value",
+               `<input value='yes' FOO='BAR'>`,
+               `<input value="yes" foo="BAR">`,
+       },
+       {
+               "Single-quoted attribute value, trailing space",
+               `<input value='yes' FOO='BAR' >`,
+               `<input value="yes" foo="BAR">`,
+       },
+       {
+               "Double-quoted attribute value",
+               `<input value="I'm an attribute" FOO="BAR">`,
+               `<input value="I&#39;m an attribute" foo="BAR">`,
+       },
+       {
+               "Attribute name characters",
+               `<meta http-equiv="content-type">`,
+               `<meta http-equiv="content-type">`,
+       },
+       {
+               "Mixed attributes",
+               `a<P V="0 1" w='2' X=3 y>z`,
+               `a$<p v="0 1" w="2" x="3" y="">$z`,
+       },
+       {
+               "Attributes with a solitary single quote",
+               `<p id=can't><p id=won't>`,
+               `<p id="can&#39;t">$<p id="won&#39;t">`,
+       },
+       // WHATWG 13.2.5.32 equals sign before attribute name state
+       {
+               "equals sign before attribute name",
+               `<p  =>`,
+               `<p =="">`,
+       },
+       {
+               "equals sign before attribute name, extra cruft",
+               `<p  =asd>`,
+               `<p =asd="">`,
+       },
+       {
+               "forward slash before attribute name",
+               `<p/=">`,
+               `<p ="="">`,
+       },
+       {
+               "forward slash before attribute name with spaces around",
+               `<p / =">`,
+               `<p ="="">`,
+       },
+       {
+               "forward slash after attribute name followed by a character",
+               `<p a/ ="">`,
+               `<p a="" =""="">`,
+       },
+}
+
+// TestTokenizer tokenizes the html of every tokenTests entry and compares the
+// string form of each token with the matching '$'-separated piece of golden.
+// After the expected tokens the tokenizer must report io.EOF.
+func TestTokenizer(t *testing.T) {
+       for _, tc := range tokenTests {
+               t.Run(tc.desc, func(t *testing.T) {
+                       z := NewTokenizer(strings.NewReader(tc.html))
+                       // An empty golden means that no tokens are expected at all.
+                       var want []string
+                       if tc.golden != "" {
+                               want = strings.Split(tc.golden, "$")
+                       }
+                       for i, exp := range want {
+                               if z.Next() == ErrorToken {
+                                       t.Errorf("%s token %d: want %q got error %v", tc.desc, i, exp, z.Err())
+                                       return
+                               }
+                               if got := z.Token().String(); got != exp {
+                                       t.Errorf("%s token %d: want %q got %q", tc.desc, i, exp, got)
+                                       return
+                               }
+                       }
+                       z.Next()
+                       if err := z.Err(); err != io.EOF {
+                               t.Errorf("%s: want EOF got %q", tc.desc, err)
+                       }
+               })
+       }
+}
+
+// TestMaxBuffer checks that exceeding the maximum buffer size generates
+// ErrBufferExceeded and that Raw still returns what was buffered before the
+// overflow.
+func TestMaxBuffer(t *testing.T) {
+       const maxBuf = 5
+       input := "<" + strings.Repeat("t", 10)
+       z := NewTokenizer(strings.NewReader(input))
+       z.SetMaxBuf(maxBuf)
+       if got := z.Next(); got != ErrorToken {
+               t.Fatalf("token type: got: %v want: %v", got, ErrorToken)
+       }
+       if err := z.Err(); err != ErrBufferExceeded {
+               t.Errorf("error type: got: %v want: %v", err, ErrBufferExceeded)
+       }
+       if raw := string(z.Raw()); raw != "<tttt" {
+               t.Fatalf("buffered before overflow: got: %q want: %q", raw, "<tttt")
+       }
+}
+
+// TestMaxBufferReconstruction checks that exceeding the maximum buffer size at
+// any point while tokenizing still permits reconstructing the original input.
+// Each test input is tokenized with buffer limits 1, 2, 3, ... until one is
+// large enough for tokenization to reach io.EOF.
+func TestMaxBufferReconstruction(t *testing.T) {
+cases:
+       for _, tc := range tokenTests {
+               for limit, done := 1, false; !done; limit++ {
+                       src := strings.NewReader(tc.html)
+                       z := NewTokenizer(src)
+                       z.SetMaxBuf(limit)
+                       var seen bytes.Buffer
+                       for {
+                               kind := z.Next()
+                               seen.Write(z.Raw())
+                               if kind != ErrorToken {
+                                       continue
+                               }
+                               if err := z.Err(); err != io.EOF && err != ErrBufferExceeded {
+                                       t.Errorf("%s: unexpected error: %v", tc.desc, err)
+                               }
+                               break
+                       }
+                       // Anything tokenized along with untokenized input or data left in the reader.
+                       rest := io.MultiReader(&seen, bytes.NewReader(z.Buffered()), src)
+                       assembled, err := io.ReadAll(rest)
+                       if err != nil {
+                               t.Errorf("%s: ReadAll: %v", tc.desc, err)
+                               continue cases
+                       }
+                       if got := string(assembled); got != tc.html {
+                               t.Errorf("%s: reassembled html:\n got: %q\nwant: %q", tc.desc, got, tc.html)
+                               continue cases
+                       }
+                       // EOF indicates that tokenization completed, so this limit is past the
+                       // largest one that generates ErrBufferExceeded: move on to the next test.
+                       done = z.Err() == io.EOF
+               }
+       }
+}
+
+// TestPassthrough checks, for every tokenTests input, that concatenating
+// z.Raw() after each call to Next (including the final ErrorToken) yields
+// the input unchanged.
+func TestPassthrough(t *testing.T) {
+       // Accumulating the raw output for each parse event should reconstruct the
+       // original input.
+       for _, test := range tokenTests {
+               z := NewTokenizer(strings.NewReader(test.html))
+               var parsed bytes.Buffer
+               for {
+                       tt := z.Next()
+                       // Write before testing for ErrorToken so the raw bytes of
+                       // the last event are included too.
+                       parsed.Write(z.Raw())
+                       if tt == ErrorToken {
+                               break
+                       }
+               }
+               if got, want := parsed.String(), test.html; got != want {
+                       t.Errorf("%s: parsed output:\n got: %q\nwant: %q", test.desc, got, want)
+               }
+       }
+}
+
+// TestBufAPI exercises the []byte-returning accessors Text and TagName. It
+// walks a fixed input, tracks how many <a> start tags are currently unmatched
+// by </a> end tags, and collects the text tokens seen while that depth is
+// positive. The collected text must equal "14567".
+func TestBufAPI(t *testing.T) {
+       s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
+       z := NewTokenizer(bytes.NewBufferString(s))
+       var result bytes.Buffer
+       depth := 0
+loop:
+       for {
+               tt := z.Next()
+               switch tt {
+               case ErrorToken:
+                       if z.Err() != io.EOF {
+                               t.Error(z.Err())
+                       }
+                       break loop
+               case TextToken:
+                       if depth > 0 {
+                               result.Write(z.Text())
+                       }
+               // Only start and end tags adjust depth; any other token type,
+               // presumably including the trailing self-closing <a/>, falls
+               // through the switch without changing it.
+               case StartTagToken, EndTagToken:
+                       tn, _ := z.TagName()
+                       if len(tn) == 1 && tn[0] == 'a' {
+                               if tt == StartTagToken {
+                                       depth++
+                               } else {
+                                       depth--
+                               }
+                       }
+               }
+       }
+       u := "14567"
+       v := string(result.Bytes())
+       if u != v {
+               t.Errorf("TestBufAPI: want %q got %q", u, v)
+       }
+}
+
+// TestConvertNewlines is a table-driven check of convertNewlines. Each map
+// key is an input and its value is the expected output. In these cases every
+// "\r\n" pair and every lone "\r" becomes a single "\n", and all other bytes
+// are left as they are.
+func TestConvertNewlines(t *testing.T) {
+       testCases := map[string]string{
+               "Mac\rDOS\r\nUnix\n":    "Mac\nDOS\nUnix\n",
+               "Unix\nMac\rDOS\r\n":    "Unix\nMac\nDOS\n",
+               "DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
+               "":                      "",
+               "\n":                    "\n",
+               "\n\r":                  "\n\n",
+               "\r":                    "\n",
+               "\r\n":                  "\n",
+               "\r\n\n":                "\n\n",
+               "\r\n\r":                "\n\n",
+               "\r\n\r\n":              "\n\n",
+               "\r\r":                  "\n\n",
+               "\r\r\n":                "\n\n",
+               "\r\r\n\n":              "\n\n\n",
+               "\r\r\r\n":              "\n\n\n",
+               "\r \n":                 "\n \n",
+               "xyz":                   "xyz",
+       }
+       // Map iteration order is unspecified; each case is independent.
+       for in, want := range testCases {
+               if got := string(convertNewlines([]byte(in))); got != want {
+                       t.Errorf("input %q: got %q, want %q", in, got, want)
+               }
+       }
+}
+
+// TestReaderEdgeCases drives the tokenizer with readers that use unusual but
+// permitted io.Reader behaviour: alternating (0, nil) reads, data returned
+// together with io.EOF, and a reader that never makes progress. Readers that
+// deliver the document must produce a start tag, a text token and an end
+// tag. A reader that ends with io.ErrNoProgress is accepted without
+// comparing tokens.
+func TestReaderEdgeCases(t *testing.T) {
+       const s = "<p>An io.Reader can return (0, nil) or (n, io.EOF).</p>"
+       testCases := []io.Reader{
+               &zeroOneByteReader{s: s},
+               &eofStringsReader{s: s},
+               &stuckReader{},
+       }
+       for i, tc := range testCases {
+               got := []TokenType{}
+               z := NewTokenizer(tc)
+               for {
+                       tt := z.Next()
+                       if tt == ErrorToken {
+                               break
+                       }
+                       got = append(got, tt)
+               }
+               // io.ErrNoProgress is tolerated silently and the token
+               // comparison is skipped; any other non-EOF error is reported.
+               if err := z.Err(); err != nil && err != io.EOF {
+                       if err != io.ErrNoProgress {
+                               t.Errorf("i=%d: %v", i, err)
+                       }
+                       continue
+               }
+               want := []TokenType{
+                       StartTagToken,
+                       TextToken,
+                       EndTagToken,
+               }
+               if !reflect.DeepEqual(got, want) {
+                       t.Errorf("i=%d: got %v, want %v", i, got, want)
+                       continue
+               }
+       }
+}
+
+// zeroOneByteReader is like a strings.Reader that alternates between
+// returning 0 bytes and 1 byte at a time.
+type zeroOneByteReader struct {
+       s string // data not yet returned by Read
+       n int    // count of Read calls that reached the alternation step
+}
+
+// Read returns (0, nil) for an empty p, (0, io.EOF) once s is exhausted, and
+// otherwise alternates: odd-numbered calls return (0, nil) and even-numbered
+// calls copy one byte of s into p[0] and return (1, nil).
+func (r *zeroOneByteReader) Read(p []byte) (int, error) {
+       if len(p) == 0 {
+               return 0, nil
+       }
+       if len(r.s) == 0 {
+               return 0, io.EOF
+       }
+       r.n++
+       if r.n%2 != 0 {
+               return 0, nil
+       }
+       // Hand out the first remaining byte and drop it from s.
+       p[0], r.s = r.s[0], r.s[1:]
+       return 1, nil
+}
+
+// eofStringsReader is like a strings.Reader but can return an (n, err) where
+// n > 0 && err != nil.
+type eofStringsReader struct {
+       s string // data not yet returned by Read
+}
+
+// Read copies as much of s as fits into p. It returns io.EOF in the same call
+// that consumes the last of s (and on every later call), rather than waiting
+// for a separate zero-byte read.
+func (r *eofStringsReader) Read(p []byte) (int, error) {
+       n := copy(p, r.s)
+       r.s = r.s[n:]
+       if r.s != "" {
+               return n, nil
+       }
+       return n, io.EOF
+}
+
+// stuckReader is an io.Reader that always returns no data and no error.
+type stuckReader struct{}
+
+// Read ignores p and reports zero bytes read with a nil error on every call.
+func (*stuckReader) Read(p []byte) (int, error) {
+       return 0, nil
+}
+
+// Access levels passed to benchmarkTokenizer; each selects which tokenizer
+// accessors are called for every token.
+const (
+       rawLevel  = iota // call z.Raw only
+       lowLevel         // call z.Text, z.TagName and z.TagAttr
+       highLevel        // call z.Token
+)
+
+// benchmarkTokenizer tokenizes testdata/go1.html b.N times. For every token
+// it calls the accessors selected by level (rawLevel, lowLevel or highLevel).
+// File loading and a forced garbage collection happen before the timer is
+// reset, so they are not part of the measurement.
+func benchmarkTokenizer(b *testing.B, level int) {
+       buf, err := os.ReadFile("testdata/go1.html")
+       if err != nil {
+               b.Fatalf("could not read testdata/go1.html: %v", err)
+       }
+       b.SetBytes(int64(len(buf)))
+       runtime.GC()
+       b.ReportAllocs()
+       b.ResetTimer()
+       for i := 0; i < b.N; i++ {
+               z := NewTokenizer(bytes.NewBuffer(buf))
+               for {
+                       tt := z.Next()
+                       if tt == ErrorToken {
+                               if err := z.Err(); err != nil && err != io.EOF {
+                                       b.Fatalf("tokenizer error: %v", err)
+                               }
+                               break
+                       }
+                       switch level {
+                       case rawLevel:
+                               // Calling z.Raw just returns the raw bytes of the token. It does
+                               // not unescape &lt; to <, or lower-case tag names and attribute keys.
+                               z.Raw()
+                       case lowLevel:
+                               // Calling z.Text, z.TagName and z.TagAttr returns []byte values
+                               // whose contents may change on the next call to z.Next.
+                               switch tt {
+                               case TextToken, CommentToken, DoctypeToken:
+                                       z.Text()
+                               case StartTagToken, SelfClosingTagToken:
+                                       // Drain every attribute of the tag.
+                                       _, more := z.TagName()
+                                       for more {
+                                               _, _, more = z.TagAttr()
+                                       }
+                               case EndTagToken:
+                                       z.TagName()
+                               }
+                       case highLevel:
+                               // Calling z.Token converts []byte values to strings whose validity
+                               // extends beyond the next call to z.Next.
+                               z.Token()
+                       }
+               }
+       }
+}
+
+// The three benchmarks below run benchmarkTokenizer at the raw, low and high
+// access levels respectively.
+func BenchmarkRawLevelTokenizer(b *testing.B)  { benchmarkTokenizer(b, rawLevel) }
+func BenchmarkLowLevelTokenizer(b *testing.B)  { benchmarkTokenizer(b, lowLevel) }
+func BenchmarkHighLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, highLevel) }