type parser … func (p *parser) top() *Node { … } var defaultScopeStopTags … type scope … const defaultScope … const listItemScope … const buttonScope … const tableScope … const tableRowScope … const tableBodyScope … const selectScope … // popUntil pops the stack of open elements at the highest element whose tag // is in matchTags, provided there is no higher element in the scope's stop // tags (as defined in section 12.2.4.2). It returns whether or not there was // such an element. If there was not, popUntil leaves the stack unchanged. // // For example, the set of stop tags for table scope is: "html", "table". If // the stack was: // ["html", "body", "font", "table", "b", "i", "u"] // then popUntil(tableScope, "font") would return false, but // popUntil(tableScope, "i") would return true and the stack would become: // ["html", "body", "font", "table", "b"] // // If an element's tag is in both the stop tags and matchTags, then the stack // will be popped and the function returns true (provided, of course, there was // no higher element in the stack that was also in the stop tags). For example, // popUntil(tableScope, "table") returns true and leaves: // ["html", "body", "font"] func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool { … } // indexOfElementInScope returns the index in p.oe of the highest element whose // tag is in matchTags that is in scope. If no matching element is in scope, it // returns -1. func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int { … } // elementInScope is like popUntil, except that it doesn't modify the stack of // open elements. func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool { … } // clearStackToContext pops elements off the stack of open elements until a // scope-defined element is found. func (p *parser) clearStackToContext(s scope) { … } // parseGenericRawTextElement implements the generic raw text element parsing // algorithm defined in 12.2.6.2. 
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text // TODO: The spec treats both the RAWTEXT and RCDATA states as part of the // tokenizer, so the tokenizer should be made to handle both states. func (p *parser) parseGenericRawTextElement() { … } // generateImpliedEndTags pops nodes off the stack of open elements as long as // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc. // If exceptions are specified, nodes with that name will not be popped off. func (p *parser) generateImpliedEndTags(exceptions ...string) { … } // addChild adds a child node n to the top element, and pushes n onto the stack // of open elements if it is an element node. func (p *parser) addChild(n *Node) { … } // shouldFosterParent returns whether the next node to be added should be // foster parented. func (p *parser) shouldFosterParent() bool { … } // fosterParent adds a child node according to the foster parenting rules. // Section 12.2.6.1, "foster parenting". func (p *parser) fosterParent(n *Node) { … } // addText adds text to the preceding node if it is a text node, or else it // calls addChild with a new text node. func (p *parser) addText(text string) { … } // addElement adds a child element based on the current token. func (p *parser) addElement() { … } // Section 12.2.4.3. func (p *parser) addFormattingElement() { … } // Section 12.2.4.3. func (p *parser) clearActiveFormattingElements() { … } // Section 12.2.4.3. func (p *parser) reconstructActiveFormattingElements() { … } // Section 12.2.5. func (p *parser) acknowledgeSelfClosingTag() { … } type insertionMode … // setOriginalIM sets the insertion mode to return to after completing a text or // inTableText insertion mode. // Section 12.2.4.1, "using the rules for". func (p *parser) setOriginalIM() { … } // Section 12.2.4.1, "reset the insertion mode". func (p *parser) resetInsertionMode() { … } const whitespace … // Section 12.2.6.4.1. 
func initialIM(p *parser) bool { … } // Section 12.2.6.4.2. func beforeHTMLIM(p *parser) bool { … } // Section 12.2.6.4.3. func beforeHeadIM(p *parser) bool { … } // Section 12.2.6.4.4. func inHeadIM(p *parser) bool { … } // Section 12.2.6.4.5. func inHeadNoscriptIM(p *parser) bool { … } // Section 12.2.6.4.6. func afterHeadIM(p *parser) bool { … } // copyAttributes copies attributes of src not found on dst to dst. func copyAttributes(dst *Node, src Token) { … } // Section 12.2.6.4.7. func inBodyIM(p *parser) bool { … } func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) { … } // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM. // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) { … } // Section 12.2.6.4.8. func textIM(p *parser) bool { … } // Section 12.2.6.4.9. func inTableIM(p *parser) bool { … } // Section 12.2.6.4.11. func inCaptionIM(p *parser) bool { … } // Section 12.2.6.4.12. func inColumnGroupIM(p *parser) bool { … } // Section 12.2.6.4.13. func inTableBodyIM(p *parser) bool { … } // Section 12.2.6.4.14. func inRowIM(p *parser) bool { … } // Section 12.2.6.4.15. func inCellIM(p *parser) bool { … } // Section 12.2.6.4.16. func inSelectIM(p *parser) bool { … } // Section 12.2.6.4.17. func inSelectInTableIM(p *parser) bool { … } // Section 12.2.6.4.18. func inTemplateIM(p *parser) bool { … } // Section 12.2.6.4.19. func afterBodyIM(p *parser) bool { … } // Section 12.2.6.4.20. func inFramesetIM(p *parser) bool { … } // Section 12.2.6.4.21. func afterFramesetIM(p *parser) bool { … } // Section 12.2.6.4.22. func afterAfterBodyIM(p *parser) bool { … } // Section 12.2.6.4.23. 
func afterAfterFramesetIM(p *parser) bool { … } func ignoreTheRemainingTokens(p *parser) bool { … } const whitespaceOrNUL … // Section 12.2.6.5 func parseForeignContent(p *parser) bool { … } // Section 12.2.4.2. func (p *parser) adjustedCurrentNode() *Node { … } // Section 12.2.6. func (p *parser) inForeignContent() bool { … } // parseImpliedToken parses a token as though it had appeared in the parser's // input. func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) { … } // parseCurrentToken runs the current token through the parsing routines // until it is consumed. func (p *parser) parseCurrentToken() { … } func (p *parser) parse() error { … } // Parse returns the parse tree for the HTML from the given Reader. // // It implements the HTML5 parsing algorithm // (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction), // which is very complicated. The resultant tree can contain implicitly created // nodes that have no explicit <tag> listed in r's data, and nodes' parents can // differ from the nesting implied by a naive processing of start and end // <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped, // with no corresponding node in the resulting tree. // // The input is assumed to be UTF-8 encoded. func Parse(r io.Reader) (*Node, error) { … } // ParseFragment parses a fragment of HTML and returns the nodes that were // found. If the fragment is the InnerHTML for an existing element, pass that // element in context. // // It has the same intricacies as Parse. func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { … } type ParseOption … // ParseOptionEnableScripting configures the scripting flag. // https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting // // By default, scripting is enabled. func ParseOptionEnableScripting(enable bool) ParseOption { … } // ParseWithOptions is like Parse, with options. 
func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) { … } // ParseFragmentWithOptions is like ParseFragment, with options. func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) { … }