Skip to content

Instantly share code, notes, and snippets.

@HalCanary
Last active May 25, 2023 14:22
Show Gist options
  • Save HalCanary/fd4ec75ae950196454f09051a5d8779a to your computer and use it in GitHub Desktop.
Save HalCanary/fd4ec75ae950196454f09051a5d8779a to your computer and use it in GitHub Desktop.
package main
import (
	"io"
	"regexp"
	"sort"
	"strings"

	"golang.org/x/net/html"
)
// AppendNodes attaches each non-nil entry of children to node, in the order
// given, and returns node so that calls can be nested while building a tree.
// Nil entries in children are skipped.
func AppendNodes(node *html.Node, children ...*html.Node) *html.Node {
	for i := range children {
		child := children[i]
		if child == nil {
			continue
		}
		node.AppendChild(child)
	}
	return node
}
// DocumentNode returns a new document node. Its first child is a doctype
// node whose Data is doctype; the non-nil children follow in the order given.
func DocumentNode(doctype string, children ...*html.Node) *html.Node {
	doc := new(html.Node)
	doc.Type = html.DocumentNode

	dt := new(html.Node)
	dt.Type, dt.Data = html.DoctypeNode, doctype
	doc.AppendChild(dt)

	return AppendNodes(doc, children...)
}
// TextNode returns a new, unattached text node whose Data is data.
func TextNode(data string) *html.Node {
	text := html.Node{Type: html.TextNode, Data: data}
	return &text
}
// CommentNode returns a new, unattached comment node whose Data is data.
func CommentNode(data string) *html.Node {
	comment := html.Node{Type: html.CommentNode, Data: data}
	return &comment
}
// Element returns a new element node named tag.
//
// The entries of attributes become the node's attributes, stored sorted by
// key so that the attribute order does not depend on map iteration order.
// A nil or empty map leaves the node without attributes. The non-nil
// children are appended in the order given.
func Element(tag string, attributes map[string]string, children ...*html.Node) *html.Node {
	elem := &html.Node{Type: html.ElementNode, Data: tag}
	if n := len(attributes); n > 0 {
		attrs := make([]html.Attribute, 0, n)
		for key, val := range attributes {
			attrs = append(attrs, html.Attribute{Key: key, Val: val})
		}
		// Map keys are unique, so ordering by key alone is a total order.
		sort.Slice(attrs, func(i, j int) bool { return attrs[i].Key < attrs[j].Key })
		elem.Attr = attrs
	}
	return AppendNodes(elem, children...)
}
////////////////////////////////////////////////////////////////////////////////
// FindNodeByTagAndAttrib searches the subtree rooted at root (root itself
// included) in depth-first, pre-order fashion and returns the first element
// node that matches, or nil if root is nil or nothing matches.
//
// An element matches when:
//   - tag is empty or equals the element's Data, and
//   - key is empty (value is then ignored), or the element has an attribute
//     whose Key equals key and whose Val equals value.
//
// The walk is iterative: instead of recursing, it follows the Parent pointers
// to climb back up, so it needs no call stack or auxiliary storage.
func FindNodeByTagAndAttrib(root *html.Node, tag, key, value string) *html.Node {
	if root == nil {
		return nil
	}
	cur := root
	for {
		if cur.Type == html.ElementNode && (tag == "" || cur.Data == tag) {
			if key == "" {
				return cur
			}
			for i := range cur.Attr {
				if a := &cur.Attr[i]; a.Key == key && a.Val == value {
					return cur
				}
			}
		}

		// Descend first.
		if child := cur.FirstChild; child != nil {
			cur = child
			continue
		}

		// No children: climb until some ancestor (or cur itself) has a next
		// sibling, never going above root.
		for cur != root && cur != nil && cur.NextSibling == nil {
			cur = cur.Parent
		}
		if cur == root || cur == nil {
			// Back at the starting node: the whole subtree has been visited.
			return nil
		}
		cur = cur.NextSibling
	}
}
// FindNodeByTag returns the first element named tag in the subtree rooted at
// node (node included), or nil if there is none. No attribute filter is
// applied.
func FindNodeByTag(node *html.Node, tag string) *html.Node {
	const noKey, noValue = "", ""
	return FindNodeByTagAndAttrib(node, tag, noKey, noValue)
}
// FindNodeById returns the first element, of any tag, in the subtree rooted
// at node (node included) whose "id" attribute equals id, or nil if there is
// none.
func FindNodeById(node *html.Node, id string) *html.Node {
	const anyTag = ""
	return FindNodeByTagAndAttrib(node, anyTag, "id", id)
}
// FindNodeByAttribute returns the first element, of any tag, in the subtree
// rooted at node (node included) that has an attribute key set to value, or
// nil if there is none. An empty key disables the attribute check, so the
// first element node found is returned.
func FindNodeByAttribute(node *html.Node, key, value string) *html.Node {
	const anyTag = ""
	return FindNodeByTagAndAttrib(node, anyTag, key, value)
}
// whitespaceRegexp matches one or more consecutive whitespace characters.
// It is compiled once at package initialization.
var whitespaceRegexp = regexp.MustCompile(`\s+`)
// ExtractText concatenates the contents of every text node in the subtree
// rooted at root (root included), visiting nodes depth-first in pre-order,
// and returns the result. A nil root yields the empty string.
//
// Within each text node, every match of whitespaceRegexp is replaced by a
// single space. A few elements add a separator at the point where the
// element is encountered, before any of its children:
//
//	br -> "\n"
//	hr -> "\n* * *\n"
//	p  -> "\n\n"
//
// The walk is iterative, using Parent pointers to climb back up the tree.
func ExtractText(root *html.Node) string {
	if root == nil {
		return ""
	}
	var text strings.Builder
	cur := root
	for {
		switch cur.Type {
		case html.TextNode:
			text.WriteString(whitespaceRegexp.ReplaceAllString(cur.Data, " "))
		case html.ElementNode:
			switch cur.Data {
			case "br":
				text.WriteString("\n")
			case "hr":
				text.WriteString("\n* * *\n")
			case "p":
				text.WriteString("\n\n")
			}
		}

		// Descend first.
		if child := cur.FirstChild; child != nil {
			cur = child
			continue
		}

		// No children: climb until some ancestor (or cur itself) has a next
		// sibling, never going above root.
		for cur != root && cur != nil && cur.NextSibling == nil {
			cur = cur.Parent
		}
		if cur == root || cur == nil {
			// Back at the starting node: the whole subtree has been visited.
			return text.String()
		}
		cur = cur.NextSibling
	}
}
////////////////////////////////////////////////////////////////////////////////
func example(w io.Writer) error {
return html.Render(w,
DocumentNode("html",
TextNode("\n"),
Element("html", map[string]string{"lang": "en"},
TextNode("\n"),
Element("head", nil,
TextNode("\n"),
Element("meta", map[string]string{"charset": "utf-8"}),
TextNode("\n"),
CommentNode("\nthis is an example\n"),
TextNode("\n"),
Element("meta", map[string]string{
"name": "viewport", "content": "width=device-width, initial-scale=1.0"}),
TextNode("\n"),
Element("title", nil, TextNode("Hello World!")),
TextNode("\n"),
Element("style", nil, TextNode("\nbody{font-family: sans-serif;}\n")),
TextNode("\n"),
),
TextNode("\n"),
Element("body", nil,
Element("h1", nil, TextNode("Hello World!")),
),
TextNode("\n"),
),
TextNode("\n"),
),
)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment