Skip to content

Instantly share code, notes, and snippets.

@natenho
Created December 23, 2022 15:35
Show Gist options
  • Save natenho/7ed3378ed6186925dddac9846164fd0c to your computer and use it in GitHub Desktop.
Save natenho/7ed3378ed6186925dddac9846164fd0c to your computer and use it in GitHub Desktop.
A way to convert HTML into JSON using Go, extracted from ChatGPT
package main
import (
"encoding/json"
"fmt"
"io"
"strings"
"golang.org/x/net/html"
)
// main converts a fixed sample HTML document into a JSON object and prints
// the indented JSON to standard output. If parsing or marshalling fails, the
// error is printed and the program returns without producing JSON.
func main() {
	// HTML string to parse
	htmlStr := `<html><head><title>Example</title></head><body><h1>Hello, World!</h1></body></html>`

	// Parse the HTML and collect its contents into a JSON-ready map
	jsonObj, err := htmlToMap(strings.NewReader(htmlStr))
	if err != nil {
		fmt.Println(err)
		return
	}

	// Print the JSON object
	jsonBytes, err := json.MarshalIndent(jsonObj, "", " ")
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(string(jsonBytes))
}

// htmlToMap parses the HTML read from r and returns the map that
// traverseHTMLNode fills in for the parsed document. It takes an io.Reader so
// the input does not have to be an in-memory string.
func htmlToMap(r io.Reader) (map[string]interface{}, error) {
	doc, err := html.Parse(r)
	if err != nil {
		return nil, fmt.Errorf("parsing HTML: %w", err)
	}

	jsonObj := make(map[string]interface{})
	traverseHTMLNode(doc, jsonObj)
	return jsonObj, nil
}
// traverseHTMLNode walks the tree rooted at n and records it in jsonObj.
//
// An element node becomes a nested object stored in jsonObj under the
// element's tag name. That object holds the element's attributes and is used
// as the destination for the element's own children, so the result mirrors the
// nesting of the document. A text node is stored under the "text" key of the
// object it is traversed into. Nodes of any other type add nothing themselves;
// their children are recorded directly in jsonObj.
//
// When a key is already present (repeated sibling tags, several text nodes in
// one element, or a child whose key matches an attribute name), the values are
// collected into an array in insertion order instead of overwriting each other.
func traverseHTMLNode(n *html.Node, jsonObj map[string]interface{}) {
	// target is where the children of n are recorded; it only changes when n
	// is an element, so that descendants end up inside that element's object.
	target := jsonObj

	switch n.Type {
	case html.ElementNode:
		// Add the element name and attributes to the JSON object
		elem := make(map[string]interface{}, len(n.Attr))
		for _, attr := range n.Attr {
			elem[attr.Key] = attr.Val
		}
		addJSONValue(jsonObj, n.Data, elem)
		target = elem
	case html.TextNode:
		// Add the text content to the JSON object
		addJSONValue(jsonObj, "text", n.Data)
	}

	// Recursively traverse the children of the node
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		traverseHTMLNode(c, target)
	}
}

// addJSONValue stores val in obj under key without discarding a value that is
// already there: the first value is stored as-is, and later values for the
// same key turn the entry into a []interface{} holding all of them.
func addJSONValue(obj map[string]interface{}, key string, val interface{}) {
	existing, found := obj[key]
	if !found {
		obj[key] = val
		return
	}
	if list, isList := existing.([]interface{}); isList {
		obj[key] = append(list, val)
		return
	}
	obj[key] = []interface{}{existing, val}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment