Created
December 23, 2022 15:35
-
-
Save natenho/7ed3378ed6186925dddac9846164fd0c to your computer and use it in GitHub Desktop.
A way to convert HTML into JSON using Go, extracted from ChatGPT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/json" | |
"fmt" | |
"io" | |
"strings" | |
"golang.org/x/net/html" | |
) | |
func main() { | |
// HTML string to parse | |
htmlStr := `<html><head><title>Example</title></head><body><h1>Hello, World!</h1></body></html>` | |
// Parse the HTML string | |
doc, err := html.Parse(strings.NewReader(htmlStr)) | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
// Traverse the HTML document and create a JSON object | |
jsonObj := make(map[string]interface{}) | |
traverseHTMLNode(doc, jsonObj) | |
// Print the JSON object | |
jsonBytes, err := json.MarshalIndent(jsonObj, "", " ") | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
fmt.Println(string(jsonBytes)) | |
} | |
func traverseHTMLNode(n *html.Node, jsonObj map[string]interface{}) { | |
if n.Type == html.ElementNode { | |
// Add the element name and attributes to the JSON object | |
jsonObj[n.Data] = make(map[string]interface{}) | |
for _, attr := range n.Attr { | |
jsonObj[n.Data].(map[string]interface{})[attr.Key] = attr.Val | |
} | |
} else if n.Type == html.TextNode { | |
// Add the text content to the JSON object | |
jsonObj["text"] = n.Data | |
} | |
// Recursively traverse the children of the node | |
for c := n.FirstChild; c != nil; c = c.NextSibling { | |
traverseHTMLNode(c, jsonObj) | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment