Skip to content

Instantly share code, notes, and snippets.

@sheepla
Created September 9, 2023 08:29
Show Gist options
  • Save sheepla/f838aa46bb8dbcbf5cc57abc4b6809ae to your computer and use it in GitHub Desktop.
Save sheepla/f838aa46bb8dbcbf5cc57abc4b6809ae to your computer and use it in GitHub Desktop.
Remove HTML tags from a string with Go
package main
import (
"bytes"
"fmt"
"strings"
"golang.org/x/net/html"
)
// main runs removeHtmlTagsFromText on a small sample HTML document and prints
// the returned string to standard output.
func main() {
	// The sample is a raw string literal, so every newline between the tags
	// is part of the input handed to the parser.
	const sampleHTML = `
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document</title>
</head>
<body>
<div>
<p>Hello, World!</p>
<ul>
<li>List1</li>
</ul>
<ul>
<li>List2</li>
</ul>
<ul>
<li>List3</li>
</ul>
<ul>
<li>List4</li>
</ul>
<ul>
<li>List5</li>
</ul>
</div>
</body>
</html>
`
	fmt.Println(removeHtmlTagsFromText(sampleHTML))
}
// removeHtmlTags walks the tree rooted at node and appends the Data of every
// text node it encounters to buf. A node is handled before its children, and
// children are visited in FirstChild/NextSibling order. Nodes of any other
// type write nothing themselves; only their descendants can contribute.
func removeHtmlTags(node *html.Node, buf *bytes.Buffer) {
	switch node.Type {
	case html.TextNode:
		// Text is copied verbatim, including any whitespace it contains.
		buf.WriteString(node.Data)
	}

	next := node.FirstChild
	for next != nil {
		removeHtmlTags(next, buf)
		next = next.NextSibling
	}
}
// removeHtmlTagsFromText parses text as an HTML document and returns the
// concatenation of the text nodes found in the parsed tree, as collected by
// removeHtmlTags. If html.Parse reports an error, text is returned unchanged.
func removeHtmlTagsFromText(text string) string {
	root, parseErr := html.Parse(strings.NewReader(text))
	if parseErr != nil {
		// The input could not be parsed as HTML; hand it back untouched
		// instead of surfacing the error.
		return text
	}

	var collected bytes.Buffer
	removeHtmlTags(root, &collected)
	return collected.String()
}
@sheepla
Copy link
Author

sheepla commented Sep 9, 2023

This program will output:

  Document



    Hello, World!

      List1


      List2


      List3


      List4


      List5





Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment