Skip to content

Instantly share code, notes, and snippets.

@vaskoz
Created March 6, 2014 15:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vaskoz/9392485 to your computer and use it in GitHub Desktop.
Save vaskoz/9392485 to your computer and use it in GitHub Desktop.
Go Research Skeleton for testing out Gokogiri
package main
import (
g "github.com/moovweb/gokogiri"
"github.com/moovweb/gokogiri/html"
. "io/ioutil"
"fmt"
"os"
"strconv"
)
// main reads data.html, parses it as HTML, removes every node matched by a
// fixed list of XPath expressions (head, style, script, anchor elements and
// comments) and writes the remaining document to output.html.
//
// Any failure panics with a message that includes the underlying error.
func main() {
	file, err := ReadFile("data.html")
	if err != nil {
		panic("Can't load the file: " + err.Error())
	}

	htmlDoc, err := g.ParseHtml(file)
	if err != nil {
		panic("Can't parse HTML: " + err.Error())
	}
	// The document is backed by libxml2 memory that Go's garbage collector
	// does not manage; gokogiri requires an explicit Free.
	defer htmlDoc.Free()

	// Order matches the original call sequence.
	for _, xpath := range []string{
		"//head",
		"//style",
		"//script",
		"//a",
		"//comment()",
	} {
		removeXpath(htmlDoc, xpath)
	}

	// Previously the write error was dropped, so a failed write looked like success.
	if err := WriteFile("output.html", []byte(htmlDoc.String()), os.FileMode(0666)); err != nil {
		panic("Can't write output.html: " + err.Error())
	}
}
// removeXpath searches doc for xpath, prints how many nodes matched, and
// calls Remove on each matching node.
//
// It panics if the search itself fails; the panic message carries both the
// XPath expression and the underlying error so the cause is not lost.
func removeXpath(doc *html.HtmlDocument, xpath string) {
	results, err := doc.Search(xpath)
	if err != nil {
		panic("Some kind of error during search for xpath: " + xpath + ": " + err.Error())
	}

	fmt.Println("Number of hits for " + xpath + " is: " + strconv.Itoa(len(results)))

	for _, node := range results {
		node.Remove()
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment