Create a gist now

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Parsing HTML in Go/Golang using goquery to extract data from only one of multiple tables. Demonstrates nested Find statements.
package main
import (
"fmt"
"log"
"strings"
"github.com/PuerkitoBio/goquery"
)
// goGet parses an embedded HTML document containing two tables and prints
// the heading cells and data rows found in it.
//
// Every <table> in the document is visited. The text of each <th> cell is
// appended to a flat list of headings, and the text of the <td> cells of each
// <tr> is collected into one row per <tr>. Rows without any <td> cell (for
// example the heading rows) are skipped so they do not show up as empty
// entries in the result.
//
// If the document cannot be parsed, the program is terminated via log.Fatalf.
func goGet() {
	var headings []string
	var rows [][]string

	data := `<html><body>
<table>
<tr><th>Heading 1</th><th>Heading two</th></tr>
<tr><td>Data 11</td><td>Data 12</td></tr>
<tr><td>Data 21</td><td>Data 22</td></tr>
<tr><td>Data 31</td><td>Data 32</td></tr>
<tr><td>Data 41</td><td>Data 42</td></tr>
</table>
<p>Stuff in here</p>
<table>
<tr><th>Heading 21</th><th>Heading 2two</th></tr>
<tr><td>Data 211</td><td>Data 212</td></tr>
<tr><td>Data 221</td><td>Data 222</td></tr>
<tr><td>Data 231</td><td><span></span><span><a href="">Data 232</a></span></td></tr>
<tr><td>Data 241</td><td>Data 242</td></tr>
</table>
</body>
</html>
`

	doc, err := goquery.NewDocumentFromReader(strings.NewReader(data))
	if err != nil {
		// The input is an in-memory string, so a failure here is a parse
		// problem; report it once with context.
		log.Fatalf("parsing HTML: %v", err)
	}

	// Nested Find calls narrow the selection step by step:
	// table -> row -> heading/data cells.
	doc.Find("table").Each(func(_ int, table *goquery.Selection) {
		table.Find("tr").Each(func(_ int, tr *goquery.Selection) {
			tr.Find("th").Each(func(_ int, th *goquery.Selection) {
				headings = append(headings, th.Text())
			})

			// A fresh slice per <tr> keeps rows independent of each other.
			var row []string
			tr.Find("td").Each(func(_ int, td *goquery.Selection) {
				row = append(row, td.Text())
			})

			// Heading-only rows have no <td> cells; do not record them as
			// empty data rows.
			if len(row) > 0 {
				rows = append(rows, row)
			}
		})
	})

	fmt.Println("####### headings = ", len(headings), headings)
	fmt.Println("####### rows = ", len(rows), rows)
}
// main is the program entry point; it runs the table-parsing demo in goGet.
func main() {
	goGet()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment