Skip to content

Instantly share code, notes, and snippets.

@nakabonne
Last active November 6, 2017 13:57
Show Gist options
  • Save nakabonne/106f2a1419c4d39e5ab947294b9af9d6 to your computer and use it in GitHub Desktop.
Save nakabonne/106f2a1419c4d39e5ab947294b9af9d6 to your computer and use it in GitHub Desktop.
// getSERPsURLs collects the URLs of further search-result pages linked from
// the pagination area of the page at baseURL.
//
// The document is fetched with getDoc. Every anchor found under the element
// with id "nav" (nested inside tbody > tr > td) is inspected in document
// order, and its href is resolved against baseURL.
//
// At most depth-1 URLs are returned; presumably baseURL itself counts as the
// first page (NOTE(review): confirm against the caller). A depth of 1 or less
// therefore yields no pages.
//
// An error is returned when the document cannot be fetched, when an anchor
// inside the pagination area has no href attribute, or when an href cannot be
// resolved against baseURL. On error the returned slice is nil.
func getSERPsURLs(baseURL *url.URL, depth int) (pages []*url.URL, err error) {
	doc, err := getDoc(baseURL)
	if err != nil {
		// Without a document there is nothing to traverse; using doc here
		// could dereference a nil value.
		return nil, err
	}

	limit := depth - 1

	// A single descendant selector replaces the nested Find/Each chain, and
	// EachWithBreak lets the walk stop as soon as the limit is reached or an
	// error occurs instead of visiting every remaining anchor.
	doc.Find("#nav tbody tr td a").EachWithBreak(func(_ int, s *goquery.Selection) bool {
		if len(pages) >= limit {
			return false
		}

		href, exists := s.Attr("href")
		if !exists {
			err = errors.New("failed to retrieve the search result page")
			return false
		}

		nextURL, parseErr := baseURL.Parse(href)
		if parseErr != nil {
			// Never append a nil URL; surface the malformed href instead.
			err = parseErr
			return false
		}

		pages = append(pages, nextURL)
		return true
	})
	if err != nil {
		return nil, err
	}
	return pages, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment