Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
package googlescraper
import (
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
// GoogleResult holds one entry scraped from a Google results page.
//
// The field order is significant: the parser in this file fills the struct
// with a positional literal (rank, URL, title, description).
type GoogleResult struct {
	// ResultRank is the 1-based position of the entry among the kept results.
	ResultRank int

	// ResultURL is the href of the entry's link, ResultTitle the text of its
	// heading element and ResultDesc the text of its snippet element.
	ResultURL, ResultTitle, ResultDesc string
}
// googleDomains maps a country code to the Google search endpoint for that
// country. Every value ends in "?q=" so the escaped search term can be
// appended directly.
var googleDomains = map[string]string{
	"com": "https://www.google.com/search?q=",
	"uk":  "https://www.google.co.uk/search?q=",
	"ru":  "https://www.google.ru/search?q=",
	"fr":  "https://www.google.fr/search?q=",
}

// buildGoogleUrl returns the search URL for searchTerm on the Google domain
// registered for countryCode, requesting 100 results per page (num=100) with
// the interface language set to languageCode (hl=...).
//
// An unknown countryCode falls back to the "com" domain. Leading and trailing
// spaces are stripped from searchTerm before it is encoded.
func buildGoogleUrl(searchTerm string, countryCode string, languageCode string) string {
	googleBase, found := googleDomains[countryCode]
	if !found {
		googleBase = googleDomains["com"]
	}

	// QueryEscape still turns spaces into "+", but it also escapes characters
	// such as "&", "#", "+" and "%" that would otherwise truncate or corrupt
	// the query string when they appear in the search term.
	query := url.QueryEscape(strings.Trim(searchTerm, " "))
	return fmt.Sprintf("%s%s&num=100&hl=%s", googleBase, query, url.QueryEscape(languageCode))
}
// googleRequest issues a GET request for searchURL with a desktop Chrome
// User-Agent header and returns the raw response.
//
// It returns a nil response and a non-nil error when searchURL cannot be
// turned into a request or when the HTTP round trip fails. On success the
// caller owns the response and is responsible for closing its body.
func googleRequest(searchURL string) (*http.Response, error) {
	req, err := http.NewRequest("GET", searchURL, nil)
	if err != nil {
		// Previously this error was discarded, which left req nil and caused
		// a nil-pointer panic on the Header.Set call below.
		return nil, fmt.Errorf("building request for %q: %w", searchURL, err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")

	// A client without a timeout can block forever on a stalled connection.
	// Client.Timeout also bounds the time spent reading the response body.
	client := &http.Client{Timeout: 30 * time.Second}
	res, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	return res, nil
}
// googleResultParser extracts search results from a Google results page.
//
// Every "div.g" element is treated as one result: its link is the href of the
// first anchor inside it, its title the text of "h3.r" and its description
// the text of "span.st". Entries whose link is empty or "#" are skipped, and
// the remaining ones are ranked consecutively starting at 1.
//
// The response body is always closed before returning. A nil response yields
// an error. On success the returned slice is non-nil, even when empty.
//
// NOTE(review): the selectors are tied to Google's HTML markup; a comment on
// the original gist reports that the title markup has changed — confirm.
func googleResultParser(response *http.Response) ([]GoogleResult, error) {
	if response == nil {
		return nil, fmt.Errorf("parsing google results: nil response")
	}
	defer response.Body.Close()

	// NewDocumentFromResponse is deprecated in goquery; parse the body
	// directly and close it ourselves instead.
	doc, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		return nil, err
	}

	results := []GoogleResult{}
	doc.Find("div.g").Each(func(_ int, item *goquery.Selection) {
		// Attr reports a missing href via its second return value; a missing
		// attribute yields "", which the filter below already rejects.
		link, _ := item.Find("a").Attr("href")
		link = strings.Trim(link, " ")
		if link == "" || link == "#" {
			return
		}

		results = append(results, GoogleResult{
			ResultRank:  len(results) + 1, // ranks count kept entries only
			ResultURL:   link,
			ResultTitle: item.Find("h3.r").Text(),
			ResultDesc:  item.Find("span.st").Text(),
		})
	})
	return results, nil
}
// GoogleScrape searches Google for searchTerm and returns the parsed results.
//
// countryCode selects the Google domain to query and languageCode is passed
// as the interface language. It returns a nil slice and the underlying error
// when either the HTTP request or the parsing of the response fails.
func GoogleScrape(searchTerm string, countryCode string, languageCode string) ([]GoogleResult, error) {
	response, err := googleRequest(buildGoogleUrl(searchTerm, countryCode, languageCode))
	if err != nil {
		return nil, err
	}

	results, err := googleResultParser(response)
	if err != nil {
		return nil, err
	}
	return results, nil
}
@fedir

This comment has been minimized.

Copy link

commented Sep 12, 2019

Nice tiny script, thanks for sharing.
It seems that the title tag now has a different HTML structure, so the selector should be adjusted.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.