package main

import (
	"encoding/json"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"time"
)

// Site type holds information about sites
type Site struct {
	Name, URL, Version, TimeTaken string
}
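
// Note: the URLs below are placeholders; point them at the real sites
// you want to check before running.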
var sites = []Site{
	{"Site A", "https://a.web.site", "N/A", ""},
	{"Site B", "https://b.web.site", "N/A", ""},
	{"Site C", "https://c.web.site", "N/A", ""},
}

// Parser type holds information about the parsing
// A new instance of it is created in each scraping goroutine
type Parser struct {
	url, body string
}

// setURL sets the URL to scrape and clears any previously fetched body
func (p *Parser) setURL(url string) {
	p.url = url
	p.body = ""
}

// getMatches tries to match the given regex against the fetched body and
// returns the capture group at the given index, or "N/A" if there is no match
func (p *Parser) getMatches(regexString string, index int) string {
	regex, err := regexp.Compile(regexString)
	if err != nil {
		return "N/A"
	}

	matches := regex.FindStringSubmatch(p.body)
	if len(matches) > index {
		return matches[index]
	}
	return "N/A"
}
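
// Example (illustrative values): with p.body set to
// `<meta name="generator" content="WordPress 5.3.2">`, calling
// p.getMatches(`(<meta name="generator" content=")([^"]+)`, 2) returns
// "WordPress 5.3.2": FindStringSubmatch returns the full match at index 0
// followed by the capture groups, so index 2 selects the second group.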

// getVersion fetches the page, reads its body, and extracts the value of
// the <meta name="generator"> tag
func (p *Parser) getVersion() string {
	res, err := http.Get(p.url)
	if err != nil {
		return "Fetch Error"
	}

	body, err := ioutil.ReadAll(res.Body)
	res.Body.Close()
	if err != nil {
		return "Parse Error"
	}

	p.body = string(body)
	return p.getMatches(`(<meta name="generator" content=")([^"]+)`, 2)
}

// getSiteVersion is launched as a goroutine for each site: it initialises a
// Parser, does the scrape, and keeps simple timing of how long the scrape takes
func getSiteVersion(key int, done chan<- bool) {
	start := time.Now() // to measure the time taken

	site := Parser{}
	site.setURL(sites[key].URL)
	sites[key].Version = site.getVersion()
	sites[key].TimeTaken = time.Since(start).String()

	done <- true
}

func main() {
	// We're simply using a channel called done to make sure all the
	// goroutines are done executing before we quit the application
	done := make(chan bool)
	for index := range sites {
		go getSiteVersion(index, done)
	}
	for i := 0; i < len(sites); i++ {
		<-done
	}

	// Finally, we output the sites slice as JSON
	enc := json.NewEncoder(os.Stdout)
	enc.Encode(sites)
}
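
To try it out, save the snippet as main.go, point the sites slice at real URLs, and run go run main.go. Assuming each page responds and exposes a <meta name="generator"> tag, the program prints a single JSON array along these lines (values are illustrative):

[{"Name":"Site A","URL":"https://a.web.site","Version":"WordPress 5.3.2","TimeTaken":"412.3ms"},{"Name":"Site B","URL":"https://b.web.site","Version":"Hugo 0.62.2","TimeTaken":"388.1ms"},{"Name":"Site C","URL":"https://c.web.site","Version":"N/A","TimeTaken":"501.7ms"}]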