Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save AngelFreak/46492efeba1b19d5dc3ac5d483d3ee8d to your computer and use it in GitHub Desktop.
Save AngelFreak/46492efeba1b19d5dc3ac5d483d3ee8d to your computer and use it in GitHub Desktop.
xkcd_scrape
// This gets today's XKCD comic and saves its image to disk.
package main
import (
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"regexp"
	"strings"
	"time"

	"github.com/gocolly/colly/v2"
)
var (
	// getXkcdNumberRegex matches a run of three or four consecutive ASCII
	// digits. It is compiled once at package scope; FindString returns the
	// leftmost such run, or "" when the input contains none.
	getXkcdNumberRegex = regexp.MustCompile(`([0-9][0-9][0-9][0-9]?)`)
)
// main scrapes the xkcd.com front page and saves the current comic image in
// the working directory as "<title>_<number>.png". Any failure is fatal.
func main() {
	// Instantiate the default collector, restricted to xkcd.com.
	c := colly.NewCollector(
		colly.AllowedDomains("xkcd.com"),
	)

	// Everything needed (title, number, image) is read from #middleContainer.
	c.OnHTML("#middleContainer", func(e *colly.HTMLElement) {
		// Lower-case the title and replace spaces and path separators with
		// underscores so the title is usable as a single file name.
		xkcdName := strings.NewReplacer(" ", "_", "/", "_", "\\", "_").
			Replace(strings.ToLower(e.ChildText("#ctitle")))

		// The comic number is taken as the first match of
		// getXkcdNumberRegex in the container's text.
		xkcdNum := getXkcdNumberRegex.FindString(e.Text)

		// Resolve the image src against the page URL; this handles
		// protocol-relative ("//host/path"), relative and absolute forms.
		src := e.ChildAttr("#comic > img", "src")
		if src == "" {
			log.Fatal("no comic image found in #comic")
		}
		xkcdImgURL := e.Request.AbsoluteURL(src)
		if xkcdImgURL == "" {
			log.Fatalf("cannot resolve comic image URL %q", src)
		}

		// The download lives in a helper so its deferred Close calls have
		// already run by the time log.Fatal (which exits immediately) is hit.
		if err := downloadFile(xkcdImgURL, "./"+xkcdName+"_"+xkcdNum+".png"); err != nil {
			log.Fatal(err)
		}
	})

	if err := c.Visit("https://xkcd.com/"); err != nil {
		log.Fatal(err)
	}
	log.Println("Scraping finished")
}

// downloadTimeout bounds a complete image download, including reading the body.
const downloadTimeout = 30 * time.Second

// downloadFile fetches rawURL and streams the response body into a newly
// created file at path. It returns an error if the request fails, the status
// is not 200 OK, or the file cannot be created, written or closed.
func downloadFile(rawURL, path string) (err error) {
	client := &http.Client{Timeout: downloadTimeout}
	response, err := client.Get(rawURL)
	if err != nil {
		return fmt.Errorf("fetching %s: %w", rawURL, err)
	}
	defer response.Body.Close()

	// Anything other than 200 OK is treated as a failure.
	if response.StatusCode != http.StatusOK {
		return fmt.Errorf("fetching %s: received non 200 response code: %s", rawURL, response.Status)
	}

	file, err := os.Create(path)
	if err != nil {
		return err
	}
	// Surface a Close error unless an earlier error is already being returned;
	// a failed Close can mean the data never reached the disk.
	defer func() {
		if cerr := file.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing %s: %w", path, cerr)
		}
	}()

	// io.Copy streams the body, so large images are never held in memory.
	if _, err = io.Copy(file, response.Body); err != nil {
		return fmt.Errorf("writing %s: %w", path, err)
	}
	return nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment