Created
July 28, 2021 10:46
-
-
Save AngelFreak/46492efeba1b19d5dc3ac5d483d3ee8d to your computer and use it in GitHub Desktop.
xkcd_scrape
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This program fetches today's XKCD comic and saves the image to disk.
package main | |
import ( | |
"io" | |
"log" | |
"net/http" | |
"os" | |
"regexp" | |
"strings" | |
"github.com/gocolly/colly/v2" | |
) | |
// getXkcdNumberRegex matches a run of three or four consecutive ASCII digits
// (the fourth digit is optional). It is compiled once at package scope.
// NOTE(review): the pattern is not anchored to anything, so when used with
// FindString it yields the first such run in the input, wherever it occurs;
// numbers shorter than three digits never match and longer ones are
// truncated to four digits.
var getXkcdNumberRegex = regexp.MustCompile(`([0-9][0-9][0-9][0-9]?)`) | |
func main() { | |
// Instantiate default collector | |
c := colly.NewCollector( | |
// Allow requests only to xkcd.com | |
colly.AllowedDomains("xkcd.com"), | |
) | |
// Extract product details | |
c.OnHTML("#middleContainer", func(e *colly.HTMLElement) { | |
// Gets todays XKCD name, and format it. | |
xkcdName := strings.ToLower(e.ChildText("#ctitle")) | |
if strings.Contains(xkcdName, " ") { | |
xkcdName = strings.ReplaceAll(xkcdName, " ", "_") | |
} | |
// Gets todays XKCD number | |
xkcdNum := getXkcdNumberRegex.FindString(e.Text) | |
// Get todays XKCD image url, to download teh image later | |
xkcdImgUrl := "https:" + e.ChildAttr("#comic > img", "src") | |
//Get the response bytes from the url | |
response, err := http.Get(xkcdImgUrl) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer response.Body.Close() | |
// If we do not get a http styatus code 200 e.g ok. Fatal. | |
if response.StatusCode != 200 { | |
log.Fatal(err, "Received non 200 response code") | |
} | |
// Open a file for writing | |
file, err := os.Create("./" + xkcdName + "_" + xkcdNum + ".png") | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer file.Close() | |
// Use io.Copy to just dump the response body to the file. This supports huge files | |
_, err = io.Copy(file, response.Body) | |
if err != nil { | |
log.Fatal(err) | |
} | |
}) | |
c.Visit("https://xkcd.com/") | |
log.Println("Scraping finished") | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment