Skip to content

Instantly share code, notes, and snippets.

@merrypuck
Created April 6, 2016 21:11
Show Gist options
  • Save merrypuck/621b95140fdb8481936cc79a559b0060 to your computer and use it in GitHub Desktop.
Save merrypuck/621b95140fdb8481936cc79a559b0060 to your computer and use it in GitHub Desktop.
from selenium import webdriver
import urllib
import os
import shutil
# Download every xkcd comic image in [FIRST_COMIC, LAST_COMIC] into OUTPUT_DIR.
#
# Each comic page is loaded in Chrome through Selenium, a small JavaScript
# snippet pulls the title / image URL / hover text out of the DOM, and the
# image is then fetched with urlretrieve.

# urlretrieve lives in different modules on Python 2 and Python 3; import it
# locally so the script runs on either without touching the file's imports.
try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

OUTPUT_DIR = "xkcd8"
FIRST_COMIC = 1
LAST_COMIC = 1646

# JavaScript executed in the page. Returns [title, src, alt], or null when the
# page has no "ctitle"/"comic" elements (so the caller can skip it instead of
# crashing on a JavaScript TypeError).
EXTRACT_COMIC_JS = """
var comic = document.getElementById("comic");
var titleNode = document.getElementById("ctitle");
if (!comic || !titleNode) {
    return null;
}
var img = null;
for (var i = 0; i < comic.children.length; i++) {
    if (comic.children[i].tagName === "IMG") {
        img = comic.children[i];
    }
}
if (!img && comic.children.length > 0 &&
        comic.children[0].children.length > 0) {
    // Image wrapped in another element (e.g. a link): use the first
    // grandchild, as the original fallback did.
    img = comic.children[0].children[0];
}
var src = img ? (img.src || null) : null;
var alt = img ? (img.title || "") : "";
return [titleNode.textContent, src, alt];
"""

# NOTE(review): passing the driver path positionally matches the Selenium API
# this script was written against; newer Selenium releases may require a
# Service object instead -- confirm against the installed version.
driver = webdriver.Chrome("./chromedriver")
try:
    # urlretrieve does not create missing directories, so make sure the
    # destination exists before the first download.
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    for num in range(FIRST_COMIC, LAST_COMIC + 1):
        driver.get("http://xkcd.com/" + str(num))
        data = driver.execute_script(EXTRACT_COMIC_JS)
        if data is None:
            # Page without comic markup; skip rather than abort the whole run.
            print("Skipping comic %d: no comic markup on page" % num)
            continue

        # alt (the image hover text) is extracted but not stored anywhere yet.
        title, src, alt = data
        if not src:
            print("Skipping comic %d: no image URL found" % num)
            continue

        # "/" would be interpreted as a directory separator in the file name.
        title = title.replace("/", "_")
        # NOTE(review): the ".png" extension is applied regardless of the
        # actual image type in src -- kept to preserve existing file names.
        filename = str(num) + "." + title + ".png"
        urlretrieve(src, os.path.join(OUTPUT_DIR, filename))
finally:
    # Always shut the browser down, even when a page load or download fails,
    # so no Chrome/chromedriver process is left running.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment