Skip to content

Instantly share code, notes, and snippets.

@Aaron1011
Created September 3, 2014 23:31
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Aaron1011/d3b56325881cd639506a to your computer and use it in GitHub Desktop.
Save Aaron1011/d3b56325881cd639506a to your computer and use it in GitHub Desktop.
xkcd 1416 - 'Pixels' image scraper
import requests
import os
# Base URL that a name is appended to when requesting its JSON document.
JSON_BASE = 'http://c.xkcd.com/turtle/'
# Base URL that "<name><IMAGE_SUFFIX>" is appended to when downloading an image.
IMAGE_BASE = 'http://imgs.xkcd.com/turtledown/'
# Suffix appended to a name to form both the remote and the local file name.
IMAGE_SUFFIX = '-tiled.png'
# Name the crawl starts from.
START = 'turtles'
# Local directory that downloaded images are stored under.
IMAGE_DIR = 'imgs/'

# Create the output directory up front. exist_ok=True replaces the original
# exists()-then-mkdir() check, which could fail if the directory appeared
# between the two calls.
os.makedirs(IMAGE_DIR, exist_ok=True)

# Module-level set kept for backward compatibility; main() builds its own set
# and does not read this one.
seen = set()
def fetch_json(name, timeout=30):
    """Fetch and decode the JSON document published for ``name``.

    Args:
        name: Name appended to ``JSON_BASE`` to form the request URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the crawl forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(JSON_BASE + name, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON decode error
    # when the server returns an error page.
    response.raise_for_status()
    return response.json()
def fetch_image(name, directory, timeout=30):
    """Download the image for ``name`` into ``IMAGE_DIR/directory``.

    The remote URL is ``IMAGE_BASE + name + IMAGE_SUFFIX`` and the local file
    is ``IMAGE_DIR/directory/<name><IMAGE_SUFFIX>``.

    Args:
        name: Image name, without the ``IMAGE_SUFFIX`` suffix.
        directory: Sub-directory of ``IMAGE_DIR`` to store the image in.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            times out.
        OSError: If the target directory or file cannot be created.
    """
    filename = name + IMAGE_SUFFIX
    target_dir = os.path.join(IMAGE_DIR, directory)

    # exist_ok=True avoids the exists()/mkdir() race and also creates
    # IMAGE_DIR itself when it is missing.
    os.makedirs(target_dir, exist_ok=True)

    # URLs are not filesystem paths, so build them by plain concatenation
    # rather than os.path.join. Download before opening the file so a failed
    # request does not leave an empty file behind.
    response = requests.get(IMAGE_BASE + filename, timeout=timeout)

    # The context manager closes the file even if the write raises.
    with open(os.path.join(target_dir, filename), 'wb') as image_file:
        image_file.write(response.content)
def extract(seen, current):
    """Crawl every item reachable from ``current``, downloading each once.

    ``current`` maps a key to an iterable of item names. For every item not
    yet in ``seen``, the item is recorded, its image is downloaded via
    ``fetch_image(item, key)`` (the key is used as the directory), and the
    mapping returned by ``fetch_json(item)`` is crawled in turn before moving
    on to the next sibling (depth-first order).

    The traversal uses an explicit stack instead of recursion so that long
    chains of links cannot exceed Python's recursion limit.

    Args:
        seen: Set of item names already processed; updated in place.
        current: Mapping of key -> iterable of item names to start from.
    """
    def pairs(mapping):
        # Lazily yield (key, item) in the order the nested loops visit them.
        for key in mapping:
            for item in mapping[key]:
                yield key, item

    # Each stack entry is a partially consumed iterator for one mapping; the
    # top of the stack is the mapping currently being walked.
    pending = [pairs(current)]
    while pending:
        for key, item in pending[-1]:
            if item in seen:
                continue
            seen.add(item)
            fetch_image(item, key)
            # Descend into the item's children now; the suspended iterator
            # underneath resumes with the next sibling afterwards.
            pending.append(pairs(fetch_json(item)))
            break
        else:
            # Iterator exhausted: this mapping is fully processed.
            pending.pop()
def main():
    """Run the crawl from ``START``, storing its image under 'white'."""
    visited = set()
    root = {'white': [START]}
    extract(visited, root)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment