Skip to content

Instantly share code, notes, and snippets.

@calebreister
Last active July 15, 2016 16:42
Show Gist options
  • Save calebreister/948c7e1a227802a3ab2515d0488636ad to your computer and use it in GitHub Desktop.
Save calebreister/948c7e1a227802a3ab2515d0488636ad to your computer and use it in GitHub Desktop.
Python script that downloads xkcd comics to basePath, prepends the comic number to the filename, and puts the alt text in 0-alt.txt. I recommend setting basePath to an absolute path.
#!/usr/bin/python
#Download xkcd comics by their index numbers (passed as arguments)
import sys
import re
import json
from urllib.request import *
# Directory that receives the images and the alt-text index file.
# The trailing slash is required because paths are built by concatenation.
basePath = '/home/caleb/Pictures/Comics/xkcd/'
altPath = basePath + '0-alt.txt'

# Read the alt-text index into memory, one "<number>: <alt text>" entry per
# non-blank line. A missing file simply means nothing was downloaded yet.
try:
    with open(altPath, encoding='utf-8') as altFile:
        altText = [line.strip() for line in altFile if line.strip()]
except FileNotFoundError:
    altText = []

# The finally clause guarantees that entries for comics fetched before a
# failure (network error, bad comic number, Ctrl-C) are still persisted.
try:
    # Every command-line argument is a comic number; argv[0] is the script.
    for comicNum in sys.argv[1:]:
        # Skip comics already recorded. Entries always begin with
        # "<number>:", so match on the prefix; a substring test would make
        # comic 5 collide with comic 15 or with alt text containing "5:".
        if any(entry.startswith(comicNum + ':') for entry in altText):
            continue

        # Fetch the comic's metadata (image URL and alt text).
        with urlopen('http://xkcd.com/' + comicNum + '/info.0.json') as response:
            data = json.loads(response.read().decode('utf-8'))

        # Use the last path segment of the image URL as the file name, so
        # names containing '-' or '(' and extensions other than jpg/png work.
        imageName = data['img'].rsplit('/', 1)[-1]
        urlretrieve(data['img'], basePath + comicNum + '-' + imageName)

        # Record the alt text only after the image is on disk; otherwise a
        # failed download would be treated as a duplicate on the next run.
        altText.append(comicNum + ': ' + data['alt'])
finally:
    # Write the index back, separating entries with a blank line.
    with open(altPath, 'w', encoding='utf-8') as altFile:
        for entry in altText:
            altFile.write(entry + '\n\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment