Skip to content

Instantly share code, notes, and snippets.

@categulario
Created November 4, 2016 22:29
Show Gist options
  • Save categulario/fea2ff98e57a826ad3316a0d05a665d7 to your computer and use it in GitHub Desktop.
Save categulario/fea2ff98e57a826ad3316a0d05a665d7 to your computer and use it in GitHub Desktop.
Downloads comics from xkcd.com
import requests
import os
import re
def get_line(iterator, condition):
    """Return the first item of *iterator* for which *condition* is true.

    Iteration stops at the first match, so a lazy iterator (such as a
    streamed HTTP response) is consumed only up to and including the
    matching item.

    Args:
        iterator: Any iterable of items (e.g. the lines of a response).
        condition: Callable taking one item and returning a truthy value
            when the item is the one wanted.

    Returns:
        The first matching item, or None when nothing matches.
    """
    return next((line for line in iterator if condition(line)), None)
def write_to_index(number):
    """Append a comic number to ``index.txt`` in the current directory.

    The file is opened in append mode, so it is created when missing and
    existing entries are preserved. One number is written per line.

    Args:
        number: The comic number, as a string or an int.
    """
    with open('index.txt', 'a') as indexfile:
        # Format instead of concatenating so an int argument works too.
        indexfile.write('{}\n'.format(number))
def _load_index():
    """Return the set of comic numbers (as strings) listed in index.txt."""
    if not os.path.isfile('index.txt'):
        return set()
    with open('index.txt', 'r') as indexfile:
        return {entry.strip() for entry in indexfile}


def _latest_comic_number():
    """Return the newest comic number, read from the xkcd front page.

    Exits with a message when the 'Permanent' link line or the comic
    number inside it cannot be found.
    """
    response = requests.get('http://xkcd.com', timeout=10)
    line = get_line(response.iter_lines(), lambda x: x.startswith(b'Permanent'))
    if line is None:
        raise SystemExit('Could not find the permanent link on xkcd.com')
    # iter_lines() yields bytes; decode rather than parsing the bytes repr.
    match = re.search(r'com/([0-9]+)', line.decode('utf-8', 'replace'))
    if match is None:
        raise SystemExit('Could not read the latest comic number from xkcd.com')
    return int(match.group(1))


def _fetch(url, timeout):
    """GET *url*; return the response, or None on any failure or non-200."""
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    return response


def main():
    """Download every xkcd comic image not yet recorded in index.txt.

    Images are saved in the current directory under their original file
    name; each successfully handled comic number is appended to index.txt
    so later runs skip it.
    """
    max_comic_number = _latest_comic_number()
    print('Will download up to {} images'.format(max_comic_number))

    index = _load_index()

    # range() excludes its stop value, so add 1 to include the newest comic.
    for i in range(1, max_comic_number + 1):
        number = str(i)

        if number in index:
            print('Skip already downloaded comic #{}'.format(i))
            continue

        print('Checking {}... '.format(i), end='', flush=True)

        response = _fetch('http://xkcd.com/%d/' % i, timeout=2)
        if response is None:
            # Timeouts, connection errors and non-200 pages skip this comic
            # instead of aborting the whole run.
            print('Request failed...')
            continue

        line = get_line(response.iter_lines(), lambda x: x.startswith(b'Image'))

        if line is None:
            # Keep the page around so the unexpected format can be inspected.
            with open('response.html', 'w', encoding='utf-8') as responsefile:
                responsefile.write(response.text)
            print('you should check response.html')
            continue

        # The URL is the last space-separated token of the 'Image ...' line.
        image_url = line.decode('utf-8', 'replace').split(' ')[-1].strip()
        image_name = image_url.split('/')[-1]

        if not image_name:
            print('This is a notable xkcd, view it in the browser')
            continue

        if os.path.isfile(image_name):
            print('already exists, adding to index...')
            write_to_index(number)
            continue

        print('Downloading {}... '.format(image_url), end='')

        image_response = _fetch(image_url, timeout=10)
        if image_response is None:
            # Do not save (or index) an error page as if it were the image.
            print('Request failed...')
            continue

        with open(image_name, 'wb') as f:
            f.write(image_response.content)

        print('Downloaded {}'.format(i))
        write_to_index(number)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment