categulario/xkcd.py

## xkcd.py
import requests
import os
import re

def get_line(iterator, condition):
    """Return the first item of *iterator* that satisfies *condition*.

    Items are pulled from *iterator* one at a time and passed to
    *condition*; iteration stops at the first item for which it returns a
    truthy value, so a lazy iterator is only consumed up to the match.

    Args:
        iterator: Any iterable of items to scan.
        condition: Callable taking one item and returning a truthy value
            for the item that should be returned.

    Returns:
        The first matching item, or ``None`` when nothing matches
        (including when *iterator* is empty).
    """
    return next((line for line in iterator if condition(line)), None)

def write_to_index(number):
    """Append *number* as one line to ``index.txt`` in the current directory.

    The file is opened in append mode, so it is created on first use and
    existing entries are preserved.

    Args:
        number: The entry to record. A ``str`` is written as-is; any other
            value (e.g. an ``int``) is written using its ``str()`` form.
    """
    with open('index.txt', 'a') as indexfile:
        # Format rather than concatenate so an int does not raise TypeError.
        indexfile.write('{}\n'.format(number))

if __name__ == '__main__':
    # Download every comic image into the current directory, recording each
    # finished comic number in index.txt so that later runs can skip it.
    page_timeout = 2    # seconds allowed for an HTML page request
    image_timeout = 30  # seconds allowed for an image request

    # The newest comic number is taken from the first front-page line that
    # starts with "Permanent": the digits following "com/".
    response = requests.get('http://xkcd.com', timeout=page_timeout)
    permalink = get_line(response.iter_lines(), lambda x: x.startswith(b'Permanent'))
    match = None
    if permalink is not None:
        # iter_lines() yields bytes; decode instead of parsing the b'...' repr.
        match = re.search(r'com/([0-9]+)', permalink.decode('utf-8', 'replace'))
    if match is None:
        raise SystemExit('Could not find the latest comic number on the front page')
    max_comic_number = int(match.group(1))

    print('Will download up to {} images'.format(max_comic_number))

    # A set gives constant-time "already downloaded?" checks inside the loop.
    if os.path.isfile('index.txt'):
        with open('index.txt', 'r') as indexfile:
            index = {line.strip() for line in indexfile}
    else:
        index = set()

    # The upper bound is inclusive so the newest comic is fetched as well.
    for i in range(1, max_comic_number + 1):
        number = str(i)
        if number in index:
            print('Skip already downloaded comic #{}'.format(i))
            continue

        print('Checking {}... '.format(i), end='', flush=True)
        try:
            response = requests.get('http://xkcd.com/%d/' % i, timeout=page_timeout)
        except requests.RequestException:
            # A timeout or connection error on one comic must not stop the run.
            print('Request failed...')
            continue

        if response.status_code != 200:
            print('Request failed...')
            continue

        line = get_line(response.iter_lines(), lambda x: x.startswith(b'Image'))

        if line is None:
            # Keep the page around so the unexpected markup can be inspected.
            with open('response.html', 'w', encoding='utf-8') as responsefile:
                responsefile.write(response.text)
            print('you should check response.html')
            continue

        # The URL is the last space-separated token of the "Image..." line.
        image_url = line.decode('utf-8', 'replace').split(' ')[-1]
        image_name = image_url.split('/')[-1]

        if not image_name:
            # The URL has no file name component, so there is nothing to save.
            print('This is a notable xkcd, view it in the browser')
            continue

        if os.path.isfile(image_name):
            print('already exists, adding to index...')
            write_to_index(number)
            continue

        print('Downloading {}... '.format(image_url), end='', flush=True)
        try:
            image_response = requests.get(image_url, timeout=image_timeout)
        except requests.RequestException:
            print('Request failed...')
            continue

        if image_response.status_code != 200:
            # Do not save an error body as an image or mark the comic as done.
            print('Request failed...')
            continue

        with open(image_name, 'wb') as f:
            f.write(image_response.content)

        print('Downloaded {}'.format(i))

        write_to_index(number)
	import requests
	import os
	import re

	def get_line(iterator, condition):
	    """Return the first item of *iterator* that satisfies *condition*.

	    Items are pulled from *iterator* one at a time and passed to
	    *condition*; iteration stops at the first item for which it returns a
	    truthy value, so a lazy iterator is only consumed up to the match.

	    Args:
	        iterator: Any iterable of items to scan.
	        condition: Callable taking one item and returning a truthy value
	            for the item that should be returned.

	    Returns:
	        The first matching item, or ``None`` when nothing matches
	        (including when *iterator* is empty).
	    """
	    return next((line for line in iterator if condition(line)), None)

	def write_to_index(number):
	    """Append *number* as one line to ``index.txt`` in the current directory.

	    The file is opened in append mode, so it is created on first use and
	    existing entries are preserved.

	    Args:
	        number: The entry to record. A ``str`` is written as-is; any other
	            value (e.g. an ``int``) is written using its ``str()`` form.
	    """
	    with open('index.txt', 'a') as indexfile:
	        # Format rather than concatenate so an int does not raise TypeError.
	        indexfile.write('{}\n'.format(number))

	if __name__ == '__main__':
	    # Download every comic image into the current directory, recording each
	    # finished comic number in index.txt so that later runs can skip it.
	    page_timeout = 2    # seconds allowed for an HTML page request
	    image_timeout = 30  # seconds allowed for an image request

	    # The newest comic number is taken from the first front-page line that
	    # starts with "Permanent": the digits following "com/".
	    response = requests.get('http://xkcd.com', timeout=page_timeout)
	    permalink = get_line(response.iter_lines(), lambda x: x.startswith(b'Permanent'))
	    match = None
	    if permalink is not None:
	        # iter_lines() yields bytes; decode instead of parsing the b'...' repr.
	        match = re.search(r'com/([0-9]+)', permalink.decode('utf-8', 'replace'))
	    if match is None:
	        raise SystemExit('Could not find the latest comic number on the front page')
	    max_comic_number = int(match.group(1))

	    print('Will download up to {} images'.format(max_comic_number))

	    # A set gives constant-time "already downloaded?" checks inside the loop.
	    if os.path.isfile('index.txt'):
	        with open('index.txt', 'r') as indexfile:
	            index = {line.strip() for line in indexfile}
	    else:
	        index = set()

	    # The upper bound is inclusive so the newest comic is fetched as well.
	    for i in range(1, max_comic_number + 1):
	        number = str(i)
	        if number in index:
	            print('Skip already downloaded comic #{}'.format(i))
	            continue

	        print('Checking {}... '.format(i), end='', flush=True)
	        try:
	            response = requests.get('http://xkcd.com/%d/' % i, timeout=page_timeout)
	        except requests.RequestException:
	            # A timeout or connection error on one comic must not stop the run.
	            print('Request failed...')
	            continue

	        if response.status_code != 200:
	            print('Request failed...')
	            continue

	        line = get_line(response.iter_lines(), lambda x: x.startswith(b'Image'))

	        if line is None:
	            # Keep the page around so the unexpected markup can be inspected.
	            with open('response.html', 'w', encoding='utf-8') as responsefile:
	                responsefile.write(response.text)
	            print('you should check response.html')
	            continue

	        # The URL is the last space-separated token of the "Image..." line.
	        image_url = line.decode('utf-8', 'replace').split(' ')[-1]
	        image_name = image_url.split('/')[-1]

	        if not image_name:
	            # The URL has no file name component, so there is nothing to save.
	            print('This is a notable xkcd, view it in the browser')
	            continue

	        if os.path.isfile(image_name):
	            print('already exists, adding to index...')
	            write_to_index(number)
	            continue

	        print('Downloading {}... '.format(image_url), end='', flush=True)
	        try:
	            image_response = requests.get(image_url, timeout=image_timeout)
	        except requests.RequestException:
	            print('Request failed...')
	            continue

	        if image_response.status_code != 200:
	            # Do not save an error body as an image or mark the comic as done.
	            print('Request failed...')
	            continue

	        with open(image_name, 'wb') as f:
	            f.write(image_response.content)

	        print('Downloaded {}'.format(i))

	        write_to_index(number)