APadierna/download_xkcd_strips.py

## download_xkcd_strips.py
#!/usr/bin/env python
"""
Download all the comic strips listed in the XKCD archive page into the current
directory.
"""

from __future__ import print_function

import re
import sys
import urllib

# for backwards compatibility
if sys.version_info[0] > 2:
    import urllib.request as ul
else:
    import urllib as ul


def main():
    """Script entry point: download every strip listed in the XKCD archive."""
    get_xkcd_strips()


def get_xkcd_strips():
    """
    Download every comic strip linked from the XKCD archive page.

    Fetches ``http://xkcd.com/archive/``, extracts the ``(strip_id, date,
    title)`` triple of each archive link and passes
    ``'<strip_id>_<date>_<title>'`` together with the strip's page URL to
    ``get_comic_image``.

    Returns nothing; the downloads are the only effect.
    """
    response = ul.urlopen('http://xkcd.com/archive/')
    try:
        # Decode the payload instead of calling str() on it: on Python 3,
        # str() of bytes yields the "b'...'" repr with escaped characters.
        # Assumes the page is UTF-8; undecodable bytes are replaced.
        xkcd_index = response.read().decode('utf-8', 'replace')
    finally:
        response.close()

    # Raw strings, so the regex escapes (\d, \w, \s) are not parsed as
    # (invalid) string escape sequences.
    xkcd_strip_pattern = (
        r'<a href="/(?P<strip_id>\d+)/" title="(?P<date>[\d-]+)">'
        r'(?P<title>[\w\s]+)</a><br/>'
    )

    # findall yields one (strip_id, date, title) tuple per matching link.
    for xkcd_strip in re.findall(xkcd_strip_pattern, xkcd_index):
        get_comic_image('_'.join(xkcd_strip),
                        'http://xkcd.com/' + xkcd_strip[0])


def get_comic_image(comic_name, comic_url):
    """
    Download the image of a single comic strip.

    comic_name -- base name (without extension) of the output file; the
                  image is saved as ``comic_name + '.png'``.
    comic_url  -- URL of the comic's page, which is searched for the first
                  ``http://imgs.xkcd.com/comics/...`` image link.

    Nothing is downloaded when the page contains no such link. An IOError
    raised while retrieving the image is reported on stdout, not re-raised.
    """
    print('--\nchecking:', comic_url)
    response = ul.urlopen(comic_url)
    try:
        # Decode rather than str(): on Python 3, str() of bytes is the
        # "b'...'" repr. Assumes UTF-8; undecodable bytes are replaced.
        html = response.read().decode('utf-8', 'replace')
    finally:
        response.close()

    # Raw string so the regex escapes are not parsed as string escapes.
    strip_pattern = r'http://imgs\.xkcd\.com/comics/[\w\d\.]+'
    image_url = re.search(strip_pattern, html)
    if not image_url:
        return

    print('downloading:', image_url.group())
    try:
        # NOTE: the file is always named *.png, whatever the image type.
        ul.urlretrieve(image_url.group(), comic_name + '.png')
    except IOError:
        # comic_url is a plain string (no .group() method); print it as is.
        print('Error: Unable to obtain image from:', comic_url)


# Start the download only when run as a script, not when imported.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python
	"""
	Download all the comic strips listed in the XKCD archive page into the current
	directory.
	"""

	from __future__ import print_function

	import re
	import sys
	import urllib

	# for backwards compatibility
	if sys.version_info[0] > 2:
	import urllib.request as ul
	else:
	import urllib as ul


	def main():
	    """Script entry point: download every strip listed in the XKCD archive."""
	    get_xkcd_strips()


	def get_xkcd_strips():
	    """
	    Download every comic strip linked from the XKCD archive page.

	    Fetches ``http://xkcd.com/archive/``, extracts the ``(strip_id, date,
	    title)`` triple of each archive link and passes
	    ``'<strip_id>_<date>_<title>'`` together with the strip's page URL to
	    ``get_comic_image``.

	    Returns nothing; the downloads are the only effect.
	    """
	    response = ul.urlopen('http://xkcd.com/archive/')
	    try:
	        # Decode the payload instead of calling str() on it: on Python 3,
	        # str() of bytes yields the "b'...'" repr with escaped characters.
	        # Assumes the page is UTF-8; undecodable bytes are replaced.
	        xkcd_index = response.read().decode('utf-8', 'replace')
	    finally:
	        response.close()

	    # Raw strings, so the regex escapes (\d, \w, \s) are not parsed as
	    # (invalid) string escape sequences.
	    xkcd_strip_pattern = (
	        r'<a href="/(?P<strip_id>\d+)/" title="(?P<date>[\d-]+)">'
	        r'(?P<title>[\w\s]+)</a><br/>'
	    )

	    # findall yields one (strip_id, date, title) tuple per matching link.
	    for xkcd_strip in re.findall(xkcd_strip_pattern, xkcd_index):
	        get_comic_image('_'.join(xkcd_strip),
	                        'http://xkcd.com/' + xkcd_strip[0])


	def get_comic_image(comic_name, comic_url):
	    """
	    Download the image of a single comic strip.

	    comic_name -- base name (without extension) of the output file; the
	                  image is saved as ``comic_name + '.png'``.
	    comic_url  -- URL of the comic's page, which is searched for the first
	                  ``http://imgs.xkcd.com/comics/...`` image link.

	    Nothing is downloaded when the page contains no such link. An IOError
	    raised while retrieving the image is reported on stdout, not re-raised.
	    """
	    print('--\nchecking:', comic_url)
	    response = ul.urlopen(comic_url)
	    try:
	        # Decode rather than str(): on Python 3, str() of bytes is the
	        # "b'...'" repr. Assumes UTF-8; undecodable bytes are replaced.
	        html = response.read().decode('utf-8', 'replace')
	    finally:
	        response.close()

	    # Raw string so the regex escapes are not parsed as string escapes.
	    strip_pattern = r'http://imgs\.xkcd\.com/comics/[\w\d\.]+'
	    image_url = re.search(strip_pattern, html)
	    if not image_url:
	        return

	    print('downloading:', image_url.group())
	    try:
	        # NOTE: the file is always named *.png, whatever the image type.
	        ul.urlretrieve(image_url.group(), comic_name + '.png')
	    except IOError:
	        # comic_url is a plain string (no .group() method); print it as is.
	        print('Error: Unable to obtain image from:', comic_url)


	# Start the download only when run as a script, not when imported.
	if __name__ == '__main__':
	    main()