# sharno/Download Calvin and Hobbes.py

## Download Calvin and Hobbes.py
import urllib2
import urllib
from bs4 import BeautifulSoup

# Page the crawl starts from; the trailing path segments are the strip's
# year/month/day and are later reused to build the saved file name.
url = "http://www.gocomics.com/calvinandhobbes/1985/11/18"
# Site root, prepended to the "next" link's href to form the next page URL.
domain = "http://www.gocomics.com"

def make_soup(url):
    """Fetch *url* and return its HTML parsed into a BeautifulSoup tree.

    The request is sent with browser-like headers (see ``hdr`` below).

    :param url: absolute URL of the page to download.
    :returns: ``BeautifulSoup`` object built with the ``lxml`` parser.
    :raises urllib2.URLError: propagated from ``urllib2.urlopen`` on
        network or HTTP failure.
    """
    hdr = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
        'Accept-Encoding': 'none',
        'Accept-Language': 'en-US,en;q=0.8',
        'Connection': 'keep-alive',
    }
    request = urllib2.Request(url, headers=hdr)
    response = urllib2.urlopen(request)
    try:
        html = response.read()
    finally:
        # Release the connection explicitly; the crawl opens one response
        # per page and should not rely on garbage collection to close them.
        response.close()
    return BeautifulSoup(html, "lxml")

def get_image(soup, url):
    """Save the comic strip image found in *soup* to the working directory.

    ``<img class="strip">`` tags without a ``width`` attribute are tried
    first (this script treats them as the high-resolution version); if none
    exist, any ``<img class="strip">`` is accepted instead.

    :param soup: parsed page exposing ``findAll`` (a BeautifulSoup tree).
    :param url: URL of the page; its last 10 characters (``YYYY/MM/DD``)
        become the file name ``YYYY-MM-DD.gif``.
    :returns: the ``src`` of the downloaded image, or ``None`` when the page
        contains no strip image at all (nothing is downloaded in that case).
    """
    images = soup.findAll('img', {'class': 'strip', 'width': None})
    print('%d images found.' % len(images))

    if not images:
        print('high resolution not found, downloading lower resolution')
        images = soup.findAll('img', {'class': 'strip'})

    if not images:
        # Nothing to download on this page; report it instead of failing
        # with an IndexError on images[0].
        print('no strip image found at ' + url)
        return None

    link = images[0].get('src')
    # Relies on the URL ending in a zero-padded YYYY/MM/DD date.
    filename = url[-10:].replace('/', '-') + '.gif'
    urllib.urlretrieve(link, filename)
    return link

def next_page(soup):
    """Return the href of the page's "next strip" link, or ``None``.

    Scans ``<a class="next">`` anchors and returns the first href that
    contains ``'calvinandhobbes'``. Anchors without an href are skipped.

    :param soup: parsed page exposing ``findAll`` (a BeautifulSoup tree).
    :returns: the href string as it appears in the page, or ``None`` when no
        matching link exists.
    """
    for anchor in soup.findAll('a', {'class': 'next'}):
        href = anchor.get('href')
        # href may be missing (None); guard before the substring test.
        if href and 'calvinandhobbes' in href:
            return href
    return None


# Crawl forward from the starting page, saving one strip per page, until a
# page offers no further "next" link. Each page is fetched exactly once.
while True:
    soup = make_soup(url)
    get_image(soup, url)
    try:
        n = next_page(soup)
    except IndexError:
        # No matching "next" link on the page: treat it as the end of the
        # archive rather than aborting with a traceback.
        n = None
    if not n:
        break
    # The "next" href is site-relative, so prefix the domain.
    url = domain + n
    print(url)