# oneamitj/hellocomics.py

## hellocomics.py
#!/usr/bin/python3

import os
import subprocess
import urllib.error
import urllib.parse
import urllib.request
from pdb import set_trace

from bs4 import BeautifulSoup

# Download every page image of every chapter reachable from one comic page.
#
# Example input:
#   http://www.hellocomic.com/miles-morales-ultimate-spider-man/c1/p1
url = input('Enter url from hellocomic.com: ')

# Sent with every page request made through fetch_soup() below.
user_agent = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3'
headers = {'User-Agent': user_agent}


def fetch_soup(page_url):
    """Download *page_url* using ``headers`` and return it as parsed HTML.

    The response is closed as soon as its body has been read, and the parser
    is named explicitly so the result does not depend on which optional
    parsers happen to be installed.
    """
    request = urllib.request.Request(page_url, None, headers)
    with urllib.request.urlopen(request) as response:
        return BeautifulSoup(response.read(), 'html.parser')


def require(node, description):
    """Return *node*, or stop the script with a readable message if it is None."""
    if node is None:
        raise SystemExit('Could not find {} in the downloaded page.'.format(description))
    return node


soup_page = fetch_soup(url)

# The <option> values under the element with id "e2" are used as chapter URLs.
# Options without a value cannot be fetched, so they are skipped.
chapters = [
    option.get('value')
    for option in require(soup_page.find(id='e2'), 'the chapter list (id "e2")').find_all('option')
    if option.get('value')
]

for chapter in chapters:
    html_page = fetch_soup(chapter)

    # The number of <option> entries under id "e1" is taken as the page count.
    page_list = require(html_page.find(id='e1'), 'the page list (id "e1")')
    page_count = len(page_list.find_all('option'))

    folder = html_page.title.string.split(' - Read')[0]

    # The folder name comes from the remote page title, so create it without
    # going through a shell.
    os.makedirs(folder, exist_ok=True)

    print('\nDownloading {}\n\tPages'.format(folder))

    for page_number in range(1, page_count + 1):
        cover = require(
            html_page.find(attrs={'class': 'coverIssue'}),
            'the page image container (class "coverIssue")',
        )
        img_url = cover.find('img').get('src')

        # Prefer the page label from the title; fall back to the running
        # index when the title does not contain the expected marker.
        title_parts = html_page.title.string.split(' - Page #')
        current_page = title_parts[1] if len(title_parts) > 1 else str(page_number)

        # flush so the page label is visible while wget is still running.
        print("\t==> {}".format(current_page), end=' ', flush=True)

        # Argument list instead of a shell string: the URL and file name are
        # scraped from remote HTML and must not be interpreted by a shell.
        # "--" keeps a URL that starts with "-" from being read as an option.
        # Raises FileNotFoundError if wget is not installed.
        target = os.path.join(folder, current_page + '.jpg')
        result = subprocess.run(
            ['wget', '-q', '-nc', '-c', '-O', target, '--', img_url]
        )
        if result.returncode == 0:
            print("✓")
        else:
            print("✗ (wget exit status {})".format(result.returncode))

        # Follow the link to the next page only while pages remain; the link
        # on the final page is not needed for this chapter.
        if page_number < page_count:
            nxt_page = cover.a.get('href')
            html_page = fetch_soup(nxt_page)