0xD34D/generate_comic_book.py

## generate_comic_book.py
#!/usr/bin/env python3
from lxml import html
import os
import requests
import shutil
import sys
from urllib import request
from zipfile import ZipFile

# NOTE(review): AGENT is defined but is not sent with any request in this
# script -- confirm whether it was meant to be passed as a User-Agent header.
AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
# Scratch directory that holds the downloaded page images while zipping.
TMP_DIR = '/tmp/comics'


def clean_title(raw_title):
    """Return the comic title with everything after the first ' | ' removed.

    The page <title> is expected to look like 'Comic Name | site text';
    only the part before the separator is kept, with surrounding
    whitespace trimmed.
    """
    return raw_title.split(' | ')[0].strip()


def archive_name(title):
    """Return the '.cbz' file name to create for *title*.

    The title comes from a downloaded page, so path separators are replaced
    with '_' to keep the archive inside the current directory.
    """
    safe_title = title.replace('/', '_').replace('\\', '_')
    return '%s.cbz' % safe_title


def main(argv=None):
    """Download every page image of a comic and pack them into a .cbz file.

    Args:
        argv: Command-line arguments including the program name; defaults
            to ``sys.argv``. ``argv[1]`` must be the comic page url.

    Returns:
        0 on success, 1 when the url is missing, the page cannot be fetched,
        or no title/pages can be found on it.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print('You must specify a fullcomic.pro url')
        return 1

    url = argv[1]
    try:
        page = requests.get(url)
        # Treat HTTP error statuses as failures instead of parsing an error page.
        page.raise_for_status()
    except requests.exceptions.RequestException as e:
        # %-formatting: concatenating str + exception raises TypeError.
        print('Error requesting page: %s' % e)
        return 1

    tree = html.fromstring(page.content)
    title_node = tree.find('.//title')
    if title_node is None or not title_node.text:
        print('Could not find a title for %s' % url)
        return 1
    # strip off the garbage at the end of the comic book title
    title = clean_title(title_node.text)

    # grab all the chapter images from the page; a missing container simply
    # means there are no pages
    containers = tree.xpath('.//div[@id="imgPages"]')
    pages = containers[0].xpath('.//img') if containers else []
    # no pages means no comic book for you!
    if not pages:
        print('Could not find any pages for %s' % title)
        return 1

    # create a tmp directory for storing pages (tolerate one left behind
    # by an earlier aborted run)
    os.makedirs(TMP_DIR, exist_ok=True)
    try:
        # download and zip up the pages
        print('Processing %d pages for %s' % (len(pages), title))
        with ZipFile(archive_name(title), 'w') as cbz:
            for number, img in enumerate(pages, start=1):
                file_name = '%03d.jpg' % number
                page_src = img.get('src')
                if not page_src:
                    print('Skipping page %d: image has no src' % number)
                    continue
                print('Fetching %s' % (img.get('alt') or file_name))
                local_path = os.path.join(TMP_DIR, file_name)
                request.urlretrieve(page_src, local_path)
                # Store only the file name so the archive does not embed
                # the temporary directory path.
                cbz.write(local_path, arcname=file_name)
    finally:
        # remove our tmp directory now that we are all done, even when a
        # download failed part-way through
        shutil.rmtree(TMP_DIR, ignore_errors=True)
    return 0


if __name__ == '__main__':
    sys.exit(main())
	#!/usr/bin/env python3
	from lxml import html
	import os
	import requests
	import shutil
	import sys
	from urllib import request
	from zipfile import ZipFile

	# NOTE(review): AGENT is defined but is not sent with any request in this
	# script -- confirm whether it was meant to be passed as a User-Agent header.
	AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
	# Scratch directory that holds the downloaded page images while zipping.
	TMP_DIR = '/tmp/comics'


	def clean_title(raw_title):
	    """Return the comic title with everything after the first ' | ' removed.

	    The page <title> is expected to look like 'Comic Name | site text';
	    only the part before the separator is kept, with surrounding
	    whitespace trimmed.
	    """
	    return raw_title.split(' | ')[0].strip()


	def archive_name(title):
	    """Return the '.cbz' file name to create for *title*.

	    The title comes from a downloaded page, so path separators are replaced
	    with '_' to keep the archive inside the current directory.
	    """
	    safe_title = title.replace('/', '_').replace('\\', '_')
	    return '%s.cbz' % safe_title


	def main(argv=None):
	    """Download every page image of a comic and pack them into a .cbz file.

	    Args:
	        argv: Command-line arguments including the program name; defaults
	            to ``sys.argv``. ``argv[1]`` must be the comic page url.

	    Returns:
	        0 on success, 1 when the url is missing, the page cannot be fetched,
	        or no title/pages can be found on it.
	    """
	    argv = sys.argv if argv is None else argv
	    if len(argv) < 2:
	        print('You must specify a fullcomic.pro url')
	        return 1

	    url = argv[1]
	    try:
	        page = requests.get(url)
	        # Treat HTTP error statuses as failures instead of parsing an error page.
	        page.raise_for_status()
	    except requests.exceptions.RequestException as e:
	        # %-formatting: concatenating str + exception raises TypeError.
	        print('Error requesting page: %s' % e)
	        return 1

	    tree = html.fromstring(page.content)
	    title_node = tree.find('.//title')
	    if title_node is None or not title_node.text:
	        print('Could not find a title for %s' % url)
	        return 1
	    # strip off the garbage at the end of the comic book title
	    title = clean_title(title_node.text)

	    # grab all the chapter images from the page; a missing container simply
	    # means there are no pages
	    containers = tree.xpath('.//div[@id="imgPages"]')
	    pages = containers[0].xpath('.//img') if containers else []
	    # no pages means no comic book for you!
	    if not pages:
	        print('Could not find any pages for %s' % title)
	        return 1

	    # create a tmp directory for storing pages (tolerate one left behind
	    # by an earlier aborted run)
	    os.makedirs(TMP_DIR, exist_ok=True)
	    try:
	        # download and zip up the pages
	        print('Processing %d pages for %s' % (len(pages), title))
	        with ZipFile(archive_name(title), 'w') as cbz:
	            for number, img in enumerate(pages, start=1):
	                file_name = '%03d.jpg' % number
	                page_src = img.get('src')
	                if not page_src:
	                    print('Skipping page %d: image has no src' % number)
	                    continue
	                print('Fetching %s' % (img.get('alt') or file_name))
	                local_path = os.path.join(TMP_DIR, file_name)
	                request.urlretrieve(page_src, local_path)
	                # Store only the file name so the archive does not embed
	                # the temporary directory path.
	                cbz.write(local_path, arcname=file_name)
	    finally:
	        # remove our tmp directory now that we are all done, even when a
	        # download failed part-way through
	        shutil.rmtree(TMP_DIR, ignore_errors=True)
	    return 0


	if __name__ == '__main__':
	    sys.exit(main())