Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Generate .cbz comic books from readcomics.tv
#!/usr/bin/env python3
from lxml import html
import os
import requests
import shutil
import sys
from urllib import request
from zipfile import ZipFile
# User-Agent header sent with every HTTP request made by this script.
AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
# Seconds to wait on any single HTTP request before giving up.
TIMEOUT = 30


def _fetch(url):
    """Return the body of *url* as bytes, sending AGENT as the User-Agent.

    Raises requests.exceptions.RequestException on connection errors,
    timeouts and non-2xx responses, so an error page is never mistaken
    for real content.
    """
    response = requests.get(url, headers={'User-Agent': AGENT}, timeout=TIMEOUT)
    response.raise_for_status()
    return response.content


def _comic_title(tree):
    """Return a file-name-safe comic title taken from the page's <title>."""
    raw = tree.findtext('.//title') or 'comic'
    # strip off the garbage at the end of the comic book title
    title = raw.split(' | ')[0].strip()
    # a path separator in the title would point the archive at another directory
    for separator in ('/', '\\'):
        title = title.replace(separator, '-')
    return title or 'comic'


def main(argv=None):
    """Download every page image of a comic chapter into '<title>.cbz'.

    argv follows the sys.argv convention (argv[1] is the chapter URL) and
    defaults to sys.argv. The archive is written to the current directory.

    Returns 0 on success and 1 on any failure (missing URL, request error,
    or no page images found); failures are reported on stderr.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print('You must specify a fullcomic.pro url', file=sys.stderr)
        return 1
    url = argv[1]

    try:
        content = _fetch(url)
    except requests.exceptions.RequestException as e:
        print('Error requesting page: %s' % e, file=sys.stderr)
        return 1

    tree = html.fromstring(content)
    title = _comic_title(tree)

    # grab all the chapter images from the page; a missing container is
    # treated the same as an empty one
    containers = tree.xpath('.//div[@id="imgPages"]')
    pages = containers[0].xpath('.//img[@src]') if containers else []

    # no pages means no comic book for you!
    if not pages:
        print('Could not find any pages for %s' % title, file=sys.stderr)
        return 1

    # download the pages straight into the archive; entries are stored as
    # flat, zero-padded names so the page order is preserved
    archive = '%s.cbz' % title
    print('Processing %d pages for %s' % (len(pages), title))
    try:
        with ZipFile(archive, 'w') as cbz:
            for number, img in enumerate(pages, start=1):
                print('Fetching %s' % (img.get('alt') or 'page %d' % number))
                cbz.writestr('%03d.jpg' % number, _fetch(img.get('src')))
    except requests.exceptions.RequestException as e:
        # do not leave a truncated comic book behind
        os.remove(archive)
        print('Error downloading page: %s' % e, file=sys.stderr)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment