Last active
January 9, 2019 21:12
-
-
Save 0xD34D/dbe20110f8cff242c815f50c1707095e to your computer and use it in GitHub Desktop.
Generate .cbz comic books from readcomics.tv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Generate a .cbz comic book archive from a comic chapter web page.

Usage: pass the chapter URL as the first command-line argument.

The script downloads the page, collects every <img> inside the element
with id "imgPages", fetches each image into a temporary directory and
zips them into "<title>.cbz" in the current working directory.
"""
import os
import shutil
import sys
from urllib import request
from zipfile import ZipFile

from lxml import html
import requests

# NOTE(review): AGENT is defined but not sent with any request made by this
# script; presumably it was meant to be used as a User-Agent header. Confirm
# before wiring it in, since it may change what the site serves.
AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'
TMP_DIR = '/tmp/comics'


def _comic_title(tree):
    """Return the comic title taken from the page's <title> element.

    Everything after the first ' | ' separator is dropped. Path separators
    are replaced because the title is used as the output file name. Falls
    back to 'comic' when the page has no usable <title>.
    """
    node = tree.find('.//title')
    text = node.text if node is not None else None
    if not text:
        return 'comic'
    # strip off the garbage at the end of the comic book title
    title = text.split(' | ')[0].strip()
    return title.replace('/', '-') or 'comic'


def _find_pages(tree):
    """Return the <img> elements inside the first div with id "imgPages".

    Returns an empty list when the container is not present, so the caller
    can report "no pages" instead of crashing on an IndexError.
    """
    containers = tree.xpath('.//div[@id="imgPages"]')
    if not containers:
        return []
    return containers[0].xpath('.//img')


def _build_cbz(title, pages):
    """Download every page image and pack them into '<title>.cbz'.

    Images are staged in TMP_DIR, which is always removed afterwards, even
    when a download fails part-way through.
    """
    # exist_ok tolerates a directory left behind by an earlier aborted run
    os.makedirs(TMP_DIR, exist_ok=True)
    try:
        with ZipFile('%s.cbz' % title, 'w') as cbz:
            for number, img in enumerate(pages, start=1):
                page_src = img.get('src')
                if not page_src:
                    print('Skipping page %d: image has no src' % number)
                    continue
                page_name = img.get('alt') or 'page %d' % number
                print('Fetching %s' % page_name)
                file_name = '%03d.jpg' % number
                path = os.path.join(TMP_DIR, file_name)
                request.urlretrieve(page_src, path)
                # arcname keeps the pages at the archive root instead of
                # embedding the temporary directory path in the .cbz
                cbz.write(path, arcname=file_name)
    finally:
        # remove our tmp directory now that we are all done
        shutil.rmtree(TMP_DIR, ignore_errors=True)


def main(argv=None):
    """Run the script; *argv* defaults to sys.argv.

    Exits with a message on stderr and a non-zero status when no URL is
    given, the page request fails, or the page contains no images.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit('You must specify a fullcomic.pro url')
    url = argv[1]

    try:
        page = requests.get(url)
        # treat HTTP error statuses as failures rather than parsing an
        # error page as if it were the comic
        page.raise_for_status()
    except requests.exceptions.RequestException as e:
        sys.exit('Error requesting page: %s' % e)

    tree = html.fromstring(page.content)
    title = _comic_title(tree)

    # grab all the chapter images from the page
    pages = _find_pages(tree)
    # no pages means no comic book for you!
    if not pages:
        sys.exit('Could not find any pages for %s' % title)

    # download and zip up the pages
    print('Processing %d pages for %s' % (len(pages), title))
    _build_cbz(title, pages)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment