Skip to content

Instantly share code, notes, and snippets.

@Schnouki
Created August 13, 2010 14:49
Show Gist options
  • Save Schnouki/522998 to your computer and use it in GitHub Desktop.
Save Schnouki/522998 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#
# This is manolodownload, a simple script that allows one to download comic
# books from the Manolosanctis website (http://www.manolosanctis.com/) and save
# them locally as a .cbz archive, readable by an application like Comix.
#
# This can only download comic books that are already readable on the
# Manolosanctis website (many of them being redistributable under the terms of a
# Creative Commons license); it is in no way intended to harm anybody's
# intellectual property or copyright. This is simply intended to make these
# amazing books accessible to people who do not want to use a Flash reader, or
# who would want to read them on a platform that is not supported by Flash (e.g.
# PowerPC, ARM, many mobile phones...).
#
# Copyright (c) 2010 Thomas Jost
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import httplib2
import os.path
import re
import sys
import xml.etree.ElementTree
import zipfile
# Matches the URL of an album page on Manolosanctis; group 1 captures the
# numeric album id. The dots of the host name are escaped so that they only
# match a literal "." (unescaped, they would accept any character there).
VALID_ALBUM_URL = re.compile(r"^http://www\.manolosanctis\.com/bd/(\d+)/")
# Template of the URL serving an album's XML metadata; {0} is the album id.
ALBUM_METADATA_URL = "http://www.manolosanctis.com/xml/album/{0}"
# Module-level httplib2 client, shared by get_album_metadata() and
# get_album_page() for their HTTP requests.
_http = httplib2.Http()
def handle_url(url):
    """Download the album designated by a Manolosanctis album URL.

    The album id is extracted from *url* with VALID_ALBUM_URL and passed to
    download_album(). If *url* does not match, an error naming the rejected
    URL is printed on stderr and nothing is downloaded.
    """
    # Is this a correct Manolosanctis URL?
    m = VALID_ALBUM_URL.match(url)
    if m is None:
        # Substitute the URL into the message so the user sees which argument
        # was rejected (a bare "{0}" would be printed otherwise).
        print("Invalid URL: {0}".format(url), file=sys.stderr)
        return

    download_album(m.group(1))
def download_album(bd):
    """Download every page of album *bd* into a .cbz archive.

    The archive is named "<authors> - <title>.cbz" and is opened relative to
    the current working directory. Pages are stored as 0001.jpg, 0002.jpg, ...
    following their order in the metadata. A page that cannot be downloaded is
    reported on stderr and skipped; its number is not reused. Returns without
    creating any file when the metadata cannot be retrieved.
    """
    md = get_album_metadata(bd)
    if md is None:
        return

    pages = md["pages"]
    total = len(pages)
    print("Downloading \"{0}\" by {1} ({2} pages)...".format(md["title"], md["authors"], total))

    fn = "{1} - {0}.cbz".format(md["title"], md["authors"])
    # Title and authors come from the server: keep path separators out of the
    # file name so the archive is always created where the name says.
    for sep in (os.sep, os.altsep):
        if sep:
            fn = fn.replace(sep, "_")

    dl = 0
    # The context manager guarantees the archive is finalised and closed, even
    # if a download raises half-way through.
    with zipfile.ZipFile(fn, mode="w") as cbz:
        for n, url in enumerate(pages, start=1):
            img = get_album_page(url)
            if img is None:
                print("Could not download page {0}".format(n), file=sys.stderr)
            else:
                cbz.writestr("{0:04d}.jpg".format(n), img)
                dl += 1
            # "\r" keeps the progress counter on a single terminal line.
            print("Downloaded: [{0} / {1}]".format(n, total), end="\r")

    print("Downloaded {0} pages successfuly!".format(dl))
def get_album_metadata(bd):
    """Fetch and parse the XML metadata of album *bd*.

    Returns a dict with the keys "title", "authors" and "pages" (a list of
    page image URLs in document order), or None when the server does not
    answer with HTTP 200 or when the response is not well-formed XML. Both
    failures are reported on stderr.
    """
    print("Retrieving album metadata...")

    # Read the metadata
    response, content = _http.request(ALBUM_METADATA_URL.format(bd))
    if response.status != 200:
        print("Could not retrieve metadata: got HTTP code {0}.".format(response.status), file=sys.stderr)
        return None

    # Now parse the XML and extract useful stuff
    try:
        tree = xml.etree.ElementTree.fromstring(content)
    except xml.etree.ElementTree.ParseError as err:
        # Treat an unparsable answer like any other retrieval failure instead
        # of letting the traceback abort the whole run.
        print("Could not parse metadata: {0}.".format(err), file=sys.stderr)
        return None

    # <page> elements without a "src" attribute have nothing to download.
    sources = [elt.get("src") for elt in tree.findall("album/page")]
    sources = [src for src in sources if src]

    return {
        "title": tree.findtext("album/title"),
        "authors": tree.findtext("album/authors"),
        # Add the size in each url: "_l" is inserted before the extension
        "pages": ["{0}_l{1}".format(*os.path.splitext(src)) for src in sources],
    }
def get_album_page(url):
    """Fetch *url* and return the response body, or None if the status is not 200."""
    headers, body = _http.request(url)
    return body if headers.status == 200 else None
if __name__ == '__main__':
    # Script entry point: every command-line argument is an album URL.
    album_urls = sys.argv[1:]
    if not album_urls:
        # No URL given: show the usage line and exit with an error status.
        print("Syntax: {0} url1 [url2 ...]".format(sys.argv[0]), file=sys.stderr)
        sys.exit(1)

    for album_url in album_urls:
        handle_url(album_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment