Last active
September 29, 2018 16:55
-
-
Save omirobarcelo/b7362447ed49ac9881e6ad54c4199d66 to your computer and use it in GitHub Desktop.
Script to download and organize all the pages of a chapter in MangaTown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download every page image of a MangaTown chapter into a destination folder.
#
# Usage: python py_mangatown_download.py <url_of_chapter_first_img> <dest_folder>
#
# Starting from the given page URL, the script fetches the page HTML, pulls
# the image URL out of it, saves the image into <dest_folder>, then rewrites
# the last path segment of the page URL to "<n>.html" for n = 2, 3, ... and
# repeats until a request fails with an HTTP error.
import sys
import os
import urllib2


def _fetch(url):
    """Return the raw body served at *url*, always closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


if len(sys.argv) < 3:
    print('usage: python py_mangatown_download.py <url_of_chapter_first_img> <dest_folder>')
    # sys.exit instead of the site-provided exit(), and a non-zero status
    # because a missing argument is an error.
    sys.exit(1)

# Create the destination in one call rather than component by component:
# splitting on '/' produced empty components for absolute paths and trailing
# slashes, and os.makedirs('') raises OSError.
dest_folder = sys.argv[2]
if not os.path.isdir(dest_folder):
    os.makedirs(dest_folder)
os.chdir(dest_folder)

# NOTE(review): CONTENT_LINE, CONTENT_TAG, CONTENT_URL, FNAME_ELEM and
# FNAME_NAME are not defined in the visible part of this script; they must be
# defined before this point or the loop below fails with NameError -- confirm.
page = 1
html_url = sys.argv[1]
while True:
    try:
        html = _fetch(html_url)
        # Pick the whitespace-separated token holding the image attribute,
        # then the quoted value inside it.
        content = html.split('\n')[CONTENT_LINE].split()[CONTENT_TAG]
        img_url = content.split('"')[CONTENT_URL]
        img = _fetch(img_url)
    except urllib2.HTTPError:
        # An HTTP error on either request ends the download loop.
        break

    # File name: one path segment of the image URL, minus any query string.
    fname = img_url.split('/')[FNAME_ELEM].split('?')[FNAME_NAME]
    # Binary mode: image bytes must not go through newline translation.
    with open(fname, 'wb') as fout:
        fout.write(img)

    # Next page: replace the last path segment with "<page>.html".
    page += 1
    segments = html_url.split('/')
    segments[-1] = str(page) + ".html"
    html_url = '/'.join(segments)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment