Created
August 9, 2019 19:55
-
-
Save darkcores/40a155d15335e1b462f680d95f6a1df0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import requests | |
from lxml import html | |
import sys | |
import os | |
import glob | |
import zipfile | |
class MgkaDL:
    """Scrape a series page and store each chapter's images as a ``.cbz`` zip.

    The constructor argument is either the URL of a series page or the
    literal string ``'update'``; in the latter case :meth:`download` walks
    every URL previously recorded in the save file.  Archives are written to
    ``mgkadl/<last URL path segment>/<chapter>.cbz`` relative to the current
    working directory.

    Any HTTP response whose status is not 200 prints an error message and
    terminates the process with ``sys.exit(1)``.
    """

    # File (in the current working directory) listing every URL requested.
    SAVE_FILE = "mgkadl_save.txt"
    # Directory under which one sub-directory per series is created.
    DOWNLOAD_ROOT = "mgkadl"

    def __init__(self, url):
        """Load the saved URL list and prepare to download *url*.

        Args:
            url: A series page URL, or ``'update'`` to process all saved
                URLs.  A URL not already in the save file is appended to it.
        """
        self.session = requests.Session()
        self.saved_urls = []
        self.url = url
        self.load_known()
        if self.url == 'update':
            print("Updating all ...")
            return
        if url not in self.saved_urls:
            print("Downloading new url")
            with open(self.SAVE_FILE, "a") as save_file:
                save_file.write("%s\n" % url)
        # NOTE(review): the original indentation was lost; this assumes
        # set_url() runs for every non-'update' URL, known or new, since
        # download() needs self.dlpath in both cases.
        self.set_url(url)

    def set_url(self, url):
        """Make *url* the current series and create its download directory."""
        self.url = url
        # Drop trailing slashes so ".../name/" still yields "name" instead
        # of an empty title.
        self.title = url.rstrip('/').split('/')[-1]
        print('Downloading %s' % self.title)
        self.dlpath = '%s/%s' % (self.DOWNLOAD_ROOT, self.title)
        os.makedirs(self.dlpath, exist_ok=True)

    def load_known(self):
        """Append every URL stored in the save file to ``self.saved_urls``.

        A missing save file is not an error; a notice is printed instead.
        """
        try:
            with open(self.SAVE_FILE, "r") as save_file:
                for line in save_file:
                    # Strip only the newline: slicing off the last character
                    # would truncate a final line that has no newline.
                    entry = line.rstrip('\n')
                    if entry:  # a blank line is not a URL
                        self.saved_urls.append(entry)
        except FileNotFoundError:
            print("No saved urls found")
            return
        print("Loaded %d urls" % len(self.saved_urls))

    def _get_or_exit(self, url, label):
        """GET *url* and return the response; exit the process if not 200.

        Args:
            url: Address to fetch with the shared session.
            label: What is being fetched, used in the error message.
        """
        response = self.session.get(url)
        # Compare by value: ``is not 200`` tests object identity, which only
        # works by accident of integer caching.
        if response.status_code != 200:
            print('Error retrieving %s: %d' % (label, response.status_code))
            print(url)
            sys.exit(1)
        return response

    def get_chapters(self):
        """Fill ``self.urls`` with the chapter links of the current page.

        Links are read from the first element with class ``chapter-list``
        and stored in the reverse of page order.
        """
        response = self._get_or_exit(self.url, 'page')
        tree = html.fromstring(response.content)
        chapter_list = tree.find_class('chapter-list')[0]
        # NOTE(review): assumes every row nests its <a> two levels down
        # (row[0][0]) and that the page lists newest chapters first, so the
        # reversal gives oldest-first -- confirm against the live markup.
        self.urls = [row[0][0].get('href') for row in chapter_list][::-1]

    def get_image(self, url, zfile):
        """Download the image at *url* and add it to the open zip *zfile*.

        The archive member is named after the last path segment of *url*.
        """
        filename = url.split('/')[-1]
        # Flush so the progress line is visible before the newline arrives.
        print(filename, end='', flush=True)
        response = self._get_or_exit(url, filename)
        # ``content`` is the decoded body; reading ``raw`` directly would
        # store still-compressed bytes if the server used Content-Encoding.
        zfile.writestr(filename, response.content)

    def get_images(self, url):
        """Download every image of the chapter at *url* into one ``.cbz``.

        Images are the ``<img>`` children of the element with id
        ``vungdoc``.  The archive is built under a ``.part`` name and renamed
        only once complete, so an aborted download never leaves a file that
        the ``*.cbz`` resume count would mistake for a finished chapter.
        """
        response = self._get_or_exit(url, 'page')
        tree = html.fromstring(response.content)
        container = tree.get_element_by_id('vungdoc')
        images = [node for node in container if node.tag == 'img']
        total = len(images)
        chapter = url.rstrip('/').split('/')[-1]
        archive_path = '%s/%s.cbz' % (self.dlpath, chapter)
        partial_path = archive_path + '.part'
        # The context manager guarantees the zip's central directory is
        # written even if a download aborts midway.
        with zipfile.ZipFile(partial_path, mode='w') as zfile:
            for index, image in enumerate(images, start=1):
                print("\r%3d / %3d : " % (index, total), end='')
                self.get_image(image.get('src'), zfile)
        os.replace(partial_path, archive_path)
        print()

    def download(self):
        """Download missing chapters for the current URL, or for every saved
        URL when the instance was created with ``'update'``."""
        if self.url == 'update':
            for url in self.saved_urls:
                print("Updating from %s" % url)
                self.set_url(url)
                self.__download()
        else:
            self.__download()

    def __download(self):
        """Fetch the chapter list and download chapters not yet on disk.

        The number of existing ``.cbz`` files in the series directory is
        used as the count of chapters to skip from the start of the list.
        """
        self.get_chapters()
        print("Available chapters: %d" % len(self.urls))
        # Escape the directory so glob metacharacters in a title are literal.
        pattern = '%s/*.cbz' % glob.escape(self.dlpath)
        downloaded = len(glob.glob(pattern))
        print("Counted in downloaded: %d" % downloaded)
        pending = self.urls[downloaded:]
        for number, url in enumerate(pending, start=downloaded + 1):
            print("Chapter: %d" % number)
            self.get_images(url)
def main(argv=None):
    """Run the downloader from the command line.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.  Exactly one argument is expected after the
            program name: a series URL or the word ``update``.

    Exits with status 1 after printing a usage line when the argument count
    is wrong.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        print("Usage: %s [url|update]" % argv[0])
        sys.exit(1)
    dl = MgkaDL(argv[1])
    dl.download()


# Guard the entry point so importing this module does not start a download.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment