Skip to content

Instantly share code, notes, and snippets.

@darkcores
Created August 9, 2019 19:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save darkcores/40a155d15335e1b462f680d95f6a1df0 to your computer and use it in GitHub Desktop.
Save darkcores/40a155d15335e1b462f680d95f6a1df0 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import requests
from lxml import html
import sys
import os
import glob
import zipfile
class MgkaDL:
    """Download every chapter linked from a series page into ``.cbz`` archives.

    Each URL passed in is remembered in ``mgkadl_save.txt`` (one per line, in
    the current working directory).  Passing the literal string ``'update'``
    instead of a URL re-runs the download for every remembered URL.

    Archives are written to ``mgkadl/<title>/<chapter>.cbz`` where ``<title>``
    and ``<chapter>`` are the last path segments of the series and chapter
    URLs respectively.

    Any HTTP response whose status is not 200 prints a message and terminates
    the process via ``sys.exit(1)``.
    """

    # File (relative to the working directory) that remembers series URLs.
    _SAVE_FILE = "mgkadl_save.txt"

    def __init__(self, url):
        """Create a downloader for *url*, or for all saved URLs if 'update'.

        A URL that is not yet present in the save file is appended to it.
        """
        self.session = requests.Session()
        self.saved_urls = []
        self.url = url
        self.load_known()
        if self.url == 'update':
            print("Updating all ...")
            return
        if url not in self.saved_urls:
            print("Downloading new url")
            with open(self._SAVE_FILE, "a") as f:
                f.write("%s\n" % url)
        self.set_url(url)

    @staticmethod
    def _last_segment(url):
        """Return the last non-empty path segment of *url*.

        A trailing ``/`` is ignored so that ``.../name/`` yields ``name``
        rather than an empty string.
        """
        return url.rstrip('/').split('/')[-1]

    @staticmethod
    def _abort(message, url):
        """Print *message* and the offending *url*, then exit with status 1."""
        print(message)
        print(url)
        sys.exit(1)

    def set_url(self, url):
        """Select *url* as the current series and ensure its folder exists.

        Sets ``self.url``, ``self.title`` and ``self.dlpath``.
        """
        self.url = url
        self.title = self._last_segment(url)
        print('Downloading %s' % self.title)
        self.dlpath = 'mgkadl/%s' % self.title
        # exist_ok avoids a race between an existence check and the creation.
        os.makedirs(self.dlpath, exist_ok=True)

    def load_known(self):
        """Append the URLs stored in the save file to ``self.saved_urls``.

        A missing save file is not an error; it is reported and ignored.
        """
        try:
            with open(self._SAVE_FILE, "r") as f:
                for line in f:
                    # strip() instead of line[:-1]: the final line may lack a
                    # newline, and blank lines must not become empty URLs.
                    known = line.strip()
                    if known:
                        self.saved_urls.append(known)
        except FileNotFoundError:
            print("No saved urls found")
            return
        print("Loaded %d urls" % len(self.saved_urls))

    def get_chapters(self):
        """Fetch the series page and store its chapter links in ``self.urls``.

        Links are read from the children of the first element carrying the
        ``chapter-list`` class and stored in reverse page order (presumably
        the page lists the newest chapter first -- confirm against the site).
        """
        r = self.session.get(self.url)
        if r.status_code != 200:
            self._abort('Error retrieving page: %d' % r.status_code, self.url)
        tree = html.fromstring(r.content)
        chapter_list = tree.find_class('chapter-list')[0]
        links = [row[0][0].get('href') for row in chapter_list]
        links.reverse()
        self.urls = links

    def get_image(self, url, zfile):
        """Download the image at *url* and store it in the open archive *zfile*.

        The archive member is named after the last path segment of *url*.
        """
        r = self.session.get(url)
        filename = self._last_segment(url)
        print(filename, end='')
        if r.status_code != 200:
            self._abort(
                'Error retrieving %s: %d' % (filename, r.status_code), url)
        # r.content (unlike r.raw) has any transfer content-encoding decoded.
        zfile.writestr(filename, r.content)

    def get_images(self, url):
        """Download every image of the chapter page at *url* into one archive.

        Images are the ``img`` children of the element with id ``vungdoc``.
        The archive is built under a ``.part`` name and renamed to
        ``<chapter>.cbz`` only after all images were stored, so an aborted
        chapter is never mistaken for a finished one.
        """
        r = self.session.get(url)
        if r.status_code != 200:
            self._abort('Error retrieving page: %d' % r.status_code, url)
        tree = html.fromstring(r.content)
        container = tree.get_element_by_id('vungdoc')
        # Only <img> children are downloaded, so only they count as the total.
        sources = [node.get('src') for node in container if node.tag == 'img']
        total = len(sources)
        chapter = self._last_segment(url)
        final_path = '%s/%s.cbz' % (self.dlpath, chapter)
        partial_path = final_path + '.part'
        with zipfile.ZipFile(partial_path, mode='w') as zfile:
            for index, src in enumerate(sources, start=1):
                print("\r%3d / %3d : " % (index, total), end='')
                self.get_image(src, zfile)
        os.replace(partial_path, final_path)
        print()

    def download(self):
        """Download the current series, or every saved series in update mode."""
        if self.url == 'update':
            for url in self.saved_urls:
                print("Updating from %s" % url)
                self.set_url(url)
                self.__download()
        else:
            self.__download()

    def __download(self):
        """Download the chapters of the current series that are still missing.

        The number of ``.cbz`` files already in the series folder is taken as
        the number of leading chapters to skip.
        """
        self.get_chapters()
        print("Available chapters: %d" % len(self.urls))
        downloaded = len(glob.glob('%s/*.cbz' % self.dlpath))
        print("Counted in downloaded: %d" % downloaded)
        for url in self.urls[downloaded:]:
            print("Chapter: %d" % (downloaded + 1))
            self.get_images(url)
            downloaded += 1
# Script entry point: exactly one argument is expected, either a series URL
# or the literal word "update"; anything else prints the usage and exits 1.
if len(sys.argv) == 2:
    dl = MgkaDL(sys.argv[1])
    dl.download()
else:
    print("Usage: %s [url|update]" % sys.argv[0])
    sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment