@apnorton
Created July 3, 2020 20:09
Tool for downloading zips of scanlations from jaiminisbox. Usage: python jaiminisbox_download.py [url_to_series_page]
import logging
import os
import sys
from functools import partial
from multiprocessing import Pool

import requests
from bs4 import BeautifulSoup

logging.basicConfig(level=logging.INFO)

# Converts a row of the table of contents to a dictionary with two
# keys: "title" and "url", where "url" points to the chapter zip file.
def row_to_dict(e):
    url = e.find("a")["href"]
    title = e.find("div", class_="title").text
    return {"url": url, "title": title}
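
# For reference, row_to_dict assumes each table-of-contents row looks
# roughly like the sketch below (inferred from the selectors used in this
# script, not from any documented markup):
#
#   <div class="element">
#     <div class="title"><a href=".../reader/download/...">Chapter title</a></div>
#   </div>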


def download(url, base_filepath):
    # Everything after "download" in the URL encodes the chapter metadata:
    # name, language, volume, chapter.
    remainder = url.split("download", 1)[1]
    metadata = list(filter(None, remainder.split("/")))
    name = metadata[0]
    language = metadata[1]
    volume = int(metadata[2])
    chapter = int(metadata[3])
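
    # For example (URL shape inferred from the parsing above, not from any
    # documented API), ".../download/solo-leveling/en/1/12/" parses to
    # name="solo-leveling", language="en", volume=1, chapter=12.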

    if language != "en" and language != "en-us":
        logging.warning("Not downloading %s as it is not English", url)
        return

    location = f"{name}/vol{volume:02}"
    filename = f"chapter{chapter:03}.zip"
    full_path = os.path.join(base_filepath, location, filename)
    if os.path.isfile(full_path):
        logging.debug("Skipping %s as it is already downloaded", url)
        return

    logging.info("Downloading %s: Vol. %d Chapt. %d", name, volume, chapter)

    # Ensure parent dirs are created.
    os.makedirs(os.path.join(base_filepath, location), exist_ok=True)

    # Stream the zip to disk in 1 KiB blocks rather than buffering it all.
    resp = requests.get(url, stream=True)
    with open(full_path, "wb") as f:
        for block in resp.iter_content(1024):
            f.write(block)
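
# Note: given the format strings above, downloaded chapters land at paths
# like <base_filepath>/solo-leveling/vol01/chapter012.zip.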


# series_url: string for the series base page
#   (e.g. https://jaiminisbox.com/reader/series/solo-leveling/)
# Returns True if successful.
def download_series(series_url, base_filepath="."):
    resp = requests.get(series_url)
    if resp.status_code != 200:
        logging.fatal("Failed to read series URL")
        return False

    soup = BeautifulSoup(resp.text, "html.parser")
    elements = soup.find_all("div", {"class": "element"})
    chapter_urls = [row_to_dict(e)["url"] for e in elements]

    # Limit processes based on the CPUs available to this process (see the
    # docs for os.cpu_count(); note that os.sched_getaffinity is not
    # available on every platform). The work is network-bound, so the
    # process count is doubled.
    pool_sz = max(len(os.sched_getaffinity(0)) - 1, 1) * 2
    logging.info("Creating a pool of size %d", pool_sz)

    # partial() binds base_filepath so Pool.map can pass each URL as the
    # single positional argument to download().
    with Pool(pool_sz) as p:
        p.map(partial(download, base_filepath=base_filepath), chapter_urls)

    return True


if __name__ == "__main__":
    if len(sys.argv) != 2:
        sys.exit("Usage: python jaiminisbox_download.py [url_to_series_page]")
    download_series(sys.argv[1])
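
# The module can also be imported and used directly; a minimal sketch, where
# "downloads" is just an arbitrary destination directory for the example:
#
#   from jaiminisbox_download import download_series
#   download_series("https://jaiminisbox.com/reader/series/solo-leveling/",
#                   base_filepath="downloads")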