
@vitaminac
Last active December 26, 2021 15:04
狐妖小红娘 (Fox Spirit Matchmaker) manga downloader
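
An asyncio script that scrapes the chapter index from http://m.katui.net/manhua/1/, decodes the base64 "packed" JavaScript each chapter page embeds to recover its image URLs, downloads all pages concurrently into per-chapter folders, and records finished chapters in saved.json so an interrupted run can resume where it left off.
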
import asyncio
import errno
import json
import os
import re
from base64 import b64decode

import aiohttp
import bs4
import execjs

# Matches the base64 blob assigned to `packed` in each chapter page.
BASE64_PATTERN = re.compile("packed=\"([^\"]+)\"")
# Progress file: a JSON list of chapter URLs that were already handled.
SAVE_FILE = "saved.json"
HOST = "http://m.katui.net"
STATIC_CONTENT_SERVER = "http://katui.700mh.com/"


async def fetch(session, url):
    # The site serves GBK-encoded pages, so decode explicitly.
    async with session.get(url) as response:
        return await response.text(encoding="GBK")


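# Each chapter page hides its image list in an obfuscated blob: the
# packed="<base64>" attribute decodes to a packed JavaScript snippet.
# Evaluating that snippet with execjs yields a second expression which,
# evaluated again, returns the chapter's list of image paths. The magic
# offsets [4:] and [14:] appear to strip fixed wrapper prefixes in the
# markup as the site served it when this gist was written.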
async def get_chapter_image_urls(session, chapter_url: str):
    html = await fetch(session, chapter_url)
    base64 = BASE64_PATTERN.search(html)[1]
    decode = str(b64decode(base64), 'utf-8')
    js = decode[4:]
    arr = execjs.eval(js)
    js_get: str = arr[14:]
    return execjs.eval(js_get)


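# Save one page image as "<chapter index>-<chapter title>/<page>.jpg",
# creating the chapter directory on demand and skipping files that already
# exist so a re-run never downloads the same image twice.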
async def download_image(session, index, title, page, url):
    url = STATIC_CONTENT_SERVER + url
    filename = str(index) + "-" + title + "/" + str(page) + ".jpg"
    # Strip characters that are unsafe in file names on some platforms.
    filename = re.sub('[!?]', '_', filename)
    if not os.path.exists(filename):
        if not os.path.exists(os.path.dirname(filename)):
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exc:  # guard against race condition
                if exc.errno != errno.EEXIST:
                    raise
        async with session.get(url) as r:
            with open(filename, "wb") as f:
                f.write(await r.read())


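# Resolve a chapter page into per-image download coroutines. The coroutines
# accumulate in `images` (awaited later in main); the chapter URL is marked
# as downloaded once its image list has been read, before the images
# themselves finish downloading.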
async def collect_images(images, session, index, title, chapter_url, downloaded: list):
    for page, url in enumerate(await get_chapter_image_urls(session, chapter_url)):
        images.append(download_image(session, index, title, page, url))
    downloaded.append(chapter_url)


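# Load the resume list, walk the chapter index oldest-first, queue every
# chapter not yet downloaded, fetch all images concurrently, then persist
# the updated resume list.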
async def main():
    # Start with an empty resume list on the first run, when saved.json
    # does not exist yet.
    downloaded = []
    if os.path.exists(SAVE_FILE):
        with open(SAVE_FILE, encoding="UTF-8") as f:
            downloaded = json.load(f)
    async with aiohttp.ClientSession(headers={
        "User-Agent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"
    }) as session:
        chapters = []
        images = []
        html = await fetch(session, "http://m.katui.net/manhua/1/")
        soup = bs4.BeautifulSoup(html, "html.parser")
        # The index appears to list chapters newest-first; reversing gives
        # ascending folder numbering.
        chapter_links = list(soup.select("#c2 > dl > dt > a"))
        chapter_links.reverse()
        for index, chapter in enumerate(chapter_links):
            title = chapter.text
            chapter_url: str = HOST + chapter.attrs["href"]
            if chapter_url not in downloaded:
                chapters.append(collect_images(images, session,
                                               index, title, chapter_url, downloaded))
        # gather (rather than asyncio.wait, which no longer accepts bare
        # coroutines on modern Python) runs all chapter lookups, then all
        # image downloads, concurrently.
        if chapters:
            await asyncio.gather(*chapters)
        if images:
            await asyncio.gather(*images)
        with open(SAVE_FILE, "w", encoding="UTF-8") as f:
            json.dump(downloaded, f)


if __name__ == '__main__':
    asyncio.run(main())
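
To run it, install the three third-party dependencies (the execjs import comes from the PyExecJS package, which additionally needs a JavaScript runtime such as Node.js on the PATH): pip install aiohttp beautifulsoup4 PyExecJS, then run the script with Python 3. Images land in per-chapter folders next to the script, and saved.json tracks which chapters are done.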