
@vitaminac
Last active December 26, 2021 15:04
狐妖小红娘 (Fox Spirit Matchmaker) manga downloader
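
An asyncio script that scrapes the chapter index from http://m.katui.net/manhua/1/, decodes the base64 "packed" JavaScript each chapter page embeds to recover its image URLs, downloads all pages concurrently into per-chapter folders, and records finished chapters in saved.json so an interrupted run can resume where it left off.
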
import asyncio
import errno
import json
import os
import re
from base64 import b64decode

import aiohttp
import bs4
import execjs

# Matches the base64 blob assigned to `packed` in each chapter page.
BASE64_PATTERN = re.compile("packed=\"([^\"]+)\"")
# Progress file: a JSON list of chapter URLs that were already handled.
SAVE_FILE = "saved.json"
HOST = "http://m.katui.net"
STATIC_CONTENT_SERVER = "http://katui.700mh.com/"


async def fetch(session, url):
    # The site serves GBK-encoded pages, so decode explicitly.
    async with session.get(url) as response:
        return await response.text(encoding="GBK")


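# Each chapter page hides its image list in an obfuscated blob: the
# packed="<base64>" attribute decodes to a packed JavaScript snippet.
# Evaluating that snippet with execjs yields a second expression which,
# evaluated again, returns the chapter's list of image paths. The magic
# offsets [4:] and [14:] appear to strip fixed wrapper prefixes in the
# markup as the site served it when this gist was written.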
async def get_chapter_image_urls(session, chapter_url: str):
    html = await fetch(session, chapter_url)
    base64 = BASE64_PATTERN.search(html)[1]
    decode = str(b64decode(base64), 'utf-8')
    js = decode[4:]
    arr = execjs.eval(js)
    js_get: str = arr[14:]
    return execjs.eval(js_get)


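# Save one page image as "<chapter index>-<chapter title>/<page>.jpg",
# creating the chapter directory on demand and skipping files that already
# exist so a re-run never downloads the same image twice.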
async def download_image(session, index, title, page, url):
    url = STATIC_CONTENT_SERVER + url
    filename = str(index) + "-" + title + "/" + str(page) + ".jpg"
    # Strip characters that are unsafe in file names on some platforms.
    filename = re.sub('[!?]', '_', filename)
    if not os.path.exists(filename):
        if not os.path.exists(os.path.dirname(filename)):
            try:
                os.makedirs(os.path.dirname(filename))
            except OSError as exc:  # guard against race condition
                if exc.errno != errno.EEXIST:
                    raise
        async with session.get(url) as r:
            with open(filename, "wb") as f:
                f.write(await r.read())


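# Resolve a chapter page into per-image download coroutines. The coroutines
# accumulate in `images` (awaited later in main); the chapter URL is marked
# as downloaded once its image list has been read, before the images
# themselves finish downloading.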
async def collect_images(images, session, index, title, chapter_url, downloaded: list):
    for page, url in enumerate(await get_chapter_image_urls(session, chapter_url)):
        images.append(download_image(session, index, title, page, url))
    downloaded.append(chapter_url)


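# Load the resume list, walk the chapter index oldest-first, queue every
# chapter not yet downloaded, fetch all images concurrently, then persist
# the updated resume list.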
async def main():
    # Start with an empty resume list on the first run, when saved.json
    # does not exist yet.
    downloaded = []
    if os.path.exists(SAVE_FILE):
        with open(SAVE_FILE, encoding="UTF-8") as f:
            downloaded = json.load(f)
    async with aiohttp.ClientSession(headers={
        "User-Agent": "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19"
    }) as session:
        chapters = []
        images = []
        html = await fetch(session, "http://m.katui.net/manhua/1/")
        soup = bs4.BeautifulSoup(html, "html.parser")
        # The index appears to list chapters newest-first; reversing gives
        # ascending folder numbering.
        chapter_links = list(soup.select("#c2 > dl > dt > a"))
        chapter_links.reverse()
        for index, chapter in enumerate(chapter_links):
            title = chapter.text
            chapter_url: str = HOST + chapter.attrs["href"]
            if chapter_url not in downloaded:
                chapters.append(collect_images(images, session,
                                               index, title, chapter_url, downloaded))
        # gather (rather than asyncio.wait, which no longer accepts bare
        # coroutines on modern Python) runs all chapter lookups, then all
        # image downloads, concurrently.
        if chapters:
            await asyncio.gather(*chapters)
        if images:
            await asyncio.gather(*images)
        with open(SAVE_FILE, "w", encoding="UTF-8") as f:
            json.dump(downloaded, f)


if __name__ == '__main__':
    asyncio.run(main())
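
To run it, install the three third-party dependencies (the execjs import comes from the PyExecJS package, which additionally needs a JavaScript runtime such as Node.js on the PATH): pip install aiohttp beautifulsoup4 PyExecJS, then run the script with Python 3. Images land in per-chapter folders next to the script, and saved.json tracks which chapters are done.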