Skip to content

Instantly share code, notes, and snippets.

@quininer
Last active September 13, 2022 01:38
Show Gist options
  • Save quininer/a412efaad8714fb6ed70 to your computer and use it in GitHub Desktop.
Save quininer/a412efaad8714fb6ed70 to your computer and use it in GitHub Desktop.
抓取u17漫画
#!/usr/bin/env python
# encoding: utf-8
import asyncio
import json
from aiohttp import ClientSession
from lxml import etree
from argparse import ArgumentParser
from os import mkdir, listdir
from execjs import compile as compilejs
from base64 import b64decode
from PIL import Image
from io import BytesIO
async def name2comic(name: str, session) -> int:
    """Resolve a comic title to its numeric id via the u17 search page.

    Requests the search-result page for ``name``, takes the first anchor
    matched by the result-list XPath and returns the integer found in the
    last path segment of its ``href``, before the first ``.``.

    ``session`` must provide ``get(url)`` usable as an async context manager
    whose response has an awaitable ``text()``. An empty XPath result raises
    ``IndexError``; a non-numeric segment raises ``ValueError``.
    """
    print('[name]', name)
    search_url = 'https://so.u17.com/all/{name}/m0_p1.html'.format(name=name)
    result_link = '//*[@id="comiclist"]/div/div[3]/ul/li/div/div[2]/h3/strong/a'
    async with session.get(search_url) as res:
        html = await res.text()
    anchors = etree.HTML(html).xpath(result_link)
    href = anchors[0].attrib['href']
    last_segment = href.split('/')[-1]
    return int(last_segment.split('.')[0])
async def comic2chapter(comic_id: int, session) -> dict:
    """Fetch the chapter listing of a comic from the u17 ajax endpoint.

    Returns whatever JSON value the endpoint answers with, decoded by
    ``json.loads``.

    NOTE(review): the return annotation says ``dict``, but ``main`` slices
    and iterates the result like a list of chapter records -- confirm the
    real payload shape before relying on the annotation.
    """
    print('[comic]', comic_id)
    endpoint = 'https://www.u17.com/comic/ajax.php?mod=comic&act=get_chapters&comic_id={comic_id}'
    url = endpoint.format(comic_id=comic_id)
    async with session.get(url) as res:
        payload = await res.text()
    return json.loads(payload)
async def chapter2image(chapter_id: int, session) -> dict:
    """Extract the image list of a chapter from its reader page.

    Downloads the chapter page, picks the last ``<head>`` script element that
    has inline text, evaluates it in a JS runtime (via execjs) and returns
    the value of ``image_config["image_list"]``.

    Raises ``IndexError`` when the page head contains no inline script.
    """
    print('[chapter]', chapter_id)
    page_url = 'https://www.u17.com/chapter/{chapter_id}.html'.format(chapter_id=chapter_id)
    async with session.get(page_url) as res:
        document = etree.HTML(await res.text())
    inline_scripts = [node.text for node in document.xpath('/html/head/script') if node.text]
    script = inline_scripts[-1]
    # The page script calls $.evalJSON, so a minimal `$` backed by JSON.parse
    # is defined ahead of it.
    # NOTE: this executes JavaScript supplied by the remote site.
    shim = 'var $ = {\nevalJSON: JSON.parse\n};\n'
    runtime = compilejs(shim + script)
    return runtime.eval('image_config["image_list"]')
async def getimage(image_url: bytes, path: str, session):
    """Download one image and save it to ``path`` through Pillow.

    Args:
        image_url: The image URL as bytes (it is decoded before the request).
        path: Destination file path; Pillow picks the output format from
            its extension.
        session: Object whose ``get(url)`` is an async context manager
            yielding a response with an awaitable ``read()``.

    URLs containing ``news.u17i.com`` are skipped and nothing is written.

    Raises:
        Whatever the HTTP request or Pillow raises, e.g. when the payload is
        not a decodable image.
    """
    if b'news.u17i.com' in image_url:
        return
    print('[image]', image_url)
    async with session.get(image_url.decode()) as res:
        payload = await res.read()
    picture = Image.open(BytesIO(payload))
    # The JPEG writer rejects modes such as RGBA, LA or P with
    # "OSError: cannot write mode ... as JPEG"; flatten those to RGB when
    # the target file is a JPEG so such pages are not lost.
    if path.lower().endswith(('.jpg', '.jpeg')) and picture.mode not in ('RGB', 'L', 'CMYK'):
        picture = picture.convert('RGB')
    picture.save(path)
async def main(name: str = None, comic: int = None, chapter: int = None):
    """Download a comic chapter by chapter into ``<comic id>/<chapter name>/``.

    Args:
        name: Comic title; only used to look up the id when ``comic`` is
            not given.
        comic: Numeric comic id.
        chapter: Chapter id to start from. When omitted, or when it does not
            match any listed chapter, downloading starts at the first one.

    All images of a chapter are fetched concurrently and written as
    ``NNN.jpg`` with the image key left-padded with zeros to three
    characters. A failed image is reported and does not abort the rest.

    Raises:
        SystemExit: when the comic id cannot be resolved from ``name``.
    """
    from os import makedirs
    from os.path import join

    async with ClientSession() as session:
        if not comic and name:
            try:
                comic = await name2comic(name, session)
            except Exception:
                # A bare ``except:`` would also swallow KeyboardInterrupt.
                raise SystemExit("[!] 获取 comic id 出错。")

        comic_dir = str(comic)
        makedirs(comic_dir, exist_ok=True)

        chapters = await comic2chapter(comic, session)

        # Locate the requested starting chapter; stays 0 when not found.
        start = 0
        if chapter:
            wanted = str(chapter)
            for position, entry in enumerate(chapters):
                if entry['chapter_id'] == wanted:
                    start = position
                    break

        for entry in chapters[start:]:
            # A '/' in the chapter title would be read as a path separator.
            chapter_dir = join(comic_dir, entry['chapter_name'].replace('/', '_'))
            makedirs(chapter_dir, exist_ok=True)

            imgs = await chapter2image(int(entry['chapter_id']), session)
            keys = list(imgs)
            # asyncio.wait() no longer accepts bare coroutines (TypeError on
            # Python 3.11+) and rejects an empty collection; gather() handles
            # both and lets us surface the per-image errors.
            outcomes = await asyncio.gather(
                *(
                    getimage(
                        b64decode(imgs[key]['src']),
                        join(chapter_dir, '{img:0>3}.jpg'.format(img=key)),
                        session,
                    )
                    for key in keys
                ),
                return_exceptions=True
            )
            for key, outcome in zip(keys, outcomes):
                if isinstance(outcome, Exception):
                    print('[!]', entry['chapter_name'], key, repr(outcome))
if __name__ == '__main__':
    # Command-line entry point: at least one of --name / --comic is needed;
    # otherwise the usage text is printed and the process exits with 0.
    cli = ArgumentParser()
    cli.add_argument('--name', help="漫画名")
    cli.add_argument('--comic', type=int, help="漫画ID")
    cli.add_argument('--chapter', type=int, help="从某章节开始下载")
    options = cli.parse_args()
    if options.name or options.comic:
        asyncio.run(main(options.name, options.comic, options.chapter))
    else:
        cli.print_help()
        exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment