Skip to content

Instantly share code, notes, and snippets.

@Javran
Forked from scturtle/exh.py
Created January 31, 2016 03:47
Show Gist options
  • Save Javran/80ad70ee139ddf4364eb to your computer and use it in GitHub Desktop.
Save Javran/80ad70ee139ddf4364eb to your computer and use it in GitHub Desktop.
downloader for curgentleman
#!/usr/bin/env python3
import os
import re
import sys
import json
import asyncio
import aiohttp
import urllib.request
import logging
from itertools import count
# Cap the number of simultaneous HTTP requests (API calls + image downloads).
sem = asyncio.Semaphore(5)
chunk_size = 100 * 1024 # 100kb
# Filled in under __main__ with the Cookie header read from cookies.txt;
# shared by both the aiohttp and urllib request paths.
headers = None
async def getimg(title, showkey, url):
    """Download one gallery page's image into <title>/<page>.jpg.

    Resolves the real image URL through the site's JSON API (retrying on
    errors/timeouts), then streams the image to disk in chunks.  Skips the
    page entirely if the target file already exists.  Progress is reported
    on stdout: "N " skipped, "N? " retry, "N✔ " done.
    """
    global chunk_size, headers
    # url looks like .../s/<imgkey>/<gid>-<page>
    imgkey, gidpage = url.split('/')[-2:]
    gid, page = gidpage.split('-', 1)
    filename = title + '/' + str(page) + '.jpg'
    if os.path.exists(filename):
        sys.stdout.write(str(page) + ' ')
        sys.stdout.flush()
        return
    data = dict(method='showpage', gid=gid, page=page,
                imgkey=imgkey, showkey=showkey)
    # Resolve the direct image URL via the API; retry until it succeeds.
    url = None
    while True:
        try:
            async with sem:
                with aiohttp.Timeout(5.0):
                    async with aiohttp.post('http://exhentai.org/api.php',
                                            data=json.dumps(data),
                                            headers=headers) as r:
                        j = await r.json()
            for v in j.values():
                if isinstance(v, str) and v.startswith('<a onclick'):
                    url = re.search(r'src="([^"]*)"', v).group(1)
                    break
            if url:  # got the image URL — stop retrying
                break
        except Exception:  # network error or timeout: report and retry
            sys.stdout.write(str(page) + '? ')
            sys.stdout.flush()
            await asyncio.sleep(1.0)
    # Check url is set BEFORE dereferencing it (original order raised
    # AttributeError on None instead of the intended 'api error').
    assert url, 'api error'
    assert url.split('.')[-1] == 'jpg', 'image type'
    # Stream the image to disk; after 5 failures, rewrite the host part of
    # the URL to a known-good mirror IP and keep retrying.
    fails = 0
    while True:
        try:
            async with sem:
                with aiohttp.Timeout(5.0):
                    async with aiohttp.get(url) as r:
                        with open(filename, 'wb') as f:
                            while True:
                                chunk = await r.content.read(chunk_size)
                                if not chunk:
                                    break
                                f.write(chunk)
            break
        except Exception:
            fails += 1
            if fails == 5:
                url = re.sub(r'/((\d{1,3}\.){3}\d{1,3}:?(\d{1,5}?))/',
                             '/37.48.81.80/', url)
            sys.stdout.write(str(page) + ('? ' if fails <= 5 else '?? '))
            sys.stdout.flush()
            await asyncio.sleep(1.0)
    sys.stdout.write(str(page) + '✔ ')
    sys.stdout.flush()
def get_download_tasks(url):
    """Scrape a gallery's listing pages and build one getimg coroutine per image.

    Walks listing pages ?p=0, ?p=1, ... until the page URLs stop changing,
    extracts the per-image showkey and gallery title from the first image
    page, creates the <title> output directory, and returns the list of
    (not yet scheduled) getimg coroutines.
    """
    global headers
    sys.stdout.write('Pages:\n')
    sys.stdout.flush()
    # Collect every per-image page URL from the paginated listing.
    pages = []
    for p in count(0):
        req = urllib.request.Request(url + '/?p=' + str(p), headers=headers)
        text = urllib.request.urlopen(req).read().decode('utf-8')
        sys.stdout.write(str(p) + '✔ ')
        sys.stdout.flush()
        pgs = re.findall(r'http://exhentai.org/s/[^"]*', text)
        # Stop when a listing yields nothing (guard against IndexError on
        # pgs[-1]) or repeats the previous page's last entry (past the end).
        if not pgs or (pages and pgs[-1] == pages[-1]):
            break
        pages.extend(pgs)
    print()
    # The showkey and gallery title both live on the first image page.
    req = urllib.request.Request(pages[0], headers=headers)
    text = urllib.request.urlopen(req).read().decode('utf-8')
    showkey = re.search(r'showkey="([^"]*)"', text).group(1)
    title = re.search(r'<title>([^<]*)<', text).group(1)
    title = title.replace(' ', '_')
    if not os.path.exists(title):
        os.mkdir(title)
    return [getimg(title, showkey, p) for p in pages]
if __name__ == '__main__':
    # Silence aiohttp's "decode JSON with unexpected mimetype" client warnings.
    logging.getLogger('aiohttp.client').setLevel(logging.ERROR)
    # The login cookie is supplied verbatim in cookies.txt.
    headers = {'Cookie': open('cookies.txt').read().strip()}
    # Gallery URL from the command line, with a built-in default.
    url = sys.argv[1] if len(sys.argv) > 1 else 'http://exhentai.org/g/831911/c4e250474c'
    tasks = get_download_tasks(url.rstrip('/'))
    print('Tasks:', len(tasks))
    asyncio.get_event_loop().run_until_complete(asyncio.wait(tasks))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment