Skip to content

Instantly share code, notes, and snippets.

@Javran
Forked from scturtle/exh.py
Created January 31, 2016 03:47
Show Gist options
  • Save Javran/80ad70ee139ddf4364eb to your computer and use it in GitHub Desktop.
Save Javran/80ad70ee139ddf4364eb to your computer and use it in GitHub Desktop.
downloader for curgentleman
#!/usr/bin/env python3
import os
import re
import sys
import json
import asyncio
import aiohttp
import urllib.request
import logging
from itertools import count
# Cap the number of simultaneous HTTP requests (API calls + image downloads).
sem = asyncio.Semaphore(5)
chunk_size = 100 * 1024 # 100kb
# Filled in under __main__ with the Cookie header read from cookies.txt;
# shared by both the aiohttp and urllib request paths.
headers = None
async def getimg(title, showkey, url):
    """Download one gallery page's image into <title>/<page>.jpg.

    Resolves the real image URL through the site's JSON API (retrying on
    errors/timeouts), then streams the image to disk in chunks.  Skips the
    page entirely if the target file already exists.  Progress is reported
    on stdout: "N " skipped, "N? " retry, "N✔ " done.
    """
    global chunk_size, headers
    # url looks like .../s/<imgkey>/<gid>-<page>
    imgkey, gidpage = url.split('/')[-2:]
    gid, page = gidpage.split('-', 1)
    filename = title + '/' + str(page) + '.jpg'
    if os.path.exists(filename):
        sys.stdout.write(str(page) + ' ')
        sys.stdout.flush()
        return
    data = dict(method='showpage', gid=gid, page=page,
                imgkey=imgkey, showkey=showkey)
    # Resolve the direct image URL via the API; retry until it succeeds.
    url = None
    while True:
        try:
            async with sem:
                with aiohttp.Timeout(5.0):
                    async with aiohttp.post('http://exhentai.org/api.php',
                                            data=json.dumps(data),
                                            headers=headers) as r:
                        j = await r.json()
            for v in j.values():
                if isinstance(v, str) and v.startswith('<a onclick'):
                    url = re.search(r'src="([^"]*)"', v).group(1)
                    break
            if url:  # got the image URL — stop retrying
                break
        except Exception:  # network error or timeout: report and retry
            sys.stdout.write(str(page) + '? ')
            sys.stdout.flush()
            await asyncio.sleep(1.0)
    # Check url is set BEFORE dereferencing it (original order raised
    # AttributeError on None instead of the intended 'api error').
    assert url, 'api error'
    assert url.split('.')[-1] == 'jpg', 'image type'
    # Stream the image to disk; after 5 failures, rewrite the host part of
    # the URL to a known-good mirror IP and keep retrying.
    fails = 0
    while True:
        try:
            async with sem:
                with aiohttp.Timeout(5.0):
                    async with aiohttp.get(url) as r:
                        with open(filename, 'wb') as f:
                            while True:
                                chunk = await r.content.read(chunk_size)
                                if not chunk:
                                    break
                                f.write(chunk)
            break
        except Exception:
            fails += 1
            if fails == 5:
                url = re.sub(r'/((\d{1,3}\.){3}\d{1,3}:?(\d{1,5}?))/',
                             '/37.48.81.80/', url)
            sys.stdout.write(str(page) + ('? ' if fails <= 5 else '?? '))
            sys.stdout.flush()
            await asyncio.sleep(1.0)
    sys.stdout.write(str(page) + '✔ ')
    sys.stdout.flush()
def get_download_tasks(url):
    """Scrape a gallery's listing pages and build one getimg coroutine per image.

    Walks listing pages ?p=0, ?p=1, ... until the page URLs stop changing,
    extracts the per-image showkey and gallery title from the first image
    page, creates the <title> output directory, and returns the list of
    (not yet scheduled) getimg coroutines.
    """
    global headers
    sys.stdout.write('Pages:\n')
    sys.stdout.flush()
    # Collect every per-image page URL from the paginated listing.
    pages = []
    for p in count(0):
        req = urllib.request.Request(url + '/?p=' + str(p), headers=headers)
        text = urllib.request.urlopen(req).read().decode('utf-8')
        sys.stdout.write(str(p) + '✔ ')
        sys.stdout.flush()
        pgs = re.findall(r'http://exhentai.org/s/[^"]*', text)
        # Stop when a listing yields nothing (guard against IndexError on
        # pgs[-1]) or repeats the previous page's last entry (past the end).
        if not pgs or (pages and pgs[-1] == pages[-1]):
            break
        pages.extend(pgs)
    print()
    # The showkey and gallery title both live on the first image page.
    req = urllib.request.Request(pages[0], headers=headers)
    text = urllib.request.urlopen(req).read().decode('utf-8')
    showkey = re.search(r'showkey="([^"]*)"', text).group(1)
    title = re.search(r'<title>([^<]*)<', text).group(1)
    title = title.replace(' ', '_')
    if not os.path.exists(title):
        os.mkdir(title)
    return [getimg(title, showkey, p) for p in pages]
if __name__ == '__main__':
    # Silence aiohttp's "decode JSON with unexpected mimetype" client warnings.
    logging.getLogger('aiohttp.client').setLevel(logging.ERROR)
    # The login cookie is supplied verbatim in cookies.txt.
    headers = {'Cookie': open('cookies.txt').read().strip()}
    # Gallery URL from the command line, with a built-in default.
    url = sys.argv[1] if len(sys.argv) > 1 else 'http://exhentai.org/g/831911/c4e250474c'
    tasks = get_download_tasks(url.rstrip('/'))
    print('Tasks:', len(tasks))
    asyncio.get_event_loop().run_until_complete(asyncio.wait(tasks))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment