Skip to content

Instantly share code, notes, and snippets.

@scturtle
Last active October 11, 2015 04:37
Show Gist options
  • Save scturtle/3803651 to your computer and use it in GitHub Desktop.
Save scturtle/3803651 to your computer and use it in GitHub Desktop.
bengou comics downloader (gevent, timeout)
# coding: utf-8
import requests
import sys, os, re
from gevent import monkey, pool, Timeout
# Apply gevent's monkey patches (DNS patching disabled) before any network
# use, so that blocking calls cooperate with the greenlet pool below.
monkey.patch_all(dns=False)

# Pool limiting how many download greenlets run at the same time.
p = pool.Pool(20)

# Timeout, in seconds, applied to network requests.
TIMEOUT = 10

# Patterns used to scrape the comic page.
# The title capture stops at the next '<'.  It previously excluded ']'
# (copied from pics_p), so any title containing ']' failed to match.
title_p = re.compile(r'title>([^<]*)</title')
# Allow any amount of whitespace around '=' in the embedded JavaScript.
base_p = re.compile(r"pic_base\s*=\s*'([^']*)'")
pics_p = re.compile(r"picTree\s*=\s*\[([^\]]*)\]")

# Progress counters shared by download() and dl():
# cnt = pictures finished so far, sam = total number of pictures.
cnt = None
sam = None
def _first_group(pattern, html, what):
    """Return the first capture of *pattern* in *html*, or raise ValueError."""
    found = pattern.search(html)
    if found is None:
        raise ValueError('could not find %s in the page' % what)
    return found.group(1)


def download(url):
    """Download every picture referenced by the comic page at *url*.

    The page is fetched (retrying until it succeeds), its title, picture
    base URL and picture list are scraped with the module-level patterns,
    a directory named after the title is created if needed, and one dl()
    greenlet per picture is spawned on the shared pool.  The working
    directory is switched to the title directory while downloading and is
    always restored afterwards.

    Raises:
        ValueError: if the title, ``pic_base`` or ``picTree`` cannot be
            found in the page, or the title is not usable as a directory
            name.
    """
    global cnt, sam
    import time  # local import: only needed for the retry back-off

    while True:
        try:
            # A timeout keeps a stalled connection from hanging forever.
            r = requests.get(url, timeout=TIMEOUT)
            break
        except Exception as e:
            print(str(e) or type(e).__name__)
            # Back off briefly instead of spinning on a persistent failure.
            time.sleep(1)
    r.encoding = 'utf8'
    html = r.text

    title = _first_group(title_p, html, 'page title')
    # The title comes from the remote page and is used as a directory
    # name, so it must not contain path separators or refer to '.'/'..'.
    title = title.replace('/', '_').replace('\\', '_')
    if title in ('', '.', '..'):
        raise ValueError('page title %r is not a usable directory name' % title)
    if not os.path.exists(title):
        os.mkdir(title)

    base = _first_group(base_p, html, 'pic_base')
    if not base.endswith('/'):
        base += '/'

    raw_pics = _first_group(pics_p, html, 'picTree')
    # Drop surrounding whitespace/quotes and skip empty entries (an empty
    # picTree or a trailing comma would otherwise yield a bogus URL).
    pics = [item.strip().strip('"\'') for item in raw_pics.split(',')]
    pics = [pic for pic in pics if pic]

    cnt, sam = 0, len(pics)
    origin = os.path.abspath('.')
    os.chdir(title)
    try:
        print('%s / %s' % (cnt, sam))
        for i, pic in enumerate(pics):
            p.spawn(dl, i, base + pic)
        p.join()
    finally:
        # Restore the caller's working directory even if spawning fails.
        os.chdir(origin)
def dl(i, pic_url):
    """Download *pic_url* into the current directory as ``<i><ext>``.

    The download is skipped when the target file already exists;
    otherwise it is retried until it succeeds, each attempt being limited
    to TIMEOUT seconds.  The data is written to a ``.part`` file and
    renamed on success, so an interrupted run never leaves a truncated
    picture that a later run would skip.  The shared progress counter is
    incremented and printed in every case.
    """
    global cnt
    import time  # local import: only needed for the retry back-off

    # Take the extension from the URL path only (ignoring any query string
    # or fragment); a URL without an extension yields an empty suffix.
    url_path = pic_url.split('?', 1)[0].split('#', 1)[0]
    filename = str(i) + os.path.splitext(url_path)[1]

    if not os.path.exists(filename):
        partial = filename + '.part'
        while True:
            try:
                with Timeout(TIMEOUT):
                    img = requests.get(pic_url).content
                with open(partial, 'wb') as out:
                    out.write(img)
                os.rename(partial, filename)
                break
            except Timeout:
                print('timeout')
            except Exception as e:
                print(str(e) or type(e).__name__)
            # Only reached after a failed attempt: pause before retrying.
            time.sleep(1)
    cnt += 1
    print('%s / %s' % (cnt, sam))
if __name__ == '__main__':
    # The comic page URL is taken from the first command-line argument.
    if len(sys.argv) < 2:
        print('Usage:')
        print('%s [url]' % __file__)
        # Missing argument is an error: exit with a non-zero status.
        sys.exit(1)
    url = sys.argv[1]
    download(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment