Doujinshi saved in my e-hentai favorites kept going dead or missing recently, so I wrote a simple downloader. It adds a delay to dodge e-hentai's detection of download scripts, which makes it fairly slow, but it meets my needs.
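The script targets Python 2 (print statements, StringIO) and expects requests, pyquery and PIL/Pillow to be installed, plus an HTTP proxy listening on 127.0.0.1:8123 as hard-coded in get(). The gallery URL is read from sys.argv[1], and images are saved in page order to a folder named after the gallery title. A minimal invocation sketch, with ehentai_dl.py as a stand-in filename for this gist:

    python ehentai_dl.py <gallery_url>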
#!/usr/bin/python
#-*- coding: utf-8 -*-
# ============================================
#  _____      _   _            _        _
# | ____|    | | | | ___ _ __ | |_ __ _(_)
# |  _| _____| |_| |/ _ \ '_ \| __/ _` | |
# | |__|_____|  _  |  __/ | | | || (_| | |
# |_____|    |_| |_|\___|_| |_|\__\__,_|_|
#
# --------------------------------------------
# @Author: grzhan
# @Date: 2015-08-27
# @Email: i@grr.moe
# @Description: Doujinshi saved in my e-hentai favorites kept going dead or missing,
#               so I wrote a simple downloader. A delay is added to dodge e-hentai's
#               detection of download scripts; it is slow, but it meets my needs.
import requests
from pyquery import PyQuery as pq
from time import sleep
from StringIO import StringIO
import re
from PIL import Image
import sys
import os
def get(url):
    # Set the HTTP proxy and User-Agent
    proxy = {'http': 'http://127.0.0.1:8123', 'https': 'http://127.0.0.1:8123'}
    ua = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 '
                        '(KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36'}
    # Set the image resolution cookie (xres=3)
    cookies = {'uconfig': 'tl_m-uh_y-rc_0-cats_0-xns_0-ts_m-tr_2-prn_y-dm_l-ar_0-rx_0-ry_0-'
                          'ms_n-mt_n-cs_a-to_a-pn_0-sc_0-sa_y-oi_n-qb_n-tf_n-hp_-hk_-xl_',
               'xres': '3'}
    print 'Fetching [{0}]'.format(url)
    req = requests.get(url, proxies=proxy, headers=ua, cookies=cookies)
    sleep(3)  # delay between requests to avoid triggering the bot detection
    return req
url = sys.argv[1]
req = get(url)
dom = pq(req.content)
# Follow the first link on the page (assumed to be the first viewer page)
url = dom('a')[0].get('href')
# Gallery title: drop the last ' - ' segment of the page <title>
title = ''.join(dom('title').text().split(' - ')[:-1]).strip()
print title
# Walk the viewer pages, collecting the image source URL (#sm) from each one
images = []
while True:
    req = get(url)
    cur_dom = pq(req.content)
    cur_src = cur_dom('#sm')[0].get('src')
    images.append(cur_src)
    # Follow the "Next Page" link; stop when there is none
    npattern = re.compile(r'<a href="([^"]*?)">Next\s*?Page\s*?><\/a>')
    result = re.findall(npattern, req.content)
    if result:
        url = result[0]
    else:
        break
# Download every collected image into a folder named after the gallery title
if not os.path.exists(title):
    os.mkdir(title)
for i, image_url in enumerate(images):
    image = Image.open(StringIO(requests.get(image_url).content))
    filename = title + '/' + str(i) + '.' + image_url.split('.')[-1]
    print u'Saving image [{0}]'.format(filename)
    image.save(filename)