Doujinshi saved in my e-hentai favorites kept going dead or missing recently, so I wrote a simple downloader. It adds a delay to dodge e-hentai's detection of download scripts, which makes it fairly slow, but it meets my needs.
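The script targets Python 2 (print statements, StringIO) and expects requests, pyquery and PIL/Pillow to be installed, plus an HTTP proxy listening on 127.0.0.1:8123 as hard-coded in get(). The gallery URL is read from sys.argv[1], and images are saved in page order to a folder named after the gallery title. A minimal invocation sketch, with ehentai_dl.py as a stand-in filename for this gist:

    python ehentai_dl.py <gallery_url>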
#!/usr/bin/python
#-*- coding: utf-8 -*-
# ============================================
#  _____      _   _            _        _
# | ____|    | | | | ___ _ __ | |_ __ _(_)
# |  _| _____| |_| |/ _ \ '_ \| __/ _` | |
# | |__|_____|  _  |  __/ | | | || (_| | |
# |_____|    |_| |_|\___|_| |_|\__\__,_|_|
#
# --------------------------------------------
# @Author: grzhan
# @Date: 2015-08-27
# @Email: i@grr.moe
# @Description: Doujinshi saved in my e-hentai favorites kept going dead or missing,
#               so I wrote a simple downloader. A delay is added to dodge e-hentai's
#               detection of download scripts; it is slow, but it meets my needs.
import requests
from pyquery import PyQuery as pq
from time import sleep
from StringIO import StringIO
import re
from PIL import Image
import sys
import os
def get(url):
    # Set the HTTP proxy and User-Agent
    proxy = {'http': 'http://127.0.0.1:8123', 'https': 'http://127.0.0.1:8123'}
    ua = {'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 '
                        '(KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36'}
    # Set the image resolution cookie (xres=3)
    cookies = {'uconfig': 'tl_m-uh_y-rc_0-cats_0-xns_0-ts_m-tr_2-prn_y-dm_l-ar_0-rx_0-ry_0-'
                          'ms_n-mt_n-cs_a-to_a-pn_0-sc_0-sa_y-oi_n-qb_n-tf_n-hp_-hk_-xl_',
               'xres': '3'}
    print 'Fetching [{0}]'.format(url)
    req = requests.get(url, proxies=proxy, headers=ua, cookies=cookies)
    sleep(3)  # delay between requests to avoid triggering the bot detection
    return req
url = sys.argv[1]
req = get(url)
dom = pq(req.content)
# Follow the first link on the page (assumed to be the first viewer page)
url = dom('a')[0].get('href')
# Gallery title: drop the last ' - ' segment of the page <title>
title = ''.join(dom('title').text().split(' - ')[:-1]).strip()
print title
# Walk the viewer pages, collecting the image source URL (#sm) from each one
images = []
while True:
    req = get(url)
    cur_dom = pq(req.content)
    cur_src = cur_dom('#sm')[0].get('src')
    images.append(cur_src)
    # Follow the "Next Page" link; stop when there is none
    npattern = re.compile(r'<a href="([^"]*?)">Next\s*?Page\s*?><\/a>')
    result = re.findall(npattern, req.content)
    if result:
        url = result[0]
    else:
        break
# Download every collected image into a folder named after the gallery title
if not os.path.exists(title):
    os.mkdir(title)
for i, image_url in enumerate(images):
    image = Image.open(StringIO(requests.get(image_url).content))
    filename = title + '/' + str(i) + '.' + image_url.split('.')[-1]
    print u'Saving image [{0}]'.format(filename)
    image.save(filename)