Skip to content

Instantly share code, notes, and snippets.

@grzhan
Last active August 28, 2015 12:49
Show Gist options
  • Save grzhan/6ce71b68f8dd34c62cf4 to your computer and use it in GitHub Desktop.
Save grzhan/6ce71b68f8dd34c62cf4 to your computer and use it in GitHub Desktop.
因为最近e绅士收藏的本子经常失效丢失,所以写了个简单的下载器,设定了延时来规避e绅士对于下载脚本的检测,速度比较慢,但满足了自己的需求
#!/usr/bin/python
#-*- coding: utf-8 -*-
# ============================================
# _____ _ _ _ _
# | ____| | | | | ___ _ __ | |_ __ _(_)
# | _| _____| |_| |/ _ \ '_ \| __/ _` | |
# | |__|_____| _ | __/ | | | || (_| | |
# |_____| |_| |_|\___|_| |_|\__\__,_|_|
#
# --------------------------------------------
# @Author: grzhan
# @Date: 2015-08-27
# @Email: i@grr.moe
# @Description: 因为最近e绅士收藏的本子经常失效丢失,所以写了个简单的下载器
# 设定了延时来规避e绅士对于下载脚本的检测,速度比较慢,但满足了自己的需求
import requests
from pyquery import PyQuery as pq
from time import sleep
from StringIO import StringIO
import re
from PIL import Image
import sys
import os
def get(url, timeout=30, delay=3):
    """Fetch *url* through the local HTTP proxy, then pause before returning.

    Every request carries a browser-like User-Agent and the site
    configuration cookies. The pause after each request throttles the
    script so that it is less likely to be flagged as an automated
    downloader.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises a timeout error. Without it a stalled proxy or server
            would block the script indefinitely.
        delay: Seconds to sleep after the request has completed.

    Returns:
        The ``requests`` response object of the GET request.
    """
    # Local HTTP proxy used for both plain and TLS traffic.
    proxy = {'http': 'http://127.0.0.1:8123', 'https': 'http://127.0.0.1:8123'}
    # Adjacent string literals are concatenated at compile time, so the header
    # value does not depend on how the continuation lines are indented.
    ua = {'user-agent': ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) '
                         'AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/42.0.2311.135 Safari/537.36')}
    # Site configuration cookies; per the original author's note these select
    # the image resolution.
    cookies = {
        'uconfig': ('tl_m-uh_y-rc_0-cats_0-xns_0-ts_m-tr_2-prn_y-dm_l-ar_0-'
                    'rx_0-ry_0-ms_n-mt_n-cs_a-to_a-pn_0-sc_0-sa_y-oi_n-qb_n-'
                    'tf_n-hp_-hk_-xl_'),
        'xres': '3',
    }
    print('获取【{0}】的内容'.format(url))
    response = requests.get(url, proxies=proxy, headers=ua, cookies=cookies,
                            timeout=timeout)
    sleep(delay)
    return response
# Matches the "Next Page >" link of a viewer page; compiled once instead of on
# every loop iteration.
_NEXT_PAGE_RE = re.compile(r'<a href="([^"]*?)">Next\s*?Page\s*?&gt;<\/a>')


def _gallery_title(page_title):
    """Turn the text of the page's <title> into a usable directory name."""
    # Drop only the trailing " - <site name>" segment; titles that themselves
    # contain " - " keep their inner separators, and titles without any
    # separator are kept whole instead of collapsing to an empty string.
    name = (page_title or '').rsplit(' - ', 1)[0].strip()
    # A path separator inside the title would make os.mkdir() fail.
    for separator in (os.sep, os.altsep):
        if separator:
            name = name.replace(separator, '_')
    return name or 'gallery'


def _collect_image_urls(page_url):
    """Walk the viewer pages from *page_url* and return every image URL."""
    image_urls = []
    visited = set()
    # The visited set stops the walk if a "next" link ever points back to a
    # page that was already processed.
    while page_url not in visited:
        visited.add(page_url)
        response = get(page_url)
        picture = pq(response.content)('#sm')
        if not picture:
            sys.exit('no image element (#sm) found on {0}'.format(page_url))
        image_urls.append(picture[0].get('src'))
        match = _NEXT_PAGE_RE.search(response.content)
        if match is None:
            break
        page_url = match.group(1)
    return image_urls


def _save_images(directory, image_urls):
    """Download each URL in *image_urls* into *directory*, numbered from 0."""
    if not os.path.exists(directory):
        os.mkdir(directory)
    for index, image_url in enumerate(image_urls):
        # NOTE(review): images are fetched directly, without the proxy,
        # headers, cookies and delay that get() applies to page requests.
        data = requests.get(image_url, timeout=30).content
        # Opening with PIL only validates that the payload is an image (it
        # raises if the data cannot be identified). The original bytes are
        # written to disk so the file is not re-encoded.
        Image.open(StringIO(data))
        extension = image_url.split('.')[-1]
        filename = os.path.join(directory, str(index) + '.' + extension)
        print(u'保存图片【{0}】'.format(filename))
        with open(filename, 'wb') as output:
            output.write(data)


def main():
    """Download every image of the gallery whose URL is given in argv[1]."""
    if len(sys.argv) < 2:
        sys.exit('usage: {0} GALLERY_URL'.format(sys.argv[0]))
    response = get(sys.argv[1])
    dom = pq(response.content)
    anchors = dom('a')
    if not anchors:
        sys.exit('no link to the first page found on the gallery page')
    # The first anchor of the gallery page is taken as the first viewer page.
    first_page_url = anchors[0].get('href')
    title = _gallery_title(dom('title').text())
    print(title)
    _save_images(title, _collect_image_urls(first_page_url))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment