Skip to content

Instantly share code, notes, and snippets.

@cnbeining cnbeining/163pp.py
Last active Feb 14, 2018

Embed
What would you like to do?
Batch download pp.163.com | 批量下载网易摄影 pp.163.com 的照片
#!/usr/bin/env python
#coding:utf-8
# Author: Beining http://www.cnbeining.com/ cnbeining[at]gmail.com
# Purpose: Batch download pp.163.com
# Created: 03/04/2015
# License: GNU GPL 2.0 https://www.gnu.org/licenses/gpl-2.0.html
import os
import sys
import unittest
import urllib2
import logging
import re
from multiprocessing import Pool
from multiprocessing.dummy import Pool as ThreadPool
import getopt
import subprocess
# Module-level configuration.
# NOTE: the original `global DOWNLOAD_SOFTWARE, ...` statement was removed —
# `global` is a no-op at module scope; these names are module globals simply
# by being assigned here.

# Headers sent with every HTTP request so pp.163.com serves the normal
# desktop page instead of rejecting the script as a bot.
FAKE_HEADER = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.16 Safari/537.36',
    'Cache-Control': 'no-cache',
    'Pragma': 'no-cache'}

# Directory the script was started from; one folder per album is created here.
LOCATION_DIR = os.getcwd()

# External download tool to shell out to: 'wget', 'aria2c', 'curl' or 'axel'.
DOWNLOAD_SOFTWARE = 'wget'
#----------------------------------------------------------------------
def page_reader(url):
    """str->str
    Fetch *url* with the fake browser headers and return the raw body."""
    request = urllib2.Request(url, headers=FAKE_HEADER)
    return urllib2.urlopen(request).read()
#----------------------------------------------------------------------
def page_parser(webpage):
    """str->dict
    Parse an album page (e.g. http://pp.163.com/daowuzhe123/pp/13424132.html).

    Scans the HTML for the photo-list script URL ('purl', e.g.
    http://s1.ph.126.net/WwP8GD1A3ocjPfENOdgrdQ==/192414543510075.js) and the
    album name, creates a folder named after the album under LOCATION_DIR,
    chdirs into it, then fetches and parses the photo list.

    Returns {photoId: photo_dict}.
    Side effects: creates a directory and changes the process cwd.
    Raises ValueError when the page lacks the expected fields (the original
    died with a NameError in that case).
    """
    logging.info('Retriving purl...')
    purl = None
    for line in webpage.split('\n'):
        if 'purl' in line:
            # Keep scanning (no break): like the original, the last match wins.
            purl = 'http://' + line.strip()[6:-2]
    folder_name = None
    for line in webpage.split('\n'):
        if 'name:' in line:
            # Page is served as GBK; decode so the folder name comes out right.
            folder_name = line.decode('gbk').strip()[7:-2]
            print(folder_name)
            break
    if purl is None or folder_name is None:
        raise ValueError('Could not find purl/name on page - layout changed?')
    try:
        os.mkdir(folder_name)
    except OSError:
        pass  # best-effort: the folder most likely already exists
    os.chdir(LOCATION_DIR + '/' + folder_name)
    purl_data = page_reader(purl)
    # The .js payload embeds a JS array of photo objects: [{...},{...},...]
    purl_processed = purl_data.split('[{')[1].split('}]')[0].split('},{')
    purl_processed_list = ['{' + i + '}' for i in purl_processed]
    # Quote bare JS identifiers so each fragment reads as a Python dict literal.
    pattern = r"([a-zA-Z_][a-zA-Z_0-9]*)\s*\:"
    repl = lambda match: '"{}":'.format(match.group(1))
    dict_big = {}
    for fragment in purl_processed_list:
        # SECURITY: eval() on data fetched from the network — kept for
        # compatibility, but this fully trusts pp.163.com; a hostile payload
        # could execute arbitrary code.  Consider ast.literal_eval/json.
        dict_this = eval(re.sub(pattern, repl, fragment))
        dict_big[dict_this['photoId']] = dict_this
    return dict_big
#----------------------------------------------------------------------
def download_video_link(args):
    """(str, str, str) -> None
    Thread-pool worker: download one image via an external tool.

    *args* is a (filename, download_software, img_url) tuple — Pool.map()
    hands each task over as a single tuple.  The Python-2-only tuple
    parameter of the original (``def f((a, b, c))``) is replaced with an
    explicit unpack so the code also parses under Python 3; the call site
    is unchanged.  The ``(unknown)`` placeholders in the command templates
    were restored to ``{filename}``.
    """
    filename, download_software, img_url = args
    logging.info('Downloading {filename}...'.format(filename=filename))
    # Command template for the chosen downloader; filename is quoted so it
    # survives the shell, matching the quoting already applied to img_url.
    if download_software == 'aria2c':
        cmd = 'aria2c -c -k1M --out "{filename}" "{img_url}"'
    elif download_software == 'wget':
        cmd = 'wget -c -O "{filename}" "{img_url}"'
    elif download_software == 'curl':
        # curl resumes with '-C -' (auto-detect offset).
        cmd = 'curl -L -C - -o "{filename}" "{img_url}"'
    elif download_software == 'axel':
        cmd = 'axel -o "{filename}" "{img_url}"'
    else:
        # The original crashed with an UnboundLocalError here; fail clearly.
        raise ValueError('Unknown downloader: %s' % download_software)
    cmd = cmd.format(filename=filename, img_url=img_url)
    logging.debug(cmd)
    execute_cmd(cmd)
#----------------------------------------------------------------------
def execute_cmd(cmd):
    """str -> int
    Run *cmd* in a shell, discarding its output; return the exit code.

    The original passed stdout/stderr=subprocess.PIPE but never read the
    pipes — a child producing enough output would fill the pipe buffer and
    deadlock.  Redirecting to os.devnull avoids that.
    Logs a warning on any non-zero exit code.
    """
    with open(os.devnull, 'wb') as devnull:
        return_code = subprocess.call(cmd, shell=True,
                                      stdout=devnull, stderr=devnull)
    if return_code != 0:
        logging.warning('ERROR')
    return return_code
#----------------------------------------------------------------------
def parse_list(img_dict, resolution):
    """dict->None
    Turn the parsed photo dict into a list of worker-task tuples
    (filename, downloader, image URL) for the chosen *resolution* key
    (murl/surl/lurl/turl/qurl)."""
    tasks = []
    for photo in img_dict.values():
        url_field = photo[resolution]
        # Filename: photoId plus the extension taken from the URL field.
        name = '{0}.{1}'.format(photo['photoId'], url_field.split('.')[-1])
        # First character of the field selects the imgN.ph.126.net host.
        full_url = 'http://img{0}.ph.126.net{1}'.format(url_field[0], url_field[1:])
        tasks.append((name, DOWNLOAD_SOFTWARE, full_url))
    return tasks
#----------------------------------------------------------------------
def downloader(down_list, workers=5):
    """Fan the download tasks out over a small thread pool.

    Hands each element of *down_list* to download_video_link in one of
    *workers* threads and blocks until all of them have finished.
    """
    from multiprocessing.dummy import Pool as ThreadPool
    thread_pool = ThreadPool(int(workers))
    thread_pool.map(download_video_link, down_list)
    # No more tasks coming; wait for the workers to drain the queue.
    thread_pool.close()
    thread_pool.join()
#----------------------------------------------------------------------
def main(link, resolution):
    """Download every photo of the album at *link* in *resolution*."""
    album = page_parser(page_reader(link))
    tasks = parse_list(album, resolution)
    downloader(tasks, 5)
if __name__ == '__main__':
    # Usage: python 163pp.py <resolution> URL [URL ...]
    # resolution is one of the photo-size keys: murl/surl/lurl/turl/qurl.
    if len(sys.argv) < 3:
        # The original crashed with an IndexError when arguments were missing.
        sys.exit('Usage: python 163pp.py [resolution] URL1 URL2...')
    resolution = sys.argv[1]
    for link in sys.argv[2:]:
        # page_parser chdirs into each album's folder; reset before the next.
        os.chdir(LOCATION_DIR)
        main(link, resolution)
    print('Done!')

Batch download pp.163.com

批量下载网易摄影 pp.163.com 的照片

使用方法:

修改成你需要的线程和下载器 然后:

python 163pp.py [分辨率] URL1 URL2...

分辨率:
murl 中等
surl 小
lurl 似乎非常小(具体尺寸未确认)
turl 比小还小
qurl 正方形
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.