Created
April 20, 2014 14:24
-
-
Save yangjunjun/11115378 to your computer and use it in GitHub Desktop.
批量下载豆瓣相册图片
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
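# Batch-download the photos of a Douban album, v0.1
# Steps:
# 1. Get the address of the page that lists the photos
# 2. Get the addresses of the images on that page
# 3. Fetch each image and write it to a file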
# 导入所需要的模块 | |
import urllib2 | |
from bs4 import BeautifulSoup | |
import re | |
# Resource address: the album page whose photos are downloaded
baseUrl = 'http://www.douban.com/photos/album/122789497/'
# 获得图片并写入文件 | |
def getImg(url, saveDir=u'E:\\douban\\'):
    """Download one image and save it under ``saveDir``.

    The local file name is the last path segment of ``url`` with any query
    string or fragment removed, e.g. ``p2171489724.jpg``.

    Args:
        url: Absolute URL of the image to fetch.
        saveDir: Path prefix the file name is appended to. It is used by
            plain concatenation, so it must end with a path separator, and
            the directory must already exist. Defaults to the location the
            script has always written to.

    Raises:
        ValueError: If no file name can be derived from ``url``.
        IOError: If the target file cannot be opened or written.

    Errors raised by ``urllib2.urlopen`` (e.g. ``urllib2.URLError``) are
    propagated unchanged.
    """
    # Use the final path segment instead of a fixed-width slice so photo ids
    # of any length (and URLs carrying a query string) still give a clean name.
    fileName = url.split('#', 1)[0].split('?', 1)[0].rsplit('/', 1)[-1]
    if not fileName:
        raise ValueError('cannot derive a file name from URL: %r' % (url,))

    # Download first: opening the target before the request succeeds would
    # leave an empty file behind whenever the fetch fails.
    response = urllib2.urlopen(url)
    try:
        imgData = response.read()
    finally:
        response.close()

    with open(saveDir + fileName, 'wb') as imgFile:
        imgFile.write(imgData)
def getImgUrl(baseUrl):
    """Download every photo linked from a single album page.

    Fetches ``baseUrl``, finds each ``<a class="photolst_photo">`` element,
    and for every ``<img>`` that is a direct child of such a link rewrites
    the thumbnail URL into the large-size URL and hands it to ``getImg``.

    Args:
        baseUrl: URL of the album page to scan. Only this one page is
            fetched.

    Errors raised by ``urllib2.urlopen`` or ``getImg`` are propagated.
    """
    # Read the page and release the connection before parsing.
    response = urllib2.urlopen(baseUrl)
    try:
        html = response.read()
    finally:
        response.close()
    soup = BeautifulSoup(html)

    for link in soup.find_all('a', class_="photolst_photo"):
        for child in link.children:
            # Children may be text nodes; only <img> tags are of interest.
            if getattr(child, 'name', None) != 'img':
                continue
            # Thumbnail URL, e.g.
            # http://img3.douban.com/view/photo/thumb/public/p2168272700.jpg
            smallSrc = child.get('src')
            if not smallSrc:
                # An <img> without a src has nothing to download.
                continue
            print(smallSrc)
            # Large-image URL, e.g.
            # http://img3.douban.com/view/photo/large/public/p2171489724.jpg
            # Replace only the size path segment, not any other "thumb"
            # substring that might appear in the URL.
            bigSrc = smallSrc.replace('/thumb/', '/large/', 1)
            getImg(bigSrc)
# Main entry: process the album page at baseUrl
getImgUrl(baseUrl)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment