Skip to content

Instantly share code, notes, and snippets.

@yangjunjun
Created April 20, 2014 14:24
Show Gist options
  • Save yangjunjun/11115378 to your computer and use it in GitHub Desktop.
Save yangjunjun/11115378 to your computer and use it in GitHub Desktop.
批量下载豆瓣相册图片
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 批量下载豆瓣相册图片v0.1
# 步骤:
# 1. 获取图片的页面地址
# 2. 获取页面上图片的地址
# 3. 获取图片,写入文件
# 导入所需要的模块
import urllib2
from bs4 import BeautifulSoup
import re
# Resource URL: the album page that is passed to getImgUrl below
baseUrl = 'http://www.douban.com/photos/album/122789497/'
def getImg(url):
    """Download one image and write it into the hard-coded download folder.

    The local file name is the last path segment of ``url`` (for example
    ``p2171489724.jpg``), so names of any length are kept intact.

    Args:
        url: Absolute URL of the image to fetch.

    Raises:
        urllib2.URLError: If the image cannot be fetched.
        IOError: If the destination file cannot be written.
    """
    from contextlib import closing

    # Use the final path segment rather than a fixed-width slice: a slice
    # truncates longer names and drags '/' characters into shorter ones.
    imgName = url.rsplit('/', 1)[-1]

    # Download before opening the destination so a failed request does not
    # leave an empty file behind. closing() releases the connection; urllib2
    # responses are not context managers on Python 2.
    with closing(urllib2.urlopen(url)) as response:
        imgData = response.read()

    with open(u'E:\\douban\\' + imgName, 'wb') as imgFile:
        imgFile.write(imgData)
def getImgUrl(baseUrl):
    """Download the large version of every photo linked on an album page.

    Fetches ``baseUrl``, finds each ``<a class="photolst_photo">`` element,
    and for every ``<img>`` that is a direct child of such a link prints the
    thumbnail URL, rewrites ``thumb`` to ``large`` in it, and hands the
    resulting URL to ``getImg``.

    Args:
        baseUrl: URL of the album page to scan.
    """
    from contextlib import closing

    # Read the page and release the connection before parsing.
    with closing(urllib2.urlopen(baseUrl)) as response:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(response.read(), 'html.parser')

    for link in soup.find_all('a', class_="photolst_photo"):
        # recursive=False limits the search to direct children of the link.
        for img in link.find_all('img', recursive=False):
            # Thumbnail URL, e.g.
            # http://img3.douban.com/view/photo/thumb/public/p2168272700.jpg
            smallSrc = img.get('src')
            if not smallSrc:
                # An <img> without a src has nothing to download.
                continue
            print(smallSrc)
            # Large-image URL, e.g.
            # http://img3.douban.com/view/photo/large/public/p2171489724.jpg
            bigSrc = re.sub(r'thumb', 'large', smallSrc)
            getImg(bigSrc)
# Entry point: process the album page at baseUrl
getImgUrl(baseUrl)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment