wenLiangcan/get_douban_album_pic.py

## get_douban_album_pic.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Filename: get_douban_album_pic.py

import urllib2, re, os, sys
from os.path import basename
from urlparse import urlsplit

def fetch(url):
    """Download *url* and return the raw response body.

    This is a best-effort helper: any error raised while opening or reading
    the URL is printed and ``None`` is returned instead of the body, so
    callers must be prepared for a ``None`` result.

    :param url: address to request, passed straight to ``urllib2.urlopen``.
    :returns: the response body as a byte string, or ``None`` on failure.
    """
    # Imported locally so this function does not need a new file-level import.
    from contextlib import closing

    try:
        # urllib2 responses are not context managers; closing() makes sure
        # the connection is released even when read() raises.
        with closing(urllib2.urlopen(url)) as response:
            return response.read()
    except Exception as e:
        print(e)
        return None

def get_count(url):
    """Return the number of photos reported by the album page at *url*.

    The page is searched for a ``<span class="count">`` element holding the
    album total, and the number inside it is returned.

    :param url: address of the album page.
    :returns: the photo count as an int; ``1`` when the page carries no
        count element, and ``0`` when the page could not be fetched at all.
    """
    urlContent = fetch(url)
    if urlContent is None:
        # fetch() has already printed the error; without a page there is
        # nothing to count, and decoding None would raise a TypeError.
        return 0
    ptn = u'<span class="count">\(\u5171(\d+)\u5f20\)</span>'
    found = re.search(ptn, unicode(urlContent, "utf-8"))
    if found is None:
        # No total on the page: fall back to 1 so a single page is still
        # processed (presumably small albums omit the counter -- unverified).
        return 1
    return int(found.group(1))

def down_img(url):
    """Download every photo linked from the album page at *url*.

    Thumbnail URLs are scraped from the page, rewritten to point at a larger
    rendition of the same photo, and each image is written into the
    ``douban`` directory (relative to the working directory) under the file
    name taken from its URL.  The directory must already exist.

    A page or an image that cannot be fetched is skipped.

    :param url: address of one album page.
    """
    urlContent = fetch(url)
    if urlContent is None:
        # fetch() has already printed the error; there is nothing to scrape.
        return
    # Dots are escaped so that only real host names and ".jpg" names match;
    # the group captures the numeric photo id.
    thumbPtn = r'http://img\d\.douban\.com/view/photo/thumb/public/p(\d+)\.jpg'
    for found in re.finditer(thumbPtn, urlContent):
        # Photos with ids above 1770000000 all have a larger original-size
        # image whose URL contains "large" (credit: Douteng).
        if int(found.group(1)) > 1770000000:
            size = 'large'
        else:
            size = 'photo'
        imgUrl = found.group(0).replace('thumb', size)
        imgData = fetch(imgUrl)
        if imgData is None:
            # Skip the failed image instead of creating an empty file and
            # aborting the whole page on write(None).
            continue
        fileName = basename(urlsplit(imgUrl)[2])
        with open(os.path.join('douban', fileName), 'wb') as output:
            output.write(imgData)

def download(url):
    """Download all photos of the album whose first page is *url*.

    The photo count is read from the first page; the album is then walked in
    offset steps of 18, each page after the first being addressed by
    appending ``?start=<offset>`` to *url*.  A progress line is printed for
    every page and ``Finished`` once all pages were processed.

    :param url: base URL of the album.
    """
    # Offset step between consecutive pages (presumably the number of photos
    # shown per album page -- confirm against the site).
    pageStep = 18
    count = get_count(url)
    for num, start in enumerate(range(0, count, pageStep), 1):
        # Format inside the call: "print(...) % num" only works while print
        # is a statement and breaks as soon as print is a function.
        print("Downloading images in page %d ..." % num)
        if start == 0:
            pageUrl = url
        else:
            pageUrl = url + '?start=' + str(start)
        down_img(pageUrl)
    print("Finished")

def input_url():
    """Return the normalised album URL supplied by the user.

    The URL is taken from the first command-line argument when one is given,
    otherwise the user is prompted for it.  Everything after the numeric
    album id (page offsets, query strings, ...) is dropped and a trailing
    slash is appended.

    :returns: a string of the form ``http://www.douban.com/photos/album/<id>/``.
    :raises SystemExit: when the input is not a Douban album URL.
    """
    link = r'http://www\.douban\.com/photos/album/\d+'
    if len(sys.argv) > 1:
        inputString = sys.argv[1]
    else:
        inputString = raw_input('Enter album url --> ')
    # Pasted URLs easily pick up surrounding whitespace; re.match anchors at
    # the start of the string just like the original "^".
    found = re.match(link, inputString.strip())
    if found is None:
        # Leave with a readable message instead of an IndexError traceback.
        sys.exit('Not a Douban album url: %r' % inputString)
    return found.group(0) + '/'

if __name__ == "__main__":
    # Script entry point: make sure the "douban" output directory exists,
    # then download the album given on the command line or at the prompt.
    # isdir() rather than exists(): a regular file named "douban" must not be
    # mistaken for the output directory.
    if not os.path.isdir('douban'):
        os.mkdir('douban')
    download(input_url())
	#!/usr/bin/python
	# -*- coding: utf-8 -*-
	# Filename: get_douban_album_pic.py

	import urllib2, re, os, sys
	from os.path import basename
	from urlparse import urlsplit

	def fetch(url):
	    """Download *url* and return the raw response body.

	    This is a best-effort helper: any error raised while opening or
	    reading the URL is printed and ``None`` is returned instead of the
	    body, so callers must be prepared for a ``None`` result.

	    :param url: address to request, passed straight to ``urllib2.urlopen``.
	    :returns: the response body as a byte string, or ``None`` on failure.
	    """
	    # Imported locally so this function does not need a new file-level import.
	    from contextlib import closing

	    try:
	        # urllib2 responses are not context managers; closing() makes
	        # sure the connection is released even when read() raises.
	        with closing(urllib2.urlopen(url)) as response:
	            return response.read()
	    except Exception as e:
	        print(e)
	        return None

	def get_count(url):
	    """Return the number of photos reported by the album page at *url*.

	    The page is searched for a ``<span class="count">`` element holding
	    the album total, and the number inside it is returned.

	    :param url: address of the album page.
	    :returns: the photo count as an int; ``1`` when the page carries no
	        count element, and ``0`` when the page could not be fetched.
	    """
	    urlContent = fetch(url)
	    if urlContent is None:
	        # fetch() has already printed the error; without a page there is
	        # nothing to count, and decoding None would raise a TypeError.
	        return 0
	    ptn = u'<span class="count">\(\u5171(\d+)\u5f20\)</span>'
	    found = re.search(ptn, unicode(urlContent, "utf-8"))
	    if found is None:
	        # No total on the page: fall back to 1 so a single page is still
	        # processed (presumably small albums omit the counter -- unverified).
	        return 1
	    return int(found.group(1))

	def down_img(url):
	    """Download every photo linked from the album page at *url*.

	    Thumbnail URLs are scraped from the page, rewritten to point at a
	    larger rendition of the same photo, and each image is written into
	    the ``douban`` directory (relative to the working directory) under
	    the file name taken from its URL.  The directory must already exist.

	    A page or an image that cannot be fetched is skipped.

	    :param url: address of one album page.
	    """
	    urlContent = fetch(url)
	    if urlContent is None:
	        # fetch() has already printed the error; nothing to scrape.
	        return
	    # Dots are escaped so that only real host names and ".jpg" names
	    # match; the group captures the numeric photo id.
	    thumbPtn = r'http://img\d\.douban\.com/view/photo/thumb/public/p(\d+)\.jpg'
	    for found in re.finditer(thumbPtn, urlContent):
	        # Photos with ids above 1770000000 all have a larger original-size
	        # image whose URL contains "large" (credit: Douteng).
	        if int(found.group(1)) > 1770000000:
	            size = 'large'
	        else:
	            size = 'photo'
	        imgUrl = found.group(0).replace('thumb', size)
	        imgData = fetch(imgUrl)
	        if imgData is None:
	            # Skip the failed image instead of creating an empty file and
	            # aborting the whole page on write(None).
	            continue
	        fileName = basename(urlsplit(imgUrl)[2])
	        with open(os.path.join('douban', fileName), 'wb') as output:
	            output.write(imgData)

	def download(url):
	    """Download all photos of the album whose first page is *url*.

	    The photo count is read from the first page; the album is then walked
	    in offset steps of 18, each page after the first being addressed by
	    appending ``?start=<offset>`` to *url*.  A progress line is printed
	    for every page and ``Finished`` once all pages were processed.

	    :param url: base URL of the album.
	    """
	    # Offset step between consecutive pages (presumably the number of
	    # photos shown per album page -- confirm against the site).
	    pageStep = 18
	    count = get_count(url)
	    for num, start in enumerate(range(0, count, pageStep), 1):
	        # Format inside the call: "print(...) % num" only works while
	        # print is a statement and breaks once print is a function.
	        print("Downloading images in page %d ..." % num)
	        if start == 0:
	            pageUrl = url
	        else:
	            pageUrl = url + '?start=' + str(start)
	        down_img(pageUrl)
	    print("Finished")

	def input_url():
	    """Return the normalised album URL supplied by the user.

	    The URL is taken from the first command-line argument when one is
	    given, otherwise the user is prompted for it.  Everything after the
	    numeric album id (page offsets, query strings, ...) is dropped and a
	    trailing slash is appended.

	    :returns: a string of the form ``http://www.douban.com/photos/album/<id>/``.
	    :raises SystemExit: when the input is not a Douban album URL.
	    """
	    link = r'http://www\.douban\.com/photos/album/\d+'
	    if len(sys.argv) > 1:
	        inputString = sys.argv[1]
	    else:
	        inputString = raw_input('Enter album url --> ')
	    # Pasted URLs easily pick up surrounding whitespace; re.match anchors
	    # at the start of the string just like the original "^".
	    found = re.match(link, inputString.strip())
	    if found is None:
	        # Leave with a readable message instead of an IndexError traceback.
	        sys.exit('Not a Douban album url: %r' % inputString)
	    return found.group(0) + '/'

	if __name__ == "__main__":
	    # Script entry point: make sure the "douban" output directory exists,
	    # then download the album given on the command line or at the prompt.
	    # isdir() rather than exists(): a regular file named "douban" must
	    # not be mistaken for the output directory.
	    if not os.path.isdir('douban'):
	        os.mkdir('douban')
	    download(input_url())