Skip to content

Instantly share code, notes, and snippets.

@toddlerya
Last active August 29, 2015 14:05
Show Gist options
  • Save toddlerya/35ba2b49042fbfe5c406 to your computer and use it in GitHub Desktop.
Save toddlerya/35ba2b49042fbfe5c406 to your computer and use it in GitHub Desktop.
A Python spider: download the pictures of Taobao models.
#-*- coding: utf-8 -*-
#Author: toddlerya
#History: 2014/8/10
import urllib,re
# Python 2 calls the line reader raw_input; Python 3 renamed it to input.
# (Python 2's own input() evaluates what is typed, so raw_input is preferred.)
try:
    _read_line = raw_input
except NameError:
    _read_line = input

# The prompt asks which page of the listing to download. Surrounding
# whitespace is stripped so it cannot end up inside the request URL.
num = _read_line("你要下载哪一页的淘宝小妹?\n输入一个页码: ").strip()
web = "http://mm.taobao.com/json/request_top_list.htm?type=0&page="
# Address of the listing page; read as a module-level global by getImgUrl().
url = web + num
def getModelHomePage(url):
    """Return the first model home-page link found on the page at `url`.

    The page is downloaded and searched for text of the form
    ``href="...com/<digits>.htm``; what follows ``href="`` in the first
    such occurrence is returned.

    NOTE(review): this returns the first match; confirm that the original
    loop was not meant to return the last one.

    Args:
        url: Address of the listing page to download.

    Returns:
        The matched link as a string, or None when the page contains no
        matching link.
    """
    try:
        from urllib import urlopen  # Python 2, which this script targets
    except ImportError:
        from urllib.request import urlopen  # Python 3

    response = urlopen(url)
    try:
        page = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
    if not isinstance(page, str):
        # Python 3 returns bytes, while the pattern below is a text pattern.
        page = page.decode("utf-8", "replace")

    head = 'href="'
    found = re.search(r'href=".*?com/\d+\.htm', page)
    if found is None:
        return None
    # Drop the leading 'href="' so only the link itself is returned.
    return found.group(0)[len(head):]
def getImgUrl(page_url=None):
    """Collect the ``.jpg`` image references found on a page.

    Args:
        page_url: Address of the page to scan. When omitted, the address is
            obtained from ``getModelHomePage(url)`` using the module-level
            ``url``, which is what the zero-argument call has always done.

    Returns:
        A list of strings in page order, each starting with ``src="`` and
        ending with ``.jpg``. The list is empty when nothing matches or when
        no page address is available.
    """
    try:
        from urllib import urlopen  # Python 2, which this script targets
    except ImportError:
        from urllib.request import urlopen  # Python 3

    if page_url is None:
        page_url = getModelHomePage(url)
    if not page_url:
        # No page to scan; report "no images" rather than failing in urlopen.
        return []

    response = urlopen(page_url)
    try:
        page = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()
    if not isinstance(page, str):
        # Python 3 returns bytes, while the pattern below is a text pattern.
        page = page.decode("utf-8", "replace")

    return re.findall(r'src="\w+?.*?\.jpg', page)
def getImage(imglist=None, dest_dir="pic"):
    """Download each referenced image into `dest_dir` as a numbered file.

    Each entry is expected to look like ``src="<image address>``; the
    ``src="`` prefix is cut off and the remainder is fetched. Files are
    named ``mm.jpg<n>.jpg`` with ``n`` counting from 0, and a progress line
    is printed after each download.

    Args:
        imglist: Image references to download. When omitted, the list is
            obtained from ``getImgUrl()``.
        dest_dir: Directory that receives the files. It is created if it
            does not exist and there is at least one image to download.

    Returns:
        None.
    """
    import os
    try:
        from urllib import urlretrieve  # Python 2, which this script targets
    except ImportError:
        from urllib.request import urlretrieve  # Python 3

    if imglist is None:
        imglist = getImgUrl()
    if imglist and not os.path.isdir(dest_dir):
        # urlretrieve cannot write into a directory that is missing.
        os.makedirs(dest_dir)

    prefix = 'src="'
    for n, img in enumerate(imglist):
        image = img[len(prefix):]
        # os.path.join picks the right separator for the current platform.
        target = os.path.join(dest_dir, "mm.jpg" + str(n) + ".jpg")
        urlretrieve(image, target)
        # Parenthesised single argument prints the same on Python 2 and 3.
        print("正在下载第%s张" % n)
# Run the download, then report completion.
getImage()
# Parenthesised single argument prints the same text on Python 2 and 3.
print("下载完毕!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment