Skip to content

Instantly share code, notes, and snippets.

@Jeswang
Created August 26, 2013 14:52
Show Gist options
  • Save Jeswang/6342263 to your computer and use it in GitHub Desktop.
Save Jeswang/6342263 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding:utf-8 -*-
import re
import urllib
import urllib2
from BeautifulSoup import *
# Directory the downloaded icons are written to. File names are appended to
# this value by plain string concatenation, so it must end with a path separator.
SAVE_PATH = "/Users/jeswang/Desktop/ICONS/"
def get_icons_from_macx(page_number):
    """Download the icon of every software entry on one macx.cn list page.

    Fetches ``http://soft.macx.cn/index.htm?page=<page_number>``, looks up
    each ``<li class="item">`` entry, strips a trailing version number from
    the entry's title and saves the entry's image to
    ``SAVE_PATH + title + ".jpg"``. The title and image URL of every saved
    icon are printed.

    Entries lacking an image, an image ``src`` or a title link are skipped.
    Errors raised while fetching the page or downloading an icon propagate
    to the caller.

    Args:
        page_number: Page number of the software list (string or integer).
    """
    response = urllib2.urlopen(
        "http://soft.macx.cn/index.htm?page=%s" % (page_number,))
    try:
        soup = BeautifulSoup(response.read())
    finally:
        # Release the connection even when reading or parsing fails.
        response.close()

    # A title ends with an optional version: whitespace, then digits and dots.
    version_suffix = re.compile(r"^([\w\W]*?)(\s[0-9.]*)?$")

    # Icons are stored one at a time (single-threaded).
    for item in soup.findAll('li', {"class": "item"}):
        img = item.find('img')
        link = item.find('a', {"class": "title"})
        # Compare with None explicitly: a tag without children is falsy.
        if img is None or link is None or not link.contents:
            continue
        img_url = img.get('src')
        if not img_url:
            continue

        title = version_suffix.search(link.contents[0]).group(1)
        # "/" is the path separator and cannot appear in a file name.
        # NOTE: other characters the file system may reject are not filtered.
        title = title.replace("/", " ")
        print("%s %s" % (title, img_url))
        urllib.urlretrieve(img_url, SAVE_PATH + title + ".jpg")
if __name__ == "__main__":
    # Walk list pages 0 through 219 in order, echoing each page index
    # before its icons are fetched.
    page = 0
    while page < 220:
        print(page)
        get_icons_from_macx(str(page))
        page += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment