Skip to content

Instantly share code, notes, and snippets.

@recall704
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save recall704/a72b120e6c6f7013ea28 to your computer and use it in GitHub Desktop.
Save recall704/a72b120e6c6f7013ea28 to your computer and use it in GitHub Desktop.
#coding:utf-8
import os
import urllib
from pyquery import PyQuery
# Download every image referenced by a page into a target directory.
def get_img_and_save(url, target_dir):
    """Download all ``<img>`` images referenced by the page at *url*.

    Files are written into *target_dir* and named by their position in the
    list of usable images, keeping the extension found in the image URL
    path (``0.jpg``, ``1.png``, ...).

    Args:
        url: Address of the page; it is handed to ``PyQuery`` as-is and also
            used as the base for resolving relative image addresses.
        target_dir: Directory to store the images in. It is created,
            including missing parent directories, when it does not exist.

    Raises:
        OSError: If *target_dir* cannot be created.
        Any error raised by ``PyQuery`` or ``urlretrieve`` propagates
        unchanged, so a failed download stops the remaining ones.
    """
    # Function-scope imports so the block runs on both Python 2 and 3
    # without touching the file-level import block.
    try:
        from urllib.parse import urljoin, urlparse
        from urllib.request import urlretrieve
    except ImportError:  # Python 2
        from urllib import urlretrieve
        from urlparse import urljoin, urlparse

    pyobj = PyQuery(url)
    # Image list: skip <img> tags without a src and make relative
    # addresses absolute, otherwise urlretrieve cannot fetch them.
    img_list = [
        urljoin(url, src)
        for src in (img.get('src') for img in pyobj('img'))
        if src
    ]
    # Create the directory if it does not exist yet; makedirs also handles
    # nested paths that os.mkdir would reject.
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    for i, img in enumerate(img_list):
        # File extension, taken from the URL path only so that a query
        # string such as "?v=2" does not end up in the file name.
        ext = os.path.splitext(urlparse(img).path)[1]
        file_name = "{0}{1}".format(i, ext)
        # Full path of the stored file.
        file_path = os.path.join(target_dir, file_name)
        # Single-argument print() produces the same output on Python 2 and 3.
        print(u'{0} {1}'.format(u'正在下载', img))
        urlretrieve(img, file_path)
# Example run: fetch every image of one page into a local directory.
url = 'http://www.mm131.com/xiaohua/1119.html'
get_img_and_save(url=url, target_dir='/home/recall/1111')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment