Skip to content

Instantly share code, notes, and snippets.

@g761007
Created November 3, 2015 07:04
Show Gist options
  • Save g761007/118977430e329bcfb103 to your computer and use it in GitHub Desktop.
Save g761007/118977430e329bcfb103 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import os
from StringIO import StringIO
from lxml import etree
import requests
import click
def _save_image(image_url, path):
    """Stream *image_url* into the file at *path*.

    Returns True when the server answered 200 and the whole body was
    written, False otherwise. Never raises for request or file errors, so
    a single bad image cannot abort the caller's loop.
    """
    try:
        resp = requests.get(image_url, stream=True)
    except requests.RequestException:
        # The request could not even be started or completed.
        return False
    try:
        if resp.status_code != 200:
            return False
        with open(path, 'wb') as f:
            for chunk in resp.iter_content(1024):
                f.write(chunk)
        return True
    except (requests.RequestException, IOError):
        # Remove a partially written file: the caller skips paths that
        # already exist, so a truncated file would never be retried.
        if os.path.exists(path):
            os.remove(path)
        return False
    finally:
        # With stream=True the connection stays open until released.
        resp.close()


def download_images(url, output_dir='output'):
    """Download every image referenced by ``<img src=...>`` on a page.

    The page at *url* is fetched and parsed as HTML. Each non-empty ``src``
    is resolved against *url* (so relative links work) and saved under
    *output_dir* using the last ``/``-separated component of the resolved
    URL as the file name. Files that already exist are skipped silently.
    One ``done.`` or ``fail.`` progress line is printed per attempted
    download.

    Args:
        url: Address of the HTML page to scan.
        output_dir: Destination directory; created if missing. A false
            value (``None`` or ``''``) falls back to ``'output'``.

    Raises:
        requests.RequestException: if the page itself cannot be fetched.
    """
    # Function-scope import keeps the block usable on Python 2 and 3
    # without touching the file's import section.
    try:
        from urllib.parse import urljoin  # Python 3
    except ImportError:
        from urlparse import urljoin  # Python 2

    # A caller may forward None (e.g. an omitted CLI option), which would
    # otherwise break os.path.join below.
    output_dir = output_dir or 'output'
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    page = requests.get(url)
    root = etree.HTML(page.content)
    # NOTE(review): some lxml versions appear to return None for an empty
    # document instead of raising -- guard so that case means "no images".
    found = root.xpath('//img/@src') if root is not None else []
    # Selecting the attribute skips <img> tags without src; drop empty ones.
    sources = [src for src in found if src]
    total = len(sources)

    for count, src in enumerate(sources, 1):
        image_url = urljoin(url, src)
        path = os.path.join(output_dir, image_url.split('/')[-1])
        if os.path.exists(path):
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        if _save_image(image_url, path):
            print('done. ({}/{}) {} , '.format(count, total, image_url))
        else:
            print('fail. ({}/{}) {} , '.format(count, total, image_url))
@click.command()
# --url is mandatory: without it None would be handed to requests.get.
@click.option('--url', required=True, help='download images of url')
# Explicit default: click passes None for an omitted option, which would
# override download_images' own default directory.
@click.option('--output', default='output', help='dst path of directory')
def main(url, output):
    """Download all images referenced by the page at --url into --output."""
    download_images(url, output)


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment