Skip to content

Instantly share code, notes, and snippets.

@jbarratt
Forked from anonymous/scrap.py
Created January 2, 2015 14:28
Show Gist options
  • Save jbarratt/28d63f49752b533d50c1 to your computer and use it in GitHub Desktop.
Save jbarratt/28d63f49752b533d50c1 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import requests
import sys
import lxml.html
import urlparse
import posixpath
import os
# Page whose <img> tags are scanned for wallpaper images.
SOURCE_URL = 'http://www.jpl.nasa.gov/spaceimages/'

# Seconds to wait on any single HTTP request; without a timeout requests
# can block forever on a stalled connection.
REQUEST_TIMEOUT = 30


def wallpaper_urls(base_url, srcs):
    """Return absolute URLs for the entries of *srcs* containing 'wallpaper'.

    Relative entries are resolved against *base_url*. Order is preserved.
    """
    return [
        urlparse.urljoin(base_url, src)
        for src in srcs
        if 'wallpaper' in src
    ]


def filename_from_url(url):
    """Return the last path component of *url*, without query or fragment.

    The result is an empty string when the URL path ends in '/'.
    """
    return posixpath.basename(urlparse.urlsplit(url).path)


def download(url, filename):
    """Fetch *url* and write the response body to *filename*.

    Raises requests.RequestException on connection errors, timeouts and
    HTTP error statuses; the file is only opened after a successful fetch,
    so an error page is never saved under the image's name.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    with open(filename, 'wb') as image:
        image.write(response.content)


def main():
    """Download every wallpaper linked from SOURCE_URL that is not on disk.

    Files are written to the current directory under the URL's base name.
    Exits with an error message when no wallpaper is found or when any
    download fails.
    """
    page = requests.get(SOURCE_URL, timeout=REQUEST_TIMEOUT)
    page.raise_for_status()
    srcs = lxml.html.fromstring(page.text).xpath('//img/@src')

    # Check after filtering: a page with images but no wallpapers is
    # just as much a "nothing found" outcome as a page with no images.
    images = wallpaper_urls(page.url, srcs)
    if not images:
        sys.exit("found no pictures")

    # print() is always given exactly one argument so the output is the
    # same whether it is parsed as a statement or as a function call.
    print(images)
    print('found %s pictures' % len(images))

    failures = 0
    for url in images:
        filename = filename_from_url(url)
        if not filename:
            # open('') would raise; there is nothing sensible to save to.
            sys.stderr.write(
                "Skipping {}, no file name in URL\n".format(url))
            continue
        if os.path.exists(filename):
            print("Skipping {}, already exists".format(filename))
            continue
        print("downloading {} to {}".format(url, filename))
        try:
            download(url, filename)
        except requests.RequestException as err:
            # One bad image should not stop the remaining downloads.
            failures += 1
            sys.stderr.write(
                "failed to download {}: {}\n".format(url, err))

    if failures:
        sys.exit("%d download(s) failed" % failures)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment