Skip to content

Instantly share code, notes, and snippets.

@chrishokamp
Created November 12, 2014 14:41
Show Gist options
  • Save chrishokamp/fe6d4fb3224b680ea1e9 to your computer and use it in GitHub Desktop.
Save chrishokamp/fe6d4fb3224b680ea1e9 to your computer and use it in GitHub Desktop.
grab the images at a url
#!/usr/bin/python
#Author: Chris Hokamp
#Spring 2013
import sys
import requests
from BeautifulSoup import BeautifulSoup
import re
try:
    from urllib.parse import urljoin, urlparse  # Python 3
except ImportError:
    from urlparse import urljoin, urlparse  # Python 2
import os


def matching_image_urls(srcs, pattern, base_url):
    """Return absolute URLs for the image sources that match *pattern*.

    srcs     -- iterable of raw ``src`` attribute values; ``None`` and empty
                strings (tags without a usable ``src``) are ignored.
    pattern  -- regular expression, searched case-insensitively.
    base_url -- URL of the page the sources came from; relative and
                protocol-relative sources are resolved against it.
    """
    matcher = re.compile(pattern, re.IGNORECASE)
    # The pattern is applied to the raw src (as the script always did), and
    # only afterwards is the URL made absolute so requests can fetch it.
    return [urljoin(base_url, src)
            for src in srcs
            if src and matcher.search(src) is not None]


def local_image_path(save_dir, image_url):
    """Return the path under *save_dir* where *image_url* should be stored.

    The file name is the last segment of the URL path, so query strings and
    fragments never end up in the name.  Returns ``None`` when the URL has no
    final path segment (for example when it ends with ``/``).
    """
    file_name = urlparse(image_url).path.rsplit('/', 1)[-1]
    if not file_name:
        return None
    return "%s/%s" % (save_dir, file_name)


def main(argv):
    """Download every image on a page whose ``src`` matches a pattern.

    argv -- ``[program, url, filterString, path-to-save]``; exits with a
            usage message when the argument count is wrong.
    """
    if len(argv) != 4:
        sys.exit("Usage: grabImages.py <url> <filterString> <path-to-save>")
    url = argv[1].strip()
    filterStr = argv[2].strip()
    savePath = argv[3].strip()

    req = requests.get(url)
    bs = BeautifulSoup(req.text)
    imgTags = bs('img')  # query the document once and reuse the result
    for tag in imgTags:
        print(tag)

    # .get() instead of ['src']: <img> tags without a src must not abort the run.
    goodOnes = matching_image_urls(
        [tag.get('src') for tag in imgTags], filterStr, url)

    # Create the target directory up front instead of failing on open().
    if not os.path.isdir(savePath):
        os.makedirs(savePath)

    for img in goodOnes:
        print(img)
        imgFileName = local_image_path(savePath, img)
        if imgFileName is None:
            print("Skipped %s (no file name in URL)" % img)
            continue
        imgData = requests.get(img)
        if not imgData.ok:
            # Do not store an HTTP error page under an image file name.
            print("Skipped %s (HTTP %s)" % (img, imgData.status_code))
            continue
        # 'with' closes the file even if the write fails.
        with open(imgFileName, 'wb') as imgFile:
            imgFile.write(imgData.content)
        print("Saved %s" % imgFileName)


if __name__ == "__main__":
    main(sys.argv)
# FIXME: this invocation did not work (cause unknown): python grabImages.py "http://www.buzzfeed.com/mjs538/things-youll-see-only-in-china" .jpg ~/Pictures/only_in_china
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment