Unsplash Image Downloader Script
Gist by @treece, created July 17, 2014
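# Walks the Unsplash Tumblr blog (unsplash.tumblr.com) page by page, parses each
# page's RSS feed, pulls the bit.ly link out of every post description, resolves
# it to the full image URL, and downloads any image not already present in the
# local "unsplash" directory.
# Written for Python 2; feedparser and httplib2 are third-party dependencies.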
import feedparser  # third-party package
import time
import re
import httplib
import urlparse
import httplib2    # third-party package
import urllib
import urllib2
import sys
import os
from math import fabs
failed_counter = 0
last_page = ''
def getContentLocation(link):
    # httplib2 follows the redirect chain; 'content-location' holds the final URL.
    h = httplib2.Http()
    h.follow_all_redirects = True
    resp = h.request(link, "GET")[0]
    contentLocation = resp['content-location']
    # Drop the last four characters so the caller can append the feed path itself.
    contentLocation = contentLocation[:-4]
    return contentLocation
def unshorten_url(url):
    # Issue HEAD requests and follow 3xx redirects recursively until the
    # final (unshortened) URL is reached.
    parsed = urlparse.urlparse(url)
    h = httplib.HTTPConnection(parsed.netloc)
    resource = parsed.path
    if parsed.query != "":
        resource += "?" + parsed.query
    h.request('HEAD', resource)
    response = h.getresponse()
    if response.status / 100 == 3 and response.getheader('Location'):
        return unshorten_url(response.getheader('Location'))
    else:
        return url
def report(count, blockSize, totalSize):
    # urlretrieve progress hook: rewrite the current line with the percentage done.
    percent = int(count * blockSize * 100 / totalSize)
    sys.stdout.write("\r%d%%" % percent + ' Complete - ')
    sys.stdout.flush()
def save_file(siteName, url):
    # Download the image into the siteName directory, skipping files that already exist.
    filename = os.path.basename(url)
    if not os.path.isfile(siteName + '/' + filename):
        sys.stdout.write('\rDownloading Image ' + url + '...\n')
        urllib.urlretrieve(url, siteName + '/' + filename, reporthook=report)
        sys.stdout.write("\rDownload complete, saved as %s" % (filename) + '\n\n')
        sys.stdout.flush()
    else:
        print("Image already exists in directory!")
def make_dir(d):
    if not os.path.exists(d):
        os.makedirs(d)
def start():
    global pageNum, failed_counter, last_page
    siteName = "unsplash"
    print("Starting page number?")
    pageStart = int(raw_input('> '))
    if pageStart == 0:
        pageStart = 1
    pageNum = pageStart
    print("Ending page number?")
    pageEnd = int(raw_input('> '))
    try:
        for page in range(pageStart, pageEnd + 1):
            make_dir(siteName)
            # Page 1 lives at the blog root; later pages are under /page/<n>.
            if pageNum == 1:
                link = 'http://' + siteName + '.tumblr.com'
                linkFinal = getContentLocation(link) + 'rss'
            else:
                link = ('http://' + siteName + '.tumblr.com/page/' + str(pageNum) +
                        '/rss')
                linkFinal = getContentLocation(link) + '/rss'
            print('\n---- Downloading images on page ' + str(pageNum) + ' ----\n')
            d = feedparser.parse(linkFinal)
            for post in d.entries:
                # Each post description embeds a bit.ly link to the full-size image.
                myString = post.description
                match = re.search(r'http://bit.ly[\'"]?([^\'" >]+)', myString)
                if match:
                    matched = match.group(0)
                    imageUrl = unshorten_url(matched)
                    save_file(siteName, imageUrl)
            pageNum += 1
    except Exception as e:
        print('Something went wrong: %s\n' % e)

start()
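The script targets Python 2 (raw_input, httplib, urlparse); run it with a Python 2 interpreter after installing feedparser and httplib2, then enter the starting and ending page numbers when prompted. Downloads land in an unsplash directory created next to the script. For reference, here is a minimal Python 3 sketch of the same redirect-resolution and download steps using only the standard library; the function names are illustrative, not part of the gist, and the RSS-crawling loop is omitted.

import os
import urllib.request

def resolve_final_url(url):
    # Hypothetical helper: send a HEAD request; urlopen follows redirects
    # automatically, and geturl() reports the URL after the final redirect.
    req = urllib.request.Request(url, method='HEAD')
    with urllib.request.urlopen(req) as resp:
        return resp.geturl()

def download_image(dest_dir, url):
    # Hypothetical helper: save the image under dest_dir, skipping existing files.
    os.makedirs(dest_dir, exist_ok=True)
    target = os.path.join(dest_dir, os.path.basename(url))
    if not os.path.isfile(target):
        urllib.request.urlretrieve(url, target)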