Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Unsplash Image Downloader Script
import feedparser
import time
import re
import httplib
import urlparse
import httplib2
import urllib
import urllib2
import sys
import os
from math import fabs
# Count of failed downloads; declared global in start() but not visibly
# incremented anywhere in this file — presumably reserved for retry logic.
failed_counter = 0
# Last page processed; also only declared global, never assigned here —
# NOTE(review): looks like leftover resume-support state, confirm before removing.
last_page = ''
def getContentLocation(link):
    """Resolve *link* through every HTTP redirect and return the final
    content-location header value with its last four characters removed
    (the callers re-append their own 'rss' / '/rss' suffix).
    """
    client = httplib2.Http()
    client.follow_all_redirects = True
    # request() returns (response_headers, body); only the headers matter here.
    headers = client.request(link, "GET")[0]
    final_url = headers['content-location']
    # Drop the trailing four characters before the caller appends a path.
    return final_url[:-4]
def unshorten_url(url):
    """Follow HTTP redirects for *url* (e.g. a bit.ly short link) and
    return the final destination URL.

    Only HEAD requests are issued, so no response bodies are downloaded.
    Recurses once per 3xx hop until a non-redirect response is seen.
    """
    parsed = urlparse.urlparse(url)
    conn = httplib.HTTPConnection(parsed.netloc)
    try:
        resource = parsed.path
        if parsed.query != "":
            resource += "?" + parsed.query
        conn.request('HEAD', resource)
        response = conn.getresponse()
        # Any 3xx status carrying a Location header is a redirect to chase.
        # NOTE(review): a relative Location value would break the recursive
        # urlparse — confirm the shorteners used always send absolute URLs.
        if response.status // 100 == 3 and response.getheader('Location'):
            return unshorten_url(response.getheader('Location'))
        return url
    finally:
        # Fix: the original never closed the connection, leaking a socket
        # per redirect hop.
        conn.close()
def report(count, blockSize, totalSize):
    """Progress hook for urllib.urlretrieve: rewrite the current line of
    stdout with the download percentage.

    count      -- number of blocks transferred so far.
    blockSize  -- size of each block in bytes.
    totalSize  -- total file size in bytes, or a value <= 0 when the
                  server did not report a Content-Length.
    """
    if totalSize <= 0:
        # Fix: urlretrieve passes -1 when the size is unknown; the original
        # printed a negative percentage (and crashed on totalSize == 0).
        sys.stdout.write("\r?%" + ' Complete - ')
    else:
        # Clamp at 100: the final block usually overshoots totalSize.
        percent = min(int(count * blockSize * 100 / totalSize), 100)
        sys.stdout.write("\r%d%%" % percent + ' Complete - ')
    sys.stdout.flush()
def save_file(siteName, url):
    """Download the image at *url* into the *siteName* directory, keeping
    the URL's basename as the filename; skip files already on disk.
    """
    filename = os.path.basename(url)
    target = siteName + '/' + filename
    # Guard clause: never re-download an image that already exists.
    if os.path.isfile(target):
        print("Image already exists in directory!")
        return
    sys.stdout.write('\rDownloading Image ' + url + '...\n')
    urllib.urlretrieve(url, target, reporthook=report)
    sys.stdout.write("\rDownload complete, saved as %s" % (filename) + '\n\n')
    sys.stdout.flush()
def make_dir(d):
    """Create directory *d* (including parents) if it does not exist.

    Fix: the original exists-then-create pair had a TOCTOU race — another
    process creating the directory between the check and the makedirs call
    would raise. EAFP: attempt the create and tolerate "already exists".
    """
    try:
        os.makedirs(d)
    except OSError:
        # Re-raise anything that is not the directory already being there
        # (permission errors, a file occupying the path, ...).
        if not os.path.isdir(d):
            raise
def start():
    """Interactive driver: download every bit.ly-linked image from a range
    of pages of the unsplash tumblr RSS feed.

    Prompts for a starting and ending page number, then for each page in
    the range parses the page's RSS feed, extracts bit.ly links from post
    descriptions, resolves them to their real URLs and saves the images
    under the 'unsplash' directory.
    """
    global pageNum, failed_counter, last_page
    siteName = "unsplash"
    print("Starting page number?")
    pageStart = int(raw_input('> '))
    if pageStart == 0:
        pageStart = 1  # there is no page 0; treat it as page 1
    pageNum = pageStart
    print("Ending page number?")
    pageEnd = int(raw_input('> '))
    try:
        for page in range(pageStart, pageEnd + 1):
            make_dir(siteName)
            # Page 1 lives at the site root; later pages under /page/<n>/.
            if pageNum == 1:
                link = 'http://' + siteName + '.tumblr.com'
                linkFinal = getContentLocation(link) + 'rss'
            else:
                link = ('http://' + siteName + '.tumblr.com/page/' +
                        str(pageNum) + '/rss')
                linkFinal = getContentLocation(link) + '/rss'
            # Fix: the original put the trailing " ----\n" outside the
            # parentheses — a Python-2-only parse that breaks under Python 3.
            print('\n---- Downloading images on page ' + str(pageNum) + ' ----\n')
            d = feedparser.parse(linkFinal)
            for post in d.entries:
                myString = post.description
                # Fix: escape the dot so 'bit.ly' is matched literally
                # (the original also matched e.g. 'bitXly').
                match = re.search(r'http://bit\.ly[\'"]?([^\'" >]+)', myString)
                if match:
                    imageUrl = unshorten_url(match.group(0))
                    save_file(siteName, imageUrl)
            pageNum += 1
    except Exception as e:
        # Fix: the original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt, and discarded the actual error.
        print('Something went wrong!\n')
        print(e)
# Only run the interactive downloader when executed as a script, so the
# module can be imported without triggering prompts and network I/O.
if __name__ == '__main__':
    start()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment