Created
July 17, 2014 00:15
-
-
Save treece/8de12ae3ca5c49b3e462 to your computer and use it in GitHub Desktop.
Unsplash Image Downloader Script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import feedparser | |
import time | |
import re | |
import httplib | |
import urlparse | |
import httplib2 | |
import urllib | |
import urllib2 | |
import sys | |
import os | |
from math import fabs | |
# Module-level state. Neither name is reassigned anywhere in the visible
# code; start() only lists them in its ``global`` statement.
last_page = ''  # initialised to the empty string
failed_counter = 0  # initialised to zero
def getContentLocation(link):
    """Return the ``content-location`` of a GET on *link*, minus 4 characters.

    The request is made through ``httplib2`` with ``follow_all_redirects``
    enabled. The last four characters of the ``content-location`` response
    value are dropped before returning; the callers in this file append
    ``'rss'`` or ``'/rss'`` to the result.

    Raises ``KeyError`` if the response carries no ``content-location`` entry.
    """
    client = httplib2.Http()
    client.follow_all_redirects = True
    # request() yields (response, body); only the response headers are used.
    response, _body = client.request(link, "GET")
    return response['content-location'][:-4]
def unshorten_url(url, max_redirects=10):
    """Resolve *url* through its chain of HTTP redirects.

    Issues a ``HEAD`` request for the URL and, while the server answers with
    a 3xx status and a ``Location`` header, repeats the request against the
    redirect target.

    Args:
        url: The (possibly shortened) URL to resolve.
        max_redirects: Upper bound on the number of requests made, so a
            redirect loop cannot recurse forever. When the bound is reached
            the most recently seen URL is returned.

    Returns:
        The first URL in the chain that does not redirect any further. A URL
        without a host part is returned unchanged, without any network access.

    Raises:
        Whatever the underlying connection raises (socket / HTTP errors).
    """
    for _ in range(max_redirects):
        parsed = urlparse.urlparse(url)
        if not parsed.netloc:
            # No host to connect to, so there is nothing to resolve.
            return url

        # Use TLS for https URLs instead of speaking plain HTTP to them.
        if parsed.scheme == 'https':
            conn = httplib.HTTPSConnection(parsed.netloc)
        else:
            conn = httplib.HTTPConnection(parsed.netloc)

        resource = parsed.path or '/'
        if parsed.query != "":
            resource += "?" + parsed.query

        try:
            conn.request('HEAD', resource)
            response = conn.getresponse()
            status = response.status
            location = response.getheader('Location')
        finally:
            # Always release the socket, even when the request fails.
            conn.close()

        # Floor division keeps the status-class check valid on Python 3 too.
        if status // 100 != 3 or not location:
            return url

        # Location may be relative; resolve it against the current URL.
        url = urlparse.urljoin(url, location)
    return url
def report(count, blockSize, totalSize):
    """Progress hook for ``urllib.urlretrieve``; rewrites one status line.

    Args:
        count: Number of blocks transferred so far.
        blockSize: Size of one block in bytes.
        totalSize: Total size of the download in bytes. May be zero or
            negative when the server does not report a size.

    When the total size is known the progress is written as a percentage,
    capped at 100 because the final block usually overshoots the real size.
    When it is unknown the number of bytes transferred is written instead,
    which avoids a division by zero or a negative percentage.
    """
    transferred = count * blockSize
    if totalSize > 0:
        percent = min(100, transferred * 100 // totalSize)
        sys.stdout.write("\r%d%%" % percent + ' Complete - ')
    else:
        sys.stdout.write("\r%d bytes" % transferred + ' Complete - ')
    sys.stdout.flush()
def save_file(siteName, url):
    """Download *url* into the *siteName* directory unless it is already there.

    The local file name is the last path component of *url*. If a file of
    that name already exists under *siteName* a notice is printed and nothing
    is downloaded; otherwise the file is fetched with ``urllib.urlretrieve``
    using ``report`` as the progress hook.
    """
    filename = os.path.basename(url)
    destination = siteName + '/' + filename

    if os.path.isfile(destination):
        print("Image already exists in directory!")
        return

    sys.stdout.write('\rDownloading Image ' + url + '...\n')
    urllib.urlretrieve(url, destination, reporthook=report)
    sys.stdout.write("\rDownload complete, saved as %s" % (filename) + '\n\n')
    sys.stdout.flush()
def make_dir(d):
    """Create directory *d* (with any missing parents) if the path is absent.

    Does nothing when something already exists at *d*.
    """
    if os.path.exists(d):
        return
    os.makedirs(d)
def start():
    """Interactively download bit.ly-linked images from a range of pages.

    Prompts on stdin for a starting and an ending page number, then for each
    page in that inclusive range:

    1. makes sure the ``unsplash`` output directory exists,
    2. builds the page's RSS URL via ``getContentLocation``,
    3. parses the feed with ``feedparser``,
    4. looks for the first ``http://bit.ly`` link in each entry's
       description, expands it with ``unshorten_url`` and stores the target
       with ``save_file``.

    The current page number is kept in the module-level ``pageNum``.

    Raises:
        ValueError: if either prompt is answered with something that is not
            an integer (the prompts are outside the error handler).

    Any ``Exception`` raised while processing pages is reported on stdout and
    ends the run; ``KeyboardInterrupt`` and ``SystemExit`` propagate.
    """
    global pageNum
    siteName = "unsplash"
    # Compiled once; the dot is escaped so only the literal host matches.
    shortLinkPattern = re.compile(r'http://bit\.ly[\'"]?([^\'" >]+)')

    print("Starting page number?")
    # Pages are 1-based: treat 0 (or a negative entry) as the first page.
    pageStart = max(int(raw_input('> ')), 1)
    pageNum = pageStart
    print("Ending page number?")
    pageEnd = int(raw_input('> '))

    try:
        for _ in range(pageStart, pageEnd + 1):
            make_dir(siteName)
            if pageNum == 1:
                link = 'http://' + siteName + '.tumblr.com'
                linkFinal = getContentLocation(link) + 'rss'
            else:
                link = ('http://' + siteName + '.tumblr.com/page/' +
                        str(pageNum) + '/rss')
                linkFinal = getContentLocation(link) + '/rss'
            # Build the whole banner inside the call so it prints the same
            # text under both the print statement and the print function.
            print('\n---- Downloading images on page ' + str(pageNum) +
                  ' ----\n')
            feed = feedparser.parse(linkFinal)
            for post in feed.entries:
                match = shortLinkPattern.search(post.description)
                if match:
                    imageUrl = unshorten_url(match.group(0))
                    save_file(siteName, imageUrl)
            pageNum += 1
    except Exception as exc:
        # Keep the original notice, but also say what actually failed.
        print('Something went wrong!\n')
        print('%s: %s' % (type(exc).__name__, exc))
if __name__ == "__main__":
    # Only prompt and download when executed as a script, not on import.
    start()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment