# Unsplash Image Downloader Script
# Standard-library imports (this script targets Python 2: httplib/urlparse/
# urllib2 were merged into http.client/urllib.* in Python 3).
import httplib
import os
import re
import sys
import time
import urllib
import urllib2
import urlparse
from math import fabs

# Third-party imports.
import feedparser
import httplib2

# Module-level state declared `global` in start(); not updated anywhere in
# this file yet — presumably reserved for retry/resume logic. TODO confirm.
failed_counter = 0
last_page = ''
def getContentLocation(link):
    """Follow every redirect for *link* and return the final
    content-location header value minus its last four characters.

    NOTE(review): the fixed [:-4] trim presumably strips a known suffix
    (e.g. '/rss' or '.com' tail) so callers can re-append 'rss' — confirm
    against the URLs built in start().
    """
    client = httplib2.Http()
    client.follow_all_redirects = True
    response_headers = client.request(link, "GET")[0]
    final_location = response_headers['content-location']
    # Drop the fixed-width trailing suffix before returning.
    return final_location[:-4]
def unshorten_url(url):
    """Resolve a shortened URL (e.g. bit.ly) to its final destination.

    Issues HEAD requests and recurses while the server answers with a 3xx
    redirect; returns the first URL that does not redirect.
    """
    parsed = urlparse.urlparse(url)
    # Bug fix: the original always used HTTPConnection, which speaks
    # cleartext on port 80 and fails for https:// short links.
    if parsed.scheme == 'https':
        h = httplib.HTTPSConnection(parsed.netloc)
    else:
        h = httplib.HTTPConnection(parsed.netloc)
    resource = parsed.path
    if parsed.query != "":
        resource += "?" + parsed.query
    h.request('HEAD', resource)
    response = h.getresponse()
    location = response.getheader('Location')
    h.close()  # release the socket before (possibly) recursing
    # // keeps integer semantics on both Python 2 and 3 (301 // 100 == 3).
    if response.status // 100 == 3 and location:
        # Bug fix: a Location header may be relative (RFC 7231 §7.1.2);
        # resolve it against the current URL before recursing.
        return unshorten_url(urlparse.urljoin(url, location))
    return url
def report(count, blockSize, totalSize):
    """urlretrieve reporthook: redraw an in-place percentage on stdout.

    count     -- number of blocks transferred so far
    blockSize -- size of each block in bytes
    totalSize -- total file size in bytes, or <= 0 when the server sent
                 no Content-Length (urlretrieve passes -1 in that case)
    """
    if totalSize > 0:
        # Bug fix: guard against totalSize <= 0 (ZeroDivisionError / a
        # negative percentage in the original) and clamp to 100, since the
        # final block typically overshoots the exact file size.
        percent = min(int(count * blockSize * 100 / totalSize), 100)
        sys.stdout.write("\r%d%%" % percent + ' Complete - ')
    else:
        # Unknown total size: report raw byte count instead.
        sys.stdout.write("\r%d bytes - " % (count * blockSize))
    sys.stdout.flush()
def save_file(siteName, url):
    """Download *url* into the directory *siteName*.

    The local filename is the final path component of the URL; if a file
    by that name already exists in the directory the download is skipped.
    """
    filename = os.path.basename(url)
    destination = siteName + '/' + filename
    # Guard clause: never re-download an image we already have.
    if os.path.isfile(destination):
        print("Image already exists in directory!")
        return
    sys.stdout.write('\rDownloading Image ' + url + '...\n')
    urllib.urlretrieve(url, destination, reporthook=report)
    sys.stdout.write("\rDownload complete, saved as %s" % (filename) + '\n\n')
    sys.stdout.flush()
def make_dir(d):
    """Create directory *d* (with any missing parents) unless the path
    already exists."""
    if os.path.exists(d):
        return
    os.makedirs(d)
def start():
    """Interactively download Unsplash images via the site's tumblr RSS feed.

    Prompts for a start and end page number, then for each page fetches the
    RSS feed, finds bit.ly links in every post, resolves them to the real
    image URLs and saves each image under ./unsplash/.
    """
    global pageNum, failed_counter, last_page
    siteName = "unsplash"
    print("Starting page number?")
    pageStart = int(raw_input('> '))
    if pageStart == 0:
        pageStart = 1  # tumblr pages are 1-based
    print("Ending page number?")
    pageEnd = int(raw_input('> '))
    make_dir(siteName)  # hoisted: one mkdir is enough for the whole run
    try:
        # pageNum is kept global (declared above) for any external observer;
        # the separate unused `page` counter from the original is gone.
        for pageNum in range(pageStart, pageEnd + 1):
            # Page 1 lives at the site root; later pages under /page/N.
            if pageNum == 1:
                link = 'http://' + siteName + '.tumblr.com'
                linkFinal = getContentLocation(link) + 'rss'
            else:
                link = ('http://' + siteName + '.tumblr.com/page/' +
                        str(pageNum) + '/rss')
                linkFinal = getContentLocation(link) + '/rss'
            # Bug fix: the trailing ' ----\n' sat outside the print
            # parentheses in the original, which only printed correctly by
            # accident of the Python 2 print statement.
            print('\n---- Downloading images on page ' + str(pageNum) +
                  ' ----\n')
            feed = feedparser.parse(linkFinal)
            for post in feed.entries:
                # Bug fix: escape the dot — the original 'bit.ly' pattern
                # matched any character in that position.
                match = re.search(r'http://bit\.ly[\'"]?([^\'" >]+)',
                                  post.description)
                if match:
                    imageUrl = unshorten_url(match.group(0))
                    save_file(siteName, imageUrl)
    except Exception as e:
        # Narrowed from a bare except: report what failed instead of
        # silently swallowing even KeyboardInterrupt/SystemExit.
        print('Something went wrong: %s\n' % e)

start()