Skip to content

Instantly share code, notes, and snippets.

@garcia
Last active December 4, 2018 00:27
Show Gist options
  • Save garcia/6504832 to your computer and use it in GitHub Desktop.
Save garcia/6504832 to your computer and use it in GitHub Desktop.
Save all photos from the tumblelog specified on the command line using the Tumblr API v1. Requires the Python 'requests' module. Updated for a breaking API change and Python 3 support in 2018.
#!/usr/bin/env python
import argparse
import errno
import itertools
import json
import os
import requests
import time
# Photo widths (in pixels) that download_photo looks for, tried in this order.
WIDTHS = (1280, 500, 400, 250, 100, 75)
# Name of the JSONP callback requested from the API; extract_json strips
# the resulting "savephotos(...);" wrapper from the response text.
CALLBACK = 'savephotos'
# URL template for one page of posts; filled with (blog name, start offset).
REQUEST_URL = 'http://%s.tumblr.com/api/read/json?debug=1&start=%d&callback=' + CALLBACK
def extract_json(req_text):
    """Strip the JSONP callback wrapper from an API response body.

    The request URL asks for the payload to be wrapped in a call to
    CALLBACK, i.e. ``CALLBACK(...);``. This returns the text between the
    wrapper's parentheses so that it can be handed to ``json.loads``.

    Args:
        req_text: Response body text, possibly wrapped in the callback.

    Returns:
        The text inside the wrapper when the wrapper is present;
        otherwise ``req_text`` unchanged.
    """
    prefix = CALLBACK + '('
    # Surrounding whitespace (e.g. a newline after ");") must not stop the
    # wrapper from being recognised.
    text = req_text.strip()
    if text.startswith(prefix):
        # Accept the wrapper with or without its terminating semicolon.
        for suffix in (');', ')'):
            if text.endswith(suffix):
                return text[len(prefix):-len(suffix)]
    return req_text
def download_photo(blog, pid, photo, p):
    """Download one photo, using the first width from WIDTHS that it offers.

    Each width in WIDTHS is tried in order; the first ``photo-url-<width>``
    key found in ``photo`` is fetched and written to
    ``<blog>/<pid>.<p><ext>``, where ``<ext>`` is the extension taken from
    the photo URL. Nothing is downloaded if none of the keys is present.

    Args:
        blog: Directory to save into.
        pid: Post id, used as the first part of the file name.
        photo: Mapping that may contain ``photo-url-<width>`` keys.
        p: Index of the photo within its post; second part of the file name.

    Raises:
        requests.exceptions.HTTPError: If the photo request returns an
            error status.
    """
    for width in WIDTHS:
        photo_size = 'photo-url-%d' % width
        if photo_size not in photo:
            continue

        print('downloading photo %d @ %spx' % (p, width))
        photo_url = photo[photo_size]
        filename = '%s.%s%s' % (pid, p, os.path.splitext(photo_url)[1])

        photoreq = requests.get(photo_url, stream=True)
        try:
            photoreq.raise_for_status()
            with open(os.path.join(blog, filename), 'wb') as out:
                for chunk in photoreq.iter_content(1024):
                    out.write(chunk)
        finally:
            # A streamed response keeps its connection until the body is
            # fully read or the response is closed; close it explicitly so
            # an error status or a failed write does not leak it.
            photoreq.close()
        return
def main():
    """Download every photo from the blog named on the command line.

    Parses the ``blog`` argument and the optional ``--start`` offset,
    creates a directory named after the blog, then requests posts page by
    page (advancing the offset by 20 each time) until a response contains
    no posts. Photos of each ``photo`` post are saved with download_photo.
    HTTP errors while downloading a post's photos are printed and the run
    continues with the next post.

    Raises:
        OSError: If the output directory cannot be created.
        requests.exceptions.HTTPError: If a request for a page of posts
            returns an error status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--start', type=int, default=0,
                        help='the starting offset')
    parser.add_argument('blog', help='the blog to backup')
    args = parser.parse_args()
    blog = args.blog
    start = args.start

    # Make a directory for the images; an already existing directory is fine.
    try:
        os.mkdir(blog)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(blog):
            raise

    # Get the images
    for offset in itertools.count(start, 20):
        print('requesting posts starting from %d' % offset)
        req = requests.get(REQUEST_URL % (blog, offset))
        req.raise_for_status()
        data = json.loads(extract_json(req.text))
        if not data or 'posts' not in data or not data['posts']:
            print('no posts returned')
            break

        for p, post in enumerate(data['posts']):
            if post['type'] != 'photo':
                continue
            print('post #%d' % (offset + p))
            try:
                if 'photos' in post and post['photos']:
                    # Save photoset posts
                    for ph, photo in enumerate(post['photos']):
                        download_photo(blog, post['id'], photo, ph)
                else:
                    # Save single-photo posts; the post itself carries the
                    # photo-url-* keys.
                    download_photo(blog, post['id'], post, 0)
            except requests.exceptions.HTTPError as exc:
                # Exceptions have no ``.message`` attribute on Python 3;
                # printing the exception itself works on both 2 and 3.
                print(exc)
            else:
                # Only report success when no download raised.
                print('saved images for post %s' % post['id'])

        # Pause between page requests.
        time.sleep(10)
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment