Skip to content

Instantly share code, notes, and snippets.

@blech
Last active August 30, 2015 00:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save blech/65692 to your computer and use it in GitHub Desktop.
Save blech/65692 to your computer and use it in GitHub Desktop.
A hackish Tumblr backup tool
#!/usr/bin/python
# this script will fetch all your Tumblr posts into a single big JSON file
# TODO- authenticate (fetch private posts)
# TODO- fetch referenced media?
import sys
import urllib2

import simplejson as json
start = 0   # offset of the next page of posts to request
posts = []  # accumulates every post fetched so far
# Blog subdomain (used as <domain>.tumblr.com): taken from the first
# command-line argument when one is given, otherwise the built-in default.
domain = sys.argv[1] if len(sys.argv) > 1 else "craneporn"
def fetch_tumblr(start, num=50):
    """Fetch one page of posts from the Tumblr JSON read API.

    Requests ``num`` posts beginning at offset ``start`` from the blog named
    by the module-level ``domain`` and returns the decoded JSON object.

    The response body carries extra characters around the JSON object (the
    previous implementation dropped the first 6 and the last 3 characters).
    Instead of relying on those fixed lengths, the payload is taken as
    everything from the first '{' to the last '}', so a change in the
    wrapper's length does not break parsing.

    Raises ValueError if the response contains no JSON object.
    """
    url = 'http://%s.tumblr.com/api/read/json?callback=false&num=%s&start=%s' % (domain, num, start)
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    opening = data.find('{')
    closing = data.rfind('}')
    if opening == -1 or closing < opening:
        raise ValueError("No JSON object found in response from %s" % url)
    return json.loads(data[opening:closing + 1])
# Number of posts fetch_tumblr requests per page. A page holding exactly this
# many posts means there may be more to fetch; any other size is the last page.
PAGE_SIZE = 50

# The single-argument parenthesised print form below produces the same output
# under Python 2's print statement.
tumblr = fetch_tumblr(start)
while len(tumblr['posts']) == PAGE_SIZE:
    print("Fetched %s posts" % len(tumblr['posts']))
    posts.extend(tumblr['posts'])
    start += PAGE_SIZE
    tumblr = fetch_tumblr(start)

# The loop exits on the first page that is not full; record it as well.
print("Fetched final %s posts" % len(tumblr['posts']))
total = start + len(tumblr['posts'])
posts.extend(tumblr['posts'])

# Dump everything into a single JSON file named after the blog.
filename = "%s.json" % domain
# The with-block closes the file even if serialising or writing fails, and
# the handle no longer shadows the builtin `file`.
with open(filename, 'w') as outfile:
    outfile.write(json.dumps(posts))
print("Wrote %s posts to '%s'" % (total, filename))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment