Skip to content

Instantly share code, notes, and snippets.

@tfausak
Created March 24, 2011 00:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tfausak/884343 to your computer and use it in GitHub Desktop.
Save tfausak/884343 to your computer and use it in GitHub Desktop.
Back up Tumblr posts to XML files. (Doesn't save assets, like images and audio.)
#!/usr/bin/env python
from BeautifulSoup import BeautifulStoneSoup
from urllib2 import urlopen
from urllib import urlretrieve
from os.path import basename
import sys
def main(argv=None):
    """Back up a Tumblr blog's posts to local XML files.

    Fetches the blog's read-API document once to learn the total number
    of posts, then downloads the posts page by page, saving page ``i``
    as ``page<i>.xml`` in the current working directory.

    Args:
        argv: Command-line argument list; ``argv[1]`` is the Tumblr
            username. Defaults to ``sys.argv`` when omitted.

    Returns:
        2 when the username argument is missing (after printing a usage
        message); otherwise None, so ``sys.exit(main())`` exits with 0.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and also parses on Python 3.
        print('Usage: {0} username'.format(basename(__file__)))
        # Stop here: without a username, argv[1] below would raise
        # IndexError.
        return 2
    username = argv[1]
    num = 50  # page size requested per API call
    url = 'http://{0}.tumblr.com/api/read/'.format(username)
    page = urlopen(url)
    try:
        xml = page.read()
    finally:
        # Release the HTTP connection even if the read fails.
        page.close()
    soup = BeautifulStoneSoup(xml)
    total = int(soup.find('posts')['total'])
    for index, start in enumerate(range(0, total, num)):
        # The v1 read API names the page-size parameter ``num``; sending
        # it under another name would leave the server on its default
        # page size while ``start`` advances by ``num``, skipping posts.
        urlretrieve('{0}?start={1}&num={2}'.format(url, start, num), 'page{0}.xml'.format(index))
# Run the backup only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment