Skip to content

Instantly share code, notes, and snippets.

Created Mar 24, 2011
What would you like to do?
Back up Tumblr posts to XML files. (Doesn't save assets, like images and audio.)
#!/usr/bin/env python
from BeautifulSoup import BeautifulStoneSoup
from urllib2 import urlopen
from urllib import urlretrieve
from os.path import basename
import sys
def main(argv=None):
    """Back up a Tumblr blog's posts as a series of XML page files.

    Fetches the blog's read-API XML once to learn the total post count,
    then downloads the posts in fixed-size pages, saving each page to
    ``page<N>.xml`` in the current working directory.  Only the XML is
    saved; assets referenced by posts are not downloaded.

    Args:
        argv: Argument list in ``sys.argv`` layout (program name first,
            Tumblr username second).  Defaults to ``sys.argv``.

    Returns:
        0 on success, 2 if the username argument is missing.

    Raises:
        ValueError: If the API response contains no ``<posts>`` element.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print('Usage: {0} username'.format(basename(__file__)))
        # Stop here; continuing would raise IndexError on argv[1].
        return 2
    username = argv[1]
    # Posts requested per page (believed to be the v1 API's per-request
    # maximum -- confirm against the Tumblr API documentation).
    num = 50
    url = 'http://{0}.tumblr.com/api/read'.format(username)
    page = urlopen(url)
    try:
        xml = page.read()
    finally:
        # Python 2 urlopen handles are not context managers; close by hand.
        page.close()
    soup = BeautifulStoneSoup(xml)
    posts = soup.find('posts')
    if posts is None:
        raise ValueError('No <posts> element in response from {0}'.format(url))
    total = int(posts['total'])
    for index, start in enumerate(range(0, total, num)):
        # The page-size parameter is named "num"; "start" is the post offset.
        urlretrieve('{0}?start={1}&num={2}'.format(url, start, num),
                    'page{0}.xml'.format(index))
    return 0
# Run the backup only when executed as a script, and hand main()'s return
# value to the shell as the process exit status (None is treated as 0).
if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment