archive all your tweets to xml or json
'''
getTwitterHistory.
mostly written by Simon Willison (I think?) and fucked with by paul irish
1. set the credentials correctly below
2. run: python gethistory.py
'''
import urllib2
username = 'paul_irish'
password = 'i<3IE'
tweets = 2000 # number of tweets
# 3200 is given as max due to pagination limits
format = 'xml' # json or xml - json throws it all on a single line. :(
filename = username + '_archive.'+format # filename of the archive
perpage = 200 # max per request
pages = (tweets + perpage - 1) // perpage # ceiling division: number of 200-tweet pages needed
auth = urllib2.HTTPPasswordMgrWithDefaultRealm()
auth.add_password(None, 'http://twitter.com/statuses/user_timeline.' + format, username, password)
authHandler = urllib2.HTTPBasicAuthHandler(auth)
opener = urllib2.build_opener(authHandler)
urllib2.install_opener(opener)
i = 1
response = ''
print 'Downloading tweets. Note that this may take some time'
while i <= pages:
    request = urllib2.Request('http://twitter.com/statuses/user_timeline.'
                              + format + '?count=' + str(perpage) + '&page=' + str(i))
    print 'getting page ' + str(i) + ', ' + str(min(i * perpage, tweets)) + ' of ' + str(tweets) + ' tweets retrieved'
    response = response + urllib2.urlopen(request).read()
    i = i + 1
# i'd like to grab all RT's too, but don't know when it should stop... (one possible stopping rule is sketched below)
"""
auth = urllib2.HTTPPasswordMgrWithDefaultRealm()
auth.add_password(None, 'http://api.twitter.com/1/statuses/retweeted_by_me.' + format, username, password)
authHandler = urllib2.HTTPBasicAuthHandler(auth)
opener = urllib2.build_opener(authHandler)
urllib2.install_opener(opener)
i = 1
# response = ''
print 'Now, we\'re downloading retweets. Note that this may take some... er.. less time'
while i <= pages:
    request = urllib2.Request('http://api.twitter.com/1/statuses/retweeted_by_me.'
                              + format + '?count=' + str(perpage) + '&page=' + str(i))
    print 'getting page ' + str(i) + ', ' + str(min(i * perpage, tweets)) + ' of ' + str(tweets) + ' tweets retrieved'
    response = response + urllib2.urlopen(request).read()
    i = i + 1
"""
# note: the pages are written back to back, so the archive is a series of raw
# API responses rather than one well-formed XML/JSON document (see merge sketch below)
handle = open(filename,"w")
handle.write(response)
handle.close()
print 'Archived ' + str(tweets) + ' of ' + username + \
'\'s tweets to ' + filename
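
# A minimal sketch, assuming each page parses as a standalone JSON array:
# the loop above concatenates raw responses, so a json archive ends up as
# several arrays back to back rather than one valid document. If the pages
# were collected in a list instead of one string, they could be merged like
# this before writing. merge_json_pages is a hypothetical helper, not part
# of the original script.
import json

def merge_json_pages(pages):
    merged = []
    for body in pages:
        merged.extend(json.loads(body))  # each page is a JSON array of tweets
    return json.dumps(merged, indent=2)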