Created
May 12, 2010 21:15
-
-
Save paulirish/399132 to your computer and use it in GitHub Desktop.
archive all your tweets to xml or json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
getTwitterHistory.

Archive a Twitter user's timeline to an XML or JSON file.
Mostly written by Simon Willison (I think?) and adapted by Paul Irish.

1. set the credentials correctly below
2. run: python gethistory.py
'''
import urllib2 | |
# ---- configuration ---------------------------------------------------------
# NOTE(review): credentials are stored in plaintext here -- keep this file
# private and rotate the password if the script is ever shared.
username = 'paul_irish'
password = 'i<3IE'
tweets = 2000    # number of tweets to fetch
# 3200 is given as max due to pagination limits
format = 'xml'   # json or xml - json throws it all on a single line. :(
                 # (shadows the `format` builtin; kept because the code below
                 # reads this module-level name)
filename = username + '_archive.' + format  # filename of the archive
perpage = 200    # max tweets per request allowed by the API
# Pages needed, rounded UP.  The previous formula, int(float(tweets) /
# float(perpage)) + 1, always added a page -- fetching a whole extra page
# (200 unwanted tweets) whenever `tweets` divided evenly by `perpage`.
pages = (tweets + perpage - 1) // perpage
# Install a urllib2 opener that answers HTTP Basic Auth challenges from the
# timeline URL with the credentials configured above.
auth = urllib2.HTTPPasswordMgrWithDefaultRealm()
auth.add_password(None, 'http://twitter.com/statuses/user_timeline.' + format, username, password)
authHandler = urllib2.HTTPBasicAuthHandler(auth)
opener = urllib2.build_opener(authHandler)
urllib2.install_opener(opener)  # every urlopen() below now uses this opener
i = 1            # current page number (the API's `page` parameter is 1-based)
response = ''    # accumulates the raw body of every page fetched
print 'Downloading tweets. Note that this may take some time' | |
while i <= pages: | |
request = urllib2.Request('http://twitter.com/statuses/user_timeline.' \ | |
+ format + '?count='+ str(perpage) +'&page=' + str(i)) | |
print 'getting page '+str(i) + ', '+ str(i*perpage) + ' of '+ str(tweets) + ' tweets retrived' | |
response = response + urllib2.urlopen(request).read() | |
i = i + 1 | |
# i'd like to grab all RT's too, but don't know when it should stop...
# NOTE(review): the triple-quoted string below is deliberately disabled code.
# It would repeat the download loop against statuses/retweeted_by_me and
# append the results to `response`; it is kept as-is for a future revision.
"""
auth = urllib2.HTTPPasswordMgrWithDefaultRealm()
auth.add_password(None, 'http://api.twitter.com/1/statuses/retweeted_by_me.' + format, username, password)
authHandler = urllib2.HTTPBasicAuthHandler(auth)
opener = urllib2.build_opener(authHandler)
urllib2.install_opener(opener)
i = 1
# response = ''
print 'Now, we\'re downloading retweets. Note that this may take some... er.. less time'
while i <= pages:
    request = urllib2.Request('http://api.twitter.com/1/statuses/retweeted_by_me.' \
        + format + '?count='+ str(perpage) +'&page=' + str(i))
    print 'getting page '+str(i) + ', '+ str(i*perpage) + ' of '+ str(tweets) + ' tweets retrived'
    response = response + urllib2.urlopen(request).read()
    i = i + 1
"""
handle = open(filename,"w") | |
handle.write(response) | |
handle.close() | |
print 'Archived ' + str(tweets) + ' of ' + username + \ | |
'\'s tweets to ' + filename |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment