Created
December 13, 2010 23:59
-
-
Save hugs/739816 to your computer and use it in GitHub Desktop.
Python code for retrieving all your tweets, based on code from @terrycojones.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on http://blogs.fluidinfo.com/terry/2009/06/24/python-code-for-retrieving-all-your-tweets/
# Patched to retry upon failure until *all* tweets are downloaded.
import operator
import sys
import time

import twitter
from dateutil.parser import parse
# Base URL used to build the permalink of each tweet in the HTML output.
twitterURL = 'http://twitter.com'
def fetch(user, count=200, pause=5, retry_pause=20):
    """Download a user's timeline batch by batch, appending each batch to HTML.

    The timeline is walked backwards: every request passes ``max_id`` set to
    one less than the smallest status id of the previous batch. Each batch
    that contains statuses is handed to ``htmlPrint`` immediately, so what
    has been fetched so far is already on disk if a later request fails.
    The loop ends when a request yields no statuses.

    Args:
        user: Name passed to ``GetUserTimeline`` and to ``htmlPrint``.
        count: Value passed as ``count`` to ``GetUserTimeline``.
        pause: Seconds to sleep after every successful request.
        retry_pause: Seconds to sleep before retrying a failed request.

    Returns:
        None. Tweets are written by ``htmlPrint``; progress is reported on
        stdout and stderr.
    """
    api = twitter.Api()
    max_id = None
    while True:
        try:
            statuses = api.GetUserTimeline(user, count=count, max_id=max_id)
        except Exception:
            # Failed requests are retried without limit, but only ordinary
            # errors are caught: a bare ``except:`` would also swallow
            # KeyboardInterrupt/SystemExit and make the loop unstoppable.
            sys.stdout.write("Gateway error... trying again\n")
            time.sleep(retry_pause)
            api = twitter.Api()  # start over with a fresh client
            continue
        time.sleep(pause)

        # Collect the batch keyed by status id; repeated ids within the same
        # response are counted as "old" and kept only once.
        batch = {}
        ignCount = 0
        for s in statuses:
            if s.id in batch:
                ignCount += 1
            else:
                batch[s.id] = s
        newCount = len(batch)
        # The counters restart with every batch, so the "total" field of the
        # progress line is the number of new statuses in this batch.
        total = newCount
        sys.stderr.write("Fetched %d/%d/%d new/old/total.\n" % (
            newCount, ignCount, total))
        if newCount == 0:
            break

        # The next request must only return statuses older than this batch.
        max_id = min(batch) - 1
        sys.stdout.write("Max_id: %s\n" % max_id)
        htmlPrint(user, batch.values())
def htmlPrint(user, tweets):
    """Append an HTML listing of ``tweets`` to ``<user>.html``, newest first.

    Every call appends a complete ``<html>...</html>`` section, because the
    file is opened in append mode. As a side effect each tweet object gets a
    ``pdate`` attribute holding its parsed ``created_at`` value.

    Args:
        user: Name used for the output file name, the page title and the
            permalink of every tweet.
        tweets: Iterable of status objects providing ``created_at``, ``id``
            and ``text``.
    """
    # Materialise first: the tweets are walked more than once, which would
    # silently drop everything if a one-shot iterator were passed in.
    tweets = list(tweets)
    for t in tweets:
        t.pdate = parse(t.created_at)

    # Ascending sort followed by an in-place reverse gives newest-first order.
    ordered = sorted(tweets, key=operator.attrgetter('pdate'))
    ordered.reverse()

    header = """<html><title>Tweets for %s</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<body><small>""" % user

    # ``with`` closes the file even when formatting a tweet raises.
    with open('%s.html' % user, 'a') as f:
        f.write(header + '\n')
        for t in ordered:
            # The text is encoded as UTF-8 to match the charset declared in
            # the header above.
            line = '%s <a href="%s/%s/status/%d">%s</a><br/>' % (
                t.pdate.strftime('%Y-%m-%d %H:%M'), twitterURL,
                user, t.id, t.text.encode('utf8'))
            f.write(line + '\n')
        f.write('</small></body></html>\n\n' + '\n')
if __name__ == '__main__':
    # The user name is the first command-line argument; 'hugs' is the default.
    user = sys.argv[1] if len(sys.argv) >= 2 else 'hugs'
    # fetch() hands every batch to htmlPrint itself and returns nothing, so
    # there is no result to keep or print here.
    fetch(user)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ python tweet_archiver.py | |
Gateway error... trying again | |
Fetched 193/0/193 new/old/total. | |
Max_id: 2101508686487551 | |
Fetched 181/0/181 new/old/total. | |
Max_id: 25258004272 | |
Fetched 195/0/195 new/old/total. | |
Max_id: 20987676233 | |
Fetched 191/0/191 new/old/total. | |
Max_id: 18856965859 | |
Gateway error... trying again | |
Fetched 191/0/191 new/old/total. | |
Max_id: 15375853421 | |
Gateway error... trying again | |
Fetched 181/0/181 new/old/total. | |
Max_id: 11351097286 | |
Gateway error... trying again | |
Fetched 179/0/179 new/old/total. | |
Max_id: 9436491875 | |
Gateway error... trying again | |
Fetched 162/0/162 new/old/total. | |
Max_id: 7688331574 | |
Gateway error... trying again | |
Fetched 191/0/191 new/old/total. | |
Max_id: 5627071780 | |
Gateway error... trying again | |
Fetched 200/0/200 new/old/total. | |
Max_id: 4306824367 | |
Gateway error... trying again | |
Fetched 200/0/200 new/old/total. | |
Max_id: 3451794071 | |
Gateway error... trying again | |
Fetched 200/0/200 new/old/total. | |
Max_id: 2429539283 | |
Gateway error... trying again | |
Fetched 200/0/200 new/old/total. | |
Max_id: 1720724669 | |
Gateway error... trying again | |
Fetched 200/0/200 new/old/total. | |
Max_id: 1262300070 | |
Gateway error... trying again | |
Fetched 200/0/200 new/old/total. | |
Max_id: 977729320 | |
Fetched 30/0/30 new/old/total. | |
Max_id: 266874171 | |
Fetched 0/0/0 new/old/total. | |
$ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment