Skip to content

Instantly share code, notes, and snippets.

@hugs
Created December 13, 2010 23:59
Show Gist options
  • Save hugs/739816 to your computer and use it in GitHub Desktop.
Save hugs/739816 to your computer and use it in GitHub Desktop.
Python code for retrieving all your tweets, based on code from @terrycojones.
# Based on http://blogs.fluidinfo.com/terry/2009/06/24/python-code-for-retrieving-all-your-tweets/
# Patched to retry upon failure until *all* tweets are downloaded.
import sys, twitter, operator
from dateutil.parser import parse
import time
# Base URL of the Twitter site; htmlPrint prefixes it to
# "/<user>/status/<id>" when building the link for each tweet.
twitterURL = 'http://twitter.com'
def fetch(user):
    """Download a user's timeline page by page, writing each page to HTML.

    Pages of up to 200 statuses are requested with ``api.GetUserTimeline``.
    After every page ``max_id`` is lowered to one below the smallest status
    id on that page, so the next request continues with older statuses.
    Each non-empty page is handed to ``htmlPrint`` right away and the
    per-page dict and counter are then reset, which is why the progress
    line reports totals for the current page only.

    A failing request is retried without limit: the function waits 20
    seconds, builds a fresh ``twitter.Api`` object and tries the same page
    again. The loop stops at the first page that yields no new statuses.

    Args:
        user: screen name passed to the API and on to ``htmlPrint``.

    Returns:
        None. All output is produced by ``htmlPrint`` and the progress
        messages written to stdout/stderr.
    """
    data = {}
    api = twitter.Api()
    max_id = None
    total = 0
    while True:
        try:
            statuses = api.GetUserTimeline(user, count=200, max_id=max_id)
        except Exception:
            # Deliberately not a bare ``except:``. KeyboardInterrupt and
            # SystemExit must still escape, otherwise this endless retry
            # loop cannot be stopped while requests keep failing.
            sys.stdout.write("Gateway error... trying again\n")
            time.sleep(20)
            api = twitter.Api()
            continue
        # Pause between successful requests as well.
        time.sleep(5)

        newCount = ignCount = 0
        for s in statuses:
            if s.id in data:
                ignCount += 1
            else:
                data[s.id] = s
                newCount += 1
        total += newCount
        sys.stderr.write("Fetched %d/%d/%d new/old/total.\n" % (
            newCount, ignCount, total))

        # An empty (or fully duplicate) page means there is nothing older.
        if newCount == 0:
            break

        max_id = min(s.id for s in statuses) - 1
        sys.stdout.write("Max_id: %s\n" % max_id)

        # Persist this page immediately, then start the next one empty.
        htmlPrint(user, data.values())
        data = {}
        total = 0
def htmlPrint(user, tweets):
    """Append *tweets* to ``<user>.html`` as a small HTML document.

    Every tweet's ``created_at`` string is parsed with ``parse`` and cached
    on the object as ``pdate``. The tweets are then written newest first,
    one line each: a timestamp followed by a link to the status whose link
    text is the UTF-8 encoded tweet text.

    The file is opened in append mode, so each call adds another complete
    ``<html>...</html>`` section to the end of the same file.

    Args:
        user: screen name; used for the file name, the page title and the
            status links.
        tweets: sequence of status objects exposing ``created_at``, ``id``
            and ``text``. Each object gains a ``pdate`` attribute.

    NOTE(review): ``user`` and the tweet text are interpolated into the
    markup without HTML escaping -- confirm whether the API already
    entity-encodes the text before adding escaping here.
    """
    for t in tweets:
        t.pdate = parse(t.created_at)

    # Sort ascending and then reverse (rather than sort(reverse=True)) so
    # tweets with equal timestamps keep the order the original code gave.
    ordered = sorted(tweets, key=operator.attrgetter('pdate'))
    ordered.reverse()

    # ``with`` guarantees the file is closed even if formatting or writing
    # one of the tweets raises.
    with open('%s.html' % user, 'a') as f:
        f.write("""<html><title>Tweets for %s</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<body><small>""" % user + '\n')
        for t in ordered:
            f.write('%s <a href="%s/%s/status/%d">%s</a><br/>\n' % (
                t.pdate.strftime('%Y-%m-%d %H:%M'), twitterURL,
                user, t.id, t.text.encode('utf8')))
        f.write('</small></body></html>\n\n\n')
if __name__ == '__main__':
    # The screen name comes from the first command-line argument;
    # 'hugs' is used when none is given.
    user = 'hugs' if len(sys.argv) < 2 else sys.argv[1]
    # fetch() writes the HTML file itself and has no return value,
    # so there is nothing to capture here.
    fetch(user)
$ python tweet_archiver.py
Gateway error... trying again
Fetched 193/0/193 new/old/total.
Max_id: 2101508686487551
Fetched 181/0/181 new/old/total.
Max_id: 25258004272
Fetched 195/0/195 new/old/total.
Max_id: 20987676233
Fetched 191/0/191 new/old/total.
Max_id: 18856965859
Gateway error... trying again
Fetched 191/0/191 new/old/total.
Max_id: 15375853421
Gateway error... trying again
Fetched 181/0/181 new/old/total.
Max_id: 11351097286
Gateway error... trying again
Fetched 179/0/179 new/old/total.
Max_id: 9436491875
Gateway error... trying again
Fetched 162/0/162 new/old/total.
Max_id: 7688331574
Gateway error... trying again
Fetched 191/0/191 new/old/total.
Max_id: 5627071780
Gateway error... trying again
Fetched 200/0/200 new/old/total.
Max_id: 4306824367
Gateway error... trying again
Fetched 200/0/200 new/old/total.
Max_id: 3451794071
Gateway error... trying again
Fetched 200/0/200 new/old/total.
Max_id: 2429539283
Gateway error... trying again
Fetched 200/0/200 new/old/total.
Max_id: 1720724669
Gateway error... trying again
Fetched 200/0/200 new/old/total.
Max_id: 1262300070
Gateway error... trying again
Fetched 200/0/200 new/old/total.
Max_id: 977729320
Fetched 30/0/30 new/old/total.
Max_id: 266874171
Fetched 0/0/0 new/old/total.
$
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment