Skip to content

Instantly share code, notes, and snippets.

@jehiah
Created December 17, 2010 05:00
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save jehiah/744508 to your computer and use it in GitHub Desktop.
Save jehiah/744508 to your computer and use it in GitHub Desktop.
python script to archive your tweets
#!/usr/bin/env python
"""
twitter_archiver.py written by Jehiah Czebotar 2010 <jehiah@gmail.com> http://jehiah.cz/
this uses the great 'python twitter tools' library by Mike Verdone
http://mike.verdone.ca/twitter/
usage:
$ pip install twitter simplejson
$ python twitter_archiver.py
$ view tweets.json
"""
import sys
import os.path
import simplejson as json
import time
import logging
# Send every record (DEBUG and above) to stdout, prefixed with the time,
# process id, file name, line number and level.
logging.basicConfig(
    level=logging.DEBUG,
    stream=sys.stdout,
    datefmt='%H:%M:%S',
    format='%(asctime)s %(process)d %(filename)s %(lineno)d %(levelname)s #| %(message)s',
)
from twitter.api import Twitter, TwitterHTTPError
from twitter.oauth import OAuth, read_token_file
from twitter.oauth_dance import oauth_dance
from twitter.cmdline import CONSUMER_KEY, CONSUMER_SECRET
# Settings read by main().
OPTIONS = dict(
    # Token file path: $HOME (empty string when unset), the path separator,
    # then the file name.
    oauth_filename=os.sep.join((os.environ.get('HOME', ''), '.twitter_oauth')),
    # Passed straight through as the Twitter client's `secure` argument.
    secure=True,
)
def save_user_timeline(twitter, outputfile, pause=15):
    """Download the authenticated user's timeline and save it as JSON.

    Pages backwards through ``statuses/user_timeline`` 200 tweets at a time
    until an empty page comes back, then writes everything collected to
    *outputfile* as a single JSON list. Fetching is best-effort: on any
    error (or Ctrl-C) the loop stops and whatever was collected so far is
    still written.

    twitter    -- client object exposing ``statuses.user_timeline(**kwargs)``
    outputfile -- path of the JSON file to (over)write
    pause      -- seconds to sleep between page requests (default 15)

    On a TwitterHTTPError the error details are also dumped to ``error.log``
    in the current directory.
    """
    # docs are at http://dev.twitter.com/doc/get/statuses/user_timeline
    max_id = None
    data = []
    try:
        while True:
            try:
                kwargs = {}
                # Compare against None explicitly: a bound of 0 is still a bound.
                if max_id is not None:
                    kwargs['max_id'] = max_id
                temp_data = twitter.statuses.user_timeline(
                    count=200, trim_user='t', include_rts='t',
                    include_entities='t', **kwargs)
                if not temp_data:
                    # yay we reached the end
                    break
                # max_id is an inclusive upper bound, so ask for ids strictly
                # below the oldest tweet we already hold. Reusing the minimum
                # itself would return that tweet again on every page and the
                # empty-page exit above would never be reached.
                max_id = min(x['id'] for x in temp_data) - 1
                print(temp_data)
                data += temp_data
            except TwitterHTTPError as e:
                logging.exception('twitter error')
                body = e.e.fp.read()
                if not isinstance(body, str):
                    # Python 3 hands back bytes, which json cannot serialize.
                    body = body.decode('utf-8', 'replace')
                d = (e.e.code, e.uri, e.format, e.uriparts, body)
                print(d)
                with open('error.log', 'w') as f:
                    f.write(json.dumps(d))
                break
            except Exception:
                logging.exception('failed')
                break
            time.sleep(pause)
    except KeyboardInterrupt:
        # Keep the partial archive instead of discarding it on Ctrl-C.
        logging.warning('interrupted; saving the records fetched so far')
    with open(outputfile, 'w') as f:
        f.write(json.dumps(data))
    print('saved %d records' % len(data))
def main(args=None):
    """Authorize if needed, then archive the user's tweets to tweets.json.

    args -- list of command-line words; defaults to ``sys.argv[1:]`` read at
            call time. If it contains 'authorize', or the OAuth token file
            does not exist yet, the OAuth dance is run first to create the
            token file.
    """
    if args is None:
        args = sys.argv[1:]
    oauth_filename = os.path.expanduser(OPTIONS['oauth_filename'])
    if 'authorize' in args or not os.path.exists(oauth_filename):
        # Write the token to the same expanded path it is read back from below.
        oauth_dance(
            "the Command-Line Tool", CONSUMER_KEY, CONSUMER_SECRET,
            oauth_filename)
    oauth_token, oauth_token_secret = read_token_file(oauth_filename)
    twitter = Twitter(
        auth=OAuth(
            oauth_token, oauth_token_secret, CONSUMER_KEY, CONSUMER_SECRET),
        secure=OPTIONS['secure'],
        # Version 1 of the REST API has been retired; 1.1 is its replacement.
        api_version='1.1',
        domain='api.twitter.com')
    save_user_timeline(twitter, outputfile='tweets.json')


if __name__ == "__main__":
    main()
@hugovk
Copy link

hugovk commented May 11, 2016

Thanks for this, it still works with just a minor change to api_version='1.1':

https://gist.github.com/hugovk/d5d57086ed8685345f54a9cc5fcf95b9/revisions

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment