Skip to content

Instantly share code, notes, and snippets.

@palewire
Last active October 15, 2018 01:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save palewire/4666689 to your computer and use it in GitHub Desktop.
Save palewire/4666689 to your computer and use it in GitHub Desktop.
A quick Python script for archiving a user's tweets via the Twitter API. Only goes back as far as Twitter allows. Doesn't include RTs by default.
import re
import os
import csv
import codecs
import twitter
import cStringIO
from pprint import pprint
from dateutil.parser import parse as dateparse
class UnicodeWriter:
"""
A CSV writer which will write rows to CSV file "f",
which is encoded in the given encoding.
"""
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = cStringIO.StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = codecs.getincrementalencoder(encoding)()
def writerow(self, row):
self.writer.writerow([s.encode("utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = data.decode("utf-8")
# ... and reencode it into the target encoding
data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)
def writerows(self, rows):
for row in rows:
self.writerow(row)
class TwitterClient(object):
    """
    A minimal Twitter client that archives one user's timeline to a CSV file.

    The credentials passed to twitter.Api below are placeholders and have to
    be replaced with real application keys before any request can succeed.
    """

    def __init__(self, username):
        """
        Remember the account to archive and build the API handle.

        username: the account name passed to GetUserTimeline and used to
        name the output file.
        """
        self.username = username
        self.api = twitter.Api(
            consumer_key='your',
            consumer_secret='shiz',
            access_token_key='goes',
            access_token_secret='here'
        )

    def __getattr__(self, name):
        """
        Fallback for attributes that normal lookup did not find.

        Python calls this hook with the attribute name, so it has to accept
        it; a one-argument signature turns every failed lookup into a
        TypeError. Raising AttributeError keeps hasattr(), getattr() with a
        default, copy and pickle working the usual way.
        """
        raise AttributeError(
            "%r object has no attribute %r" % (type(self).__name__, name)
        )

    def __repr__(self):
        """
        Return a short debugging label that includes the username.
        """
        return "<TwitterClient: %s>" % self.username

    def pull(self):
        """
        Download the user's timeline and write it to "<username>.csv".

        Each CSV row holds the tweet id, the parsed creation date and the
        tweet text. Pages are requested until the API returns an empty one,
        with progress printed after every request.

        NOTE(review): assumes each page comes back newest-first (so the last
        tweet is the oldest) and that tweet ids are integers -- confirm
        against the installed python-twitter version.
        """
        page = 1
        big_total = 0
        max_id = None
        # Binary mode: Python 2's csv module writes its own "\r\n" line
        # endings, and text mode would double the "\r" on Windows. The
        # with-block guarantees the file is flushed and closed, even if a
        # request fails halfway through.
        with open("%s.csv" % self.username, "wb") as csv_file:
            outfile = UnicodeWriter(csv_file)
            while True:
                tweet_list = list(
                    self.api.GetUserTimeline(self.username, max_id=max_id)
                )
                big_total += len(tweet_list)
                print("Fetched page %s, Found %s tweets, Total up to %s" % (
                    page, len(tweet_list), big_total))
                if not tweet_list:
                    print("No more tweets")
                    break
                for t in tweet_list:
                    outfile.writerow(
                        map(unicode, [t.id, dateparse(t.created_at), t.text])
                    )
                # max_id is inclusive on the Twitter side. Asking for ids
                # strictly below the oldest tweet seen avoids fetching that
                # tweet again, so no page needs its first entry dropped and
                # the final request really does come back empty.
                max_id = tweet_list[-1].id - 1
                page += 1
if __name__ == '__main__':
    # Script entry point: archive the "palewire" timeline to palewire.csv.
    TwitterClient("palewire").pull()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment