# Created December 28, 2014 15:34
# Print the tweets in a Twitter archive that are younger than a year; useful for heroku_ebooks.
#!/usr/bin/env python
from cStringIO import StringIO
from csv import DictReader
from sys import argv
from zipfile import ZipFile
from arrow import utcnow
import dateutil.parser
def twitter2ebooks(path):
    """Print the repr of each tweet in a Twitter archive younger than a year.

    path -- path of the archive zip file. The archive is expected to hold a
    ``tweets.csv`` member whose rows have ``timestamp`` and ``text`` columns.

    Nothing is returned; matching tweets are written to standard output, one
    ``repr`` per line.
    """
    # Pull the whole CSV member into memory so the zip file can be closed
    # before any row is processed.
    with ZipFile(path) as archive:
        raw_csv = archive.read('tweets.csv')

    rows = DictReader(StringIO(raw_csv))
    now = utcnow()
    for row in rows:
        created_at = dateutil.parser.parse(row['timestamp'])
        # Age is "now minus creation time". The reverse order is negative for
        # every past tweet, which would let the whole archive through.
        if (now - created_at).days < 365:
            # Parenthesised single argument: prints the same under the
            # Python 2 print statement and the Python 3 print function.
            print(repr(row['text']))
if __name__ == '__main__':
