Skip to content

Instantly share code, notes, and snippets.

@tdsmith
Created December 24, 2012 21:45
Show Gist options
  • Save tdsmith/4370781 to your computer and use it in GitHub Desktop.
Save tdsmith/4370781 to your computer and use it in GitHub Desktop.
A little script to dump your Twitter .zip archive into an Evernote notebook on OS X. For, you know, cloudiness. It loads tweets at about one per second, so don't expect it to be super-zippy. Run pip install appscript if you haven't yet; that also gives you aem.
#!/usr/bin/env python
import json, appscript, aem, sys, os, glob, datetime
def usage(called_name):
    """Print a two-line usage summary to standard output.

    Args:
        called_name: The name the script was invoked as; it is interpolated
            into both the synopsis line and the example line.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Usage: %s <path to unzipped twitter archive> <name of Evernote notebook>' % called_name)
    print('e.g., %s tweets/ "My Tweets"' % called_name)
def _parse_created_at(created_at):
    """Convert a tweet's ``created_at`` string into a naive datetime.

    Two layouts are accepted:

    * six whitespace-separated fields, e.g. ``'Wed Feb 27 19:38:06 +0000 2013'``
    * three fields, e.g. ``'2013-02-28 19:38:06 +0000'``

    In both cases the UTC-offset field is dropped rather than applied, so the
    result holds the wall-clock time exactly as written in the archive.

    Args:
        created_at: The raw ``created_at`` value from a tweet record.

    Returns:
        A ``datetime.datetime`` without tzinfo.

    Raises:
        ValueError: If the string matches neither layout.
    """
    fields = created_at.split()
    if len(fields) == 3:
        # ISO-like layout: keep date and time, discard the trailing offset.
        return datetime.datetime.strptime(' '.join(fields[:2]), '%Y-%m-%d %H:%M:%S')
    # Long layout: keep weekday, month, day, time and the final year field,
    # which skips the offset sitting between the time and the year.
    stamp = ' '.join(fields[:4] + fields[-1:])
    return datetime.datetime.strptime(stamp, '%a %b %d %H:%M:%S %Y')


def main(tweet_path, notebook_name):
    """Import every tweet found under ``tweet_path`` into a new Evernote notebook.

    Each tweet becomes one note: the note text is the tweet re-serialised as
    JSON, the title is the tweet's ``text`` field and the creation date is
    parsed from its ``created_at`` field.

    Args:
        tweet_path: Directory containing the unzipped Twitter archive; tweet
            files are looked up under ``data/js/tweets/`` inside it.
        notebook_name: Name of the notebook to create. It must not exist yet.

    Raises:
        Exception: If ``tweet_path`` is not a directory, the notebook already
            exists, or no tweet files match.
        aem.findapp.ApplicationNotFoundError: If the Evernote application
            cannot be located (re-raised after printing a hint).
        ValueError: If a tweet's ``created_at`` is in an unrecognised layout.
    """
    if not os.path.isdir(tweet_path):
        raise Exception("%s: not a directory" % tweet_path)

    try:
        en = appscript.app('Evernote')
    except aem.findapp.ApplicationNotFoundError:
        print('Are you sure Evernote is installed? :(')
        raise  # bare raise keeps the original traceback

    if notebook_name in [nb.name() for nb in en.notebooks.get()]:
        raise Exception('Notebook %s already exists' % notebook_name)

    filespec = os.path.join(tweet_path, 'data/js/tweets/*js')
    # glob order is arbitrary; sort so the import order is deterministic.
    twitter_files = sorted(glob.glob(filespec))
    if not twitter_files:
        raise Exception('Could not find any files matching %s' % filespec)

    nb = en.create_notebook(notebook_name)
    for filename in twitter_files:
        with open(filename, 'rb') as f:
            # The first line is skipped and the remainder parsed as JSON --
            # presumably it is a JavaScript assignment header; TODO confirm.
            payload = b''.join(f.readlines()[1:])
        try:
            # Decode explicitly so json.loads receives text on Python 2 and 3.
            archive = json.loads(payload.decode('utf-8'))
        except Exception:
            print('Uh oh! Hit a rough patch in %s' % filename)
            raise
        for tweet in archive:
            en.create_note(
                with_text=json.dumps(tweet),
                title=tweet['text'],
                notebook=nb,
                created=_parse_created_at(tweet['created_at']),
            )
if __name__ == '__main__':
    # Script entry point: exactly two positional arguments are required,
    # the archive directory and the notebook name.
    if len(sys.argv) != 3:
        usage(sys.argv[0])
        sys.exit(1)
    try:
        main(sys.argv[1], sys.argv[2])
    except Exception as e:
        # Any failure is reported as the usage text followed by the error
        # message, and the process exits with status 1.
        usage(sys.argv[0])
        print(e)
        sys.exit(1)
@klickreflex
Copy link

Hi Tim and thanks for sharing this snippet,

I keep on running into the following error when trying to import my archive:
time data '2013-02-28 19:38:05 +0000 +0000' does not match format '%a %b %d %H:%M:%S %Y'

Here's the data of the troublesome tweet:

{
  "source" : "\u003Ca href=\"http:\/\/instagram.com\" rel=\"nofollow\"\u003EInstagram\u003C\/a\u003E",
  "entities" : {
    "user_mentions" : [ ],
    "media" : [ ],
    "hashtags" : [ ],
    "urls" : [ {
      "indices" : [ 16, 38 ],
      "url" : "http:\/\/t.co\/7VI9HHCQRk",
      "expanded_url" : "http:\/\/instagr.am\/p\/WSWK3xHN5x\/",
      "display_url" : "instagr.am\/p\/WSWK3xHN5x\/"
    } ]
  },
  "geo" : { },
  "id_str" : "307213104553537536",
  "text" : "8:36pm - dinner http:\/\/t.co\/7VI9HHCQRk",
  "id" : 307213104553537536,
  "created_at" : "2013-02-28 19:38:06 +0000",
  "user" : {
    "name" : "Daniel Wentsch",
    "screen_name" : "klickreflex",
    "protected" : false,
    "id_str" : "74269728",
    "profile_image_url_https" : "https:\/\/pbs.twimg.com\/profile_images\/806444772021927936\/yPDu1ljd_normal.jpg",
    "id" : 74269728,
    "verified" : false
  }
},

This is not the first tweet, so I guess the prior tweets worked fine, and I don't see any difference in the date string :(

Could you help me out here?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment