Skip to content

Instantly share code, notes, and snippets.

@blech
Created July 22, 2009 06:12
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save blech/151841 to your computer and use it in GitHub Desktop.
Save blech/151841 to your computer and use it in GitHub Desktop.
fetch-twitter-posts.py
#!/usr/bin/python
import errno
import sys
import urllib2

import simplejson as json
# Account settings.
# top_level_url is the URI the credentials below are registered for with
# the urllib2 password manager further down in this script.
top_level_url = "http://twitter.com/"
username = "blech"
# NOTE(review): placeholder value. A real password would sit here in plain
# text -- consider reading it from the environment or a prompt instead.
password = "notmyrealpassword"
# Load the existing archive and collect the IDs it already contains, so
# posts that were saved on an earlier run can be recognised later.
# A missing archive file is not an error: the script then starts from
# scratch with an empty list of posts.
try:
    archive = open("posts-all.json", "rb")
except IOError as err:
    # Only "file does not exist" means "first run".  Any other failure
    # (permissions, I/O error) must not be mistaken for an empty archive.
    if err.errno != errno.ENOENT:
        raise
    posts = []
else:
    try:
        posts = json.loads(archive.read())
    finally:
        # Always release the handle, even if the JSON is malformed.
        archive.close()

# Page number substituted into the timeline request URL; starts at 1.
page = 1


def get_id(item):
    """Return the value stored under the 'id' key of a single post."""
    return item['id']


# IDs of every post already present in the archive.
ids = list(map(get_id, posts))
# Register the account credentials for HTTP basic authentication and build
# the opener object that is used for the timeline requests.
credentials = urllib2.HTTPPasswordMgrWithDefaultRealm()
# Realm None: the credentials are not tied to one specific realm name.
credentials.add_password(None, top_level_url, username, password)
opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(credentials))
# Fetch the user timeline page by page and collect every post whose ID is
# not already known.  Stop at the first page that contributes nothing new.

# IDs seen so far, as a set so each membership test is O(1).  It is seeded
# from the archive and grows as new posts are accepted, so a post that
# shows up on two pages of the same run is only stored once.
known_ids = set(ids)

while True:
    response = opener.open(
        'http://twitter.com/statuses/user_timeline.json?count=200&page=%s'
        % page)
    try:
        data = response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
    new_posts = json.loads(data)
    sys.stderr.write("got %s posts for page %s\n" % (len(new_posts), page))

    fresh_posts = []
    for new_post in new_posts:
        post_id = new_post['id']
        if post_id in known_ids:
            continue
        # append(), not extend(): extending a list with a dict adds the
        # dict's keys one by one instead of the post itself, which both
        # corrupts the archive and inflates the "new posts" count.
        fresh_posts.append(new_post)
        known_ids.add(post_id)
    sys.stderr.write(" of which %s posts are new\n" % (len(fresh_posts)))

    if not fresh_posts:
        break
    posts.extend(fresh_posts)
    page += 1

# The merged archive goes to stdout as JSON; progress messages go to stderr
# so the two streams can be redirected independently.
sys.stdout.write(json.dumps(posts) + "\n")
sys.stderr.write("writing %s posts\n" % len(posts))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment