Skip to content

Instantly share code, notes, and snippets.

@brosner
Created October 24, 2011 20:06
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brosner/1310000 to your computer and use it in GitHub Desktop.
Save brosner/1310000 to your computer and use it in GitHub Desktop.
# original: https://github.com/simonw/mytweets
# modified by Brian Rosner
# This script does not fetch all tweets: it is limited by what Twitter's API
# returns, which is somewhere around the 3200 most recent tweets.
#
# Tweak the API keys passed to twitter.Api() in fetch_and_save_new_tweets()
# for your Twitter app
import json
import httplib
import time
import traceback
import twitter
# Path of the JSON archive, relative to the current working directory.
FILE = "my_tweets.json"


def load_all():
    """Return the tweets stored in FILE, or an empty list if it is unreadable.

    The archive is whatever JSON document was last written to FILE (a list
    of tweet dicts when produced by this script).  An IOError while opening
    or reading the file -- typically because it does not exist yet -- is
    treated as "no archive" and yields ``[]``.  Malformed JSON is not
    swallowed; the decoder's error propagates to the caller.
    """
    try:
        # The context manager closes the handle deterministically; the
        # previous bare open() left that to the garbage collector.
        with open(FILE) as archive:
            return json.load(archive)
    except IOError:
        return []
def fetch_and_save_new_tweets():
    """Download tweets newer than the local archive and merge them into FILE.

    Loads the existing archive with load_all(), asks fetch_all() for every
    tweet with an id greater than the newest archived one, appends those
    whose id is not already archived, strips the "user" key from every
    tweet, and rewrites FILE sorted by id, newest first.  Prints how many
    tweets were added.
    """
    tweets = load_all()
    old_tweet_ids = set(t["id"] for t in tweets)
    # The newest archived id bounds the request; None means "no archive yet".
    since_id = max(old_tweet_ids) if old_tweet_ids else None

    api = twitter.Api(
        consumer_key="xxx",
        consumer_secret="xxx",
        access_token_key="xxx",
        access_token_secret="xxx"
    )
    new_tweets = fetch_all(api, since_id)

    num_new_saved = 0
    for tweet in new_tweets:
        if tweet["id"] not in old_tweet_ids:
            tweets.append(tweet)
            num_new_saved += 1
    tweets.sort(key=lambda t: t["id"], reverse=True)

    # Delete the "user" key
    for t in tweets:
        t.pop("user", None)

    # Save back to disk.  Serialise *before* opening the file for writing:
    # opening with "w" truncates it, so a serialisation error raised while
    # the file is open would otherwise destroy the existing archive.
    payload = json.dumps(tweets, indent=2)
    with open(FILE, "w") as archive:
        archive.write(payload)

    # Parenthesised single argument prints identically on Python 2 and 3.
    print("Saved %s new tweets" % num_new_saved)
def fetch_all(api, since_id=None, max_attempts=10, pause=5):
    """Page through the user timeline and return every tweet as a dict.

    Args:
        api: object exposing ``GetUserTimeline(**kwargs)``; each returned
            item must provide an ``id`` attribute and an ``AsDict()`` method.
        since_id: when not None, forwarded to the API as ``since_id`` so
            only newer tweets are requested.
        max_attempts: how many consecutive failed requests for one page are
            tolerated before giving up.
        pause: seconds to sleep after each page and before each retry.

    Returns:
        A list of tweet dicts sorted by ``"id"``, newest first.  Tweets whose
        id was already seen on an earlier page are skipped.

    This is best effort: any error (or Ctrl-C) during fetching prints a
    traceback and returns whatever was collected so far instead of raising.
    """
    all_tweets = []
    seen_ids = set()
    page = 0
    attempts = 0
    kwargs = {"count": 200}
    if since_id is not None:
        kwargs["since_id"] = since_id
    try:
        while True:
            kwargs["page"] = page
            try:
                tweets = api.GetUserTimeline(**kwargs)
            except (twitter.TwitterError, httplib.BadStatusLine) as e:
                if isinstance(e, twitter.TwitterError):
                    # Only "Capacity" errors are treated as transient.
                    # Anything else used to be swallowed silently and the
                    # loop carried on with a stale (or unbound) page.
                    message = e.args[0] if e.args else ""
                    if "Capacity" not in str(message):
                        raise
                attempts += 1
                if attempts >= max_attempts:
                    # Give up rather than hammering the API forever.
                    raise
                time.sleep(pause)
                continue
            attempts = 0
            page += 1
            if not tweets:
                # An empty page marks the end of the timeline.
                break
            fetched = 0
            for tweet in tweets:
                if tweet.id not in seen_ids:
                    seen_ids.add(tweet.id)
                    all_tweets.append(tweet.AsDict())
                    fetched += 1
            print("Fetched another %s" % fetched)
            time.sleep(pause)
    except (Exception, KeyboardInterrupt):
        # Deliberate best effort: keep what was fetched so the caller can
        # still save it.  KeyboardInterrupt is included so Ctrl-C stops the
        # download without losing progress; SystemExit is left to propagate.
        traceback.print_exc()
        print("Saving tweets to disk anyways")
    all_tweets.sort(key=lambda t: t["id"], reverse=True)
    return all_tweets
# Run the archive update only when executed as a script, not on import.
if __name__ == "__main__":
    fetch_and_save_new_tweets()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment