Skip to content

Instantly share code, notes, and snippets.

@tpoisot
Last active August 29, 2015 14:05
Show Gist options
  • Save tpoisot/718fc361b2339d77ff31 to your computer and use it in GitHub Desktop.
Save tpoisot/718fc361b2339d77ff31 to your computer and use it in GitHub Desktop.
Use with ./get_tweets.py 10 3 test.json openscience opendata
{
"c_key": "ConsumerKey",
"c_sec": "ConsumerSecret",
"t_key": "TokenKey",
"t_sec": "TokenSecret"
}
#! /usr/bin/env python
import sys
#from TwitterAPI import TwitterAPI
import twitter
import json
import time
import os.path
# Author: Timothee Poisot
# Contact: t.poisot _at_ gmail _dot_ com
# License: MIT
def get_tweets(api, keywords, target_n, output_file, max_iters):
    """Collect tweets matching ``keywords`` and store them in a JSON file.

    Tweets already present in ``output_file`` are loaded first, so an
    interrupted run can be resumed. The file is rewritten after every
    search request.

    Parameters:
        api: object exposing ``GetSearch(term, max_id=...)`` that returns an
            iterable of items with an ``id`` attribute and an ``AsDict()``
            method (e.g. a ``twitter.Api`` instance).
        keywords: list of search terms; they are joined with ``+``.
        target_n: stop once this many unique tweets are stored.
        output_file: path of the JSON file to read from and write to.
        max_iters: maximum number of search requests to perform.
    """
    if os.path.isfile(output_file):
        with open(output_file, 'r') as current_tweets:
            tweets = json.load(current_tweets)
    else:
        tweets = dict()
    results = len(tweets)
    iters = 0
    max_id = None
    term = '+'.join(keywords)
    while (iters < max_iters) and (results < target_n):
        iters = iters + 1
        request = api.GetSearch(term, max_id=max_id)
        # Track every id seen on this page (known or not) so the paging
        # cursor keeps moving even when the page only holds known tweets.
        page_ids = []
        for item in request:
            page_ids.append(item.id)
            # JSON object keys are always strings: use string keys so that
            # tweets reloaded from a previous run are recognised.
            key = str(item.id)
            if key not in tweets:
                tweets[key] = item.AsDict()
            if len(tweets) >= target_n:
                break
        results = len(tweets)
        print("Iter. "+str(iters)+"\t"+str(results)+" unique tweets recovered")
        # Save after each request so progress survives an interruption.
        with open(output_file, 'w') as outfile:
            json.dump(tweets, outfile)
        if not page_ids:
            # Nothing older is available; further requests would be empty.
            break
        # max_id is inclusive: step below the oldest tweet of this page.
        max_id = min(page_ids) - 1
        # Only pause when another request will actually be made.
        if (iters < max_iters) and (results < target_n):
            time.sleep(10)
if __name__ == "__main__":
    # Command line: n_tweets n_trials outfile keyword [keyword ...]
    # Four arguments (at least one keyword) are needed after the script name.
    if len(sys.argv) < 5:
        print("Usage: n_tweets(1000) n_trials(110) outfile(output.json) keyw1 keyw2\n")
    else:
        n_tweets = int(sys.argv[1])
        n_trials = int(sys.argv[2])
        outfile = sys.argv[3]
        keyw = sys.argv[4:]
        # API keys are read from credentials.json in the working directory.
        with open('credentials.json', 'r') as cred_file:
            credentials = json.load(cred_file)
        api = twitter.Api(credentials['c_key'], credentials['c_sec'], credentials['t_key'], credentials['t_sec'])
        # Function-call form of print works on both Python 2 and Python 3.
        print(api.VerifyCredentials())
        print("Getting tweets for keywords: "+str(' '.join(keyw))+"\n")
        get_tweets(api, keyw, n_tweets, outfile, n_trials)
@tpoisot
Copy link
Author

tpoisot commented Nov 19, 2014

How to use it?

Install the twitter module for python

pip install python-twitter

Then get an access token for the twitter API, register an app, and fill in the proper values in credentials.json.

Then chmod +x get_tweets.py, and

./get_tweets.py 10000 100 output.json @handle1 @handle2 '#keyword' term ...

This will write everything in output.json

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment