-
-
Save asw456/fd33e784e1a70c0049dc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "c_key": "ConsumerKey",
  "c_sec": "ConsumerSecret",
  "t_key": "TokenKey",
  "t_sec": "TokenSecret"
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
import sys | |
from TwitterAPI import TwitterAPI | |
import json | |
import time | |
import os.path | |
# Author: Timothée Poisot
# Contact: t.poisot _at_ gmail _dot_ com
# License: MIT
def _load_saved_tweets(output_file):
    """Return the tweets already stored in ``output_file``, keyed by integer id.

    A missing or empty file yields an empty dict. The file is expected to hold
    what ``get_tweets`` writes: one JSON object mapping tweet ids to tweets.
    JSON turns the integer ids into string keys, so they are converted back to
    ``int`` here; otherwise duplicate detection against freshly fetched tweets
    (whose ``id`` is read from the item itself) would never match.
    """
    if not os.path.isfile(output_file):
        return {}
    with open(output_file, 'r') as current_tweets:
        content = current_tweets.read()
    if not content.strip():
        return {}
    saved = json.loads(content)
    if isinstance(saved, dict):
        return dict((int(key), tweet) for key, tweet in saved.items())
    # Fallback for a file holding a JSON array of tweet objects.
    return dict((tweet['id'], tweet) for tweet in saved if 'id' in tweet)


def get_tweets(api, keywords, target_n, output_file, max_iters):
    """Collect unique tweets matching ``keywords`` and save them as JSON.

    Tweets already present in ``output_file`` are loaded first, so a later run
    adds to an earlier one. The ``search/tweets`` endpoint is then queried
    repeatedly, walking backwards through the results with ``max_id``, until
    ``target_n`` unique tweets are held or ``max_iters`` requests were made.
    The result (a dict mapping tweet id to tweet) overwrites ``output_file``.

    Args:
        api: Object exposing ``request(endpoint, params)`` whose return value
            has a ``get_iterator()`` method yielding dict-like items.
        keywords: Iterable of search terms; they are joined with ``+``.
        target_n: Number of unique tweets to stop at.
        output_file: Path of the JSON file to read from and write to.
        max_iters: Maximum number of search requests to issue.

    Returns:
        None. Progress is printed once per request.
    """
    tweets = _load_saved_tweets(output_file)
    results = len(tweets)
    iters = 0
    max_id = None
    query = {'q': '+'.join(keywords), 'count': 100}
    while iters < max_iters and results < target_n:
        iters += 1
        if max_id is not None:
            query['max_id'] = max_id
        request = api.request('search/tweets', query)
        # Ids of every tweet on this page, duplicates included, so paging
        # still advances when a whole page was already stored.
        page_ids = []
        for item in request.get_iterator():
            if "id" not in item:
                continue
            page_ids.append(item['id'])
            if item['id'] not in tweets:
                tweets[item['id']] = item
                if len(tweets) >= target_n:
                    break
        if page_ids:
            # max_id is inclusive: step one below the oldest tweet seen so the
            # next page does not start with a tweet we already have. An empty
            # page leaves max_id untouched instead of crashing on min([]).
            max_id = min(page_ids) - 1
        results = len(tweets)
        print("Iter. "+str(iters)+"\t"+str(results)+" unique tweets recovered")
        if iters < max_iters and results < target_n:
            # Pause between requests (presumably to respect API rate limits);
            # pointless once the loop is about to end.
            time.sleep(10)
    with open(output_file, 'w') as outfile:
        json.dump(tweets, outfile)
if __name__ == "__main__":
    # Command line: n_tweets n_trials outfile keyword [keyword ...]
    # All three settings and at least one keyword are required; anything less
    # prints the usage line instead of failing on a missing sys.argv entry or
    # sending an empty search query.
    if len(sys.argv) < 5:
        print("Usage: n_tweets(1000) n_trials(110) outfile(output.json) keyw1 keyw2\n")
    else:
        n_tweets = int(sys.argv[1])
        n_trials = int(sys.argv[2])
        outfile = sys.argv[3]
        # Keywords are given without the leading '#'; it is added here so the
        # search targets hashtags.
        keyw = ["#" + word for word in sys.argv[4:]]
        # API credentials are read from credentials.json in the working
        # directory (keys: c_key, c_sec, t_key, t_sec).
        with open('credentials.json', 'r') as cred_file:
            credentials = json.load(cred_file)
        api = TwitterAPI(credentials['c_key'], credentials['c_sec'], credentials['t_key'], credentials['t_sec'])
        print("Getting tweets for keywords: "+str(' '.join(keyw))+"\n")
        get_tweets(api, keyw, n_tweets, outfile, n_trials)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment