Twitter search API (last 7 days) into a CSV file, with some profile info from the tweets (based on code from Mining the Social Web, 2nd ed., Example 9-4).
# Don't keep this script in public_html -- it contains your API secrets!
import twitter
import json
import csv
def twitter_search(twitter_api, q, max_results=200):
    # See https://dev.twitter.com/docs/api/1.1/get/search/tweets and
    # https://dev.twitter.com/docs/using-search for details on advanced
    # search criteria that may be useful for keyword arguments.
    search_results = twitter_api.search.tweets(q=q, count=100)
    # print search_results
    print json.dumps(search_results['search_metadata'], indent=1)
    statuses = search_results['statuses']

    # Iterate through batches of results by following the cursor until we
    # reach the desired number of results, keeping in mind that OAuth users
    # can "only" make 180 search queries per 15-minute interval. See
    # https://dev.twitter.com/docs/rate-limiting/1.1/limits for details.
    # A reasonable number of results is ~1000-1500, although that many
    # results may not exist for all queries.

    # Enforce an upper limit of 2000 in case Twitter changes the API and
    # lets more out.
    max_results = min(2000, max_results)

    # Each page holds up to 100 tweets, so loop enough times to cover
    # max_results (the first page was already fetched above).
    for _ in range(max_results / 100):
        try:
            next_results = search_results['search_metadata']['next_results']
        except KeyError:  # No more results when next_results doesn't exist
            break

        # Build keyword arguments from next_results, which has the form:
        # ?max_id=313519052523986943&q=NCAA&include_entities=1
        kwargs = dict([kv.split('=') for kv in next_results[1:].split('&')])
        search_results = twitter_api.search.tweets(**kwargs)
        statuses += search_results['statuses']
        if len(statuses) > max_results:
            break
    return statuses
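
# If you exceed the search rate limit (180 queries per 15-minute window),
# the twitter library raises twitter.TwitterHTTPError. A minimal back-off
# sketch (illustrative only, not part of the original gist; assumes the
# error's .e.code attribute carries the HTTP status):
#
#   import time
#   try:
#       statuses = twitter_search(twitter_api, q)
#   except twitter.TwitterHTTPError as e:
#       if e.e.code == 429:        # 429 = Too Many Requests
#           time.sleep(15 * 60)    # wait out the window, then retry once
#           statuses = twitter_search(twitter_api, q)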
# Helper functions: clean data and unpack nested dictionaries.

def getVal(val):
    """Return bools and ints as-is; encode strings as UTF-8; else ''."""
    if isinstance(val, bool):
        return val
    if isinstance(val, int):
        return val
    if val:
        return val.encode('utf-8')
    return ""

def getLng(val):
    if isinstance(val, dict):
        return val['coordinates'][0]

def getLat(val):
    if isinstance(val, dict):
        return val['coordinates'][1]

def getPlace(val):
    if isinstance(val, dict):
        return val['full_name'].encode('utf-8')
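
# For reference: when a tweet carries a 'coordinates' field it is a GeoJSON
# point, with longitude first. Sample values (not from the gist):
#
#   coords = {"type": "Point", "coordinates": [-0.1276, 51.5072]}
#   getLng(coords)   # -> -0.1276 (longitude)
#   getLat(coords)   # -> 51.5072 (latitude)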
# == OAuth Authentication ==
# The consumer keys can be found on your application's Details page.
consumer_key = ""
consumer_secret = ""
# Create an access token under the "Your access token" section.
access_token = ""
access_token_secret = ""

auth = twitter.oauth.OAuth(access_token, access_token_secret,
                           consumer_key, consumer_secret)
twitter_api = twitter.Twitter(auth=auth)
twitter_api.retry = True
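
# Optional sanity check before searching (a hedged example: the library's
# attribute-style call below maps to the REST endpoint
# account/verify_credentials):
#
#   print twitter_api.account.verify_credentials()['screen_name']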
# Sample usage
q = "starwars"
results = twitter_search(twitter_api, q, max_results=2000)
print len(results)

# Show one sample search result by slicing the list...
# print json.dumps(results[0], indent=1)

csvfile = open(q + '_with_profiles.csv', 'w')
csvwriter = csv.writer(csvfile)
csvwriter.writerow(['created_at',
                    'user-screen_name',
                    'text',
                    'coordinates lng',
                    'coordinates lat',
                    'place',
                    'user-location',
                    'user-geo_enabled',
                    'user-lang',
                    'user-time_zone',
                    'user-statuses_count',
                    'user-followers_count',
                    'user-created_at'])
for tweet in results:
    csvwriter.writerow([tweet['created_at'],
                        getVal(tweet['user']['screen_name']),
                        getVal(tweet['text']),
                        getLng(tweet['coordinates']),
                        getLat(tweet['coordinates']),
                        getPlace(tweet['place']),
                        getVal(tweet['user']['location']),
                        getVal(tweet['user']['geo_enabled']),
                        getVal(tweet['user']['lang']),
                        getVal(tweet['user']['time_zone']),
                        getVal(tweet['user']['statuses_count']),
                        getVal(tweet['user']['followers_count']),
                        getVal(tweet['user']['created_at'])])
csvfile.close()
print "done"