# revox/twitter_search_api_writer.py

## twitter_search_api_writer.py
# Don't keep this script in public_html!
import twitter
import sys,json,csv

def twitter_search(twitter_api, q, max_results=200):
    """Collect up to *max_results* statuses matching *q* from the Search API.

    Sends one ``search.tweets`` request for *q* and then follows the
    ``next_results`` cursor found in ``search_metadata`` to fetch more pages.
    The ``search_metadata`` of the first response is printed as JSON.

    See https://dev.twitter.com/docs/api/1.1/get/search/tweets and
    https://dev.twitter.com/docs/using-search for details on advanced
    search criteria.

    Args:
        twitter_api: Client object exposing ``search.tweets(**kwargs)``.
        q: The search query string.
        max_results: Desired number of statuses. Values above 2000 are
            treated as 2000; values below 0 are treated as 0.

    Returns:
        A list holding at most ``max_results`` statuses, in the order the
        API returned them. Fewer are returned when the cursor runs out or
        after 10 follow-up requests (roughly 1100 statuses).
    """
    # Imported here so the function works on both Python 2 and Python 3
    # without touching the module-level imports.
    try:
        from urllib.parse import parse_qsl  # Python 3
    except ImportError:
        from urlparse import parse_qsl  # Python 2

    search_results = twitter_api.search.tweets(q=q, count=100)
    print(json.dumps(search_results['search_metadata'], indent=1))
    # Copy so that extending the result never mutates the API response.
    statuses = list(search_results['statuses'])

    # Enforce a reasonable limit; 2000 leaves headroom in case Twitter
    # changes the API and lets more out.
    max_results = max(0, min(2000, max_results))

    # Follow the cursor page by page, keeping in mind that OAuth users can
    # "only" make 180 search queries per 15-minute interval. See
    # https://dev.twitter.com/docs/rate-limiting/1.1/limits for details.
    for _ in range(10):  # at most 10 follow-up pages of up to 100 statuses
        # Stop *before* spending another request once we have enough.
        if len(statuses) >= max_results:
            break
        try:
            next_results = search_results['search_metadata']['next_results']
        except KeyError:  # No more results when next_results doesn't exist
            break

        # next_results is a URL query string of the following form:
        #   ?max_id=313519052523986943&q=%23NCAA&include_entities=1
        # Its values are percent-encoded and the client encodes them again
        # when sending, so they must be decoded here or a query such as
        # "#NCAA" would be searched as the literal text "%23NCAA".
        query = next_results[1:]
        if not isinstance(query, str):
            # Python 2 ``unicode``: parse the UTF-8 bytes instead, because
            # parse_qsl decodes percent-escapes in unicode input as Latin-1.
            query = query.encode('utf-8')
        kwargs = dict(parse_qsl(query, keep_blank_values=True))

        search_results = twitter_api.search.tweets(**kwargs)
        statuses += search_results['statuses']

    # The last page may overshoot the requested amount; trim it.
    return statuses[:max_results]

''' helper functions, clean data, unpack dictionaries '''
def getVal(val):
    """Return *val* in a form that can be handed to the CSV writer.

    - ``bool`` and ``int`` values are returned unchanged (including ``0``
      and ``False``).
    - Any other falsy value (``None``, empty string, ...) becomes ``""``.
    - Text is encoded to UTF-8 bytes.
    - Byte strings and values without an ``encode`` method (e.g. ``float``)
      are returned unchanged instead of raising ``AttributeError``.
    """
    # bool is checked together with int so that False/0 are not blanked
    # out by the truthiness test below.
    if isinstance(val, (bool, int)):
        return val
    if not val:
        return ""
    # Already-encoded data needs no further work.
    if isinstance(val, bytes):
        return val
    encode = getattr(val, 'encode', None)
    if encode is None:
        # Not text (float, long, list, ...): pass through untouched.
        return val
    return encode('utf-8')

def getLng(val):
    """Return the first entry of ``val['coordinates']`` (the longitude column).

    Anything that is not a dict (for example ``None`` when a tweet carries
    no coordinates) yields ``None``.
    """
    if not isinstance(val, dict):
        return None
    point = val['coordinates']
    return point[0]

def getLat(val):
    """Return the second entry of ``val['coordinates']`` (the latitude column).

    Anything that is not a dict (for example ``None`` when a tweet carries
    no coordinates) yields ``None``.
    """
    if not isinstance(val, dict):
        return None
    point = val['coordinates']
    return point[1]

def getPlace(val):
    """Return ``val['full_name']`` encoded as UTF-8.

    Anything that is not a dict (for example ``None`` when a tweet has no
    place attached) yields ``None``.
    """
    if not isinstance(val, dict):
        return None
    full_name = val['full_name']
    return full_name.encode('utf-8')

# == OAuth Authentication ==
# The consumer keys can be found on your application's Details
consumer_key = ""
consumer_secret = ""

# Create an access token under the "Your access token" section
access_token = ""
access_token_secret = ""
auth = twitter.oauth.OAuth(access_token,
                           access_token_secret,
                           consumer_key,
                           consumer_secret)
twitter_api = twitter.Twitter(auth=auth)
twitter_api.retry = True

# Sample usage
q = "starwars"
results = twitter_search(twitter_api, q, max_results=2000)
print(len(results))
# Show one sample search result by slicing the list...
# print json.dumps(results[0], indent=1)

# Export one CSV row per tweet to "<query>_with_profiles.csv". The `with`
# block guarantees the file is flushed and closed, even when a row fails
# to serialize part-way through.
with open(q + '_with_profiles.csv', 'w') as csvfile:
    csvwriter = csv.writer(csvfile)
    csvwriter.writerow(['created_at',
                        'user-screen_name',
                        'text',
                        'coordinates lng',
                        'coordinates lat',
                        'place',
                        'user-location',
                        'user-geo_enabled',
                        'user-lang',
                        'user-time_zone',
                        'user-statuses_count',
                        'user-followers_count',
                        'user-created_at'])
    for tweet in results:
        # Column order must stay in step with the header row above.
        user = tweet['user']
        csvwriter.writerow([tweet['created_at'],
                            getVal(user['screen_name']),
                            getVal(tweet['text']),
                            getLng(tweet['coordinates']),
                            getLat(tweet['coordinates']),
                            getPlace(tweet['place']),
                            getVal(user['location']),
                            getVal(user['geo_enabled']),
                            getVal(user['lang']),
                            getVal(user['time_zone']),
                            getVal(user['statuses_count']),
                            getVal(user['followers_count']),
                            getVal(user['created_at']),
                            ])
print("done")
	# NOTE: a second, tab-flattened copy of this whole script (imports,
	# twitter_search, the get* helpers, the OAuth setup and the CSV export)
	# used to follow here. Its block structure had been lost, so the module
	# failed to parse with an IndentationError, and it defined nothing that
	# is not already defined above. It has been removed.