Skip to content

Instantly share code, notes, and snippets.

@revox
Last active December 3, 2020 14:22
Show Gist options
  • Save revox/11c6160b2a5fc9af3463687af713bd11 to your computer and use it in GitHub Desktop.
Save revox/11c6160b2a5fc9af3463687af713bd11 to your computer and use it in GitHub Desktop.
Python code to produce TAGS-style output from the Twitter Streaming API
import twitter, json, csv
# == OAuth Authentication ==
# The four credentials below are empty placeholders in this file; supply the
# values of your own Twitter application before running the script.
CONSUMER_KEY = ''
CONSUMER_SECRET = ''
OAUTH_TOKEN = ''
OAUTH_TOKEN_SECRET = ''
auth = twitter.oauth.OAuth(
    OAUTH_TOKEN,
    OAUTH_TOKEN_SECRET,
    CONSUMER_KEY,
    CONSUMER_SECRET,
)
twitter_api = twitter.Twitter(auth=auth, retry=True)

# == CSV output ==
# newline='' hands line-ending control to the csv module (without it, rows can
# be separated by spurious blank lines on platforms that translate '\n').
# The explicit UTF-8 encoding keeps arbitrary tweet text (emoji, non-Latin
# scripts) writable regardless of the locale's default encoding.
csvfile = open('brexit.csv', 'w', newline='', encoding='utf-8')
csvwriter = csv.writer(csvfile, delimiter='|')

# Keyword the stream is filtered on.
q = "#brexit"
# clean up our data so we can write unicode to CSV
# Maps the CSV delimiter and both line-break characters to a single space.
_CLEAN_TABLE = str.maketrans({'|': ' ', '\n': ' ', '\r': ' '})


def clean(val):
    """Return *val* with '|', '\\n' and '\\r' each replaced by a space.

    Args:
        val: Text to sanitise before it is written as a CSV field. May be
            None or empty.

    Returns:
        The sanitised string, or "" when *val* is falsy (None, "").
    """
    if not val:
        return ""
    # One pass over the string instead of three chained replace() calls.
    return val.translate(_CLEAN_TABLE)
def _tweet_text(tweet):
    """Build the text column for one status dict.

    Args:
        tweet: A status dict from the stream that contains at least 'text'.

    Returns:
        The status text. For a retweet the text is read from
        tweet['retweeted_status'] and prefixed with "RT @screen_name: ".
        A truncated status uses extended_tweet['full_text'] when available.
        If the status quotes another one, the quoted status' permalink URL
        is appended after a space.
    """
    retweeted = tweet.get('retweeted_status')
    # For a retweet the text comes from the original status, not the wrapper.
    status = retweeted if retweeted else tweet
    extended = status.get('extended_tweet')
    if status.get('truncated') and extended:
        text = extended['full_text']
    else:
        text = status['text']
    if retweeted:
        text = 'RT @' + retweeted['user']['screen_name'] + ': ' + text
    # Previously two separate branches (retweet / not a retweet) appended the
    # same permalink; a single check covers both cases.
    if 'quoted_status' in tweet:
        permalink = tweet.get('quoted_status_permalink')
        if permalink:
            text = text + ' ' + permalink['url']
    return text


def _geo_coordinates(tweet):
    """Return the status' point as "second,first" text, or "" if it has none.

    The nested 'coordinates' list is expected to be a GeoJSON-style
    [longitude, latitude] pair, so the result reads "latitude,longitude".
    """
    point = tweet.get('coordinates')
    if not point:
        return ""
    pair = point['coordinates']
    # The values are numbers, so they must be formatted rather than
    # concatenated with ',' (number + str raises TypeError).
    return '{},{}'.format(pair[1], pair[0])


print ('Filtering the public timeline for keyword="%s"' % (q))
twitter_stream = twitter.TwitterStream(auth=twitter_api.auth)
stream = twitter_stream.statuses.filter(track=q)
try:
    for tweet in stream:
        # print (json.dumps(tweet, indent=1)) # for debugging purposes
        # The stream can deliver messages that are not statuses (e.g. limit or
        # delete notices, hangup markers); those have no 'text' and are skipped.
        if not tweet or 'text' not in tweet:
            continue
        tweet_text = _tweet_text(tweet)
        user = tweet['user']
        csvwriter.writerow([
            tweet['id_str'],
            tweet['created_at'],
            clean(user['screen_name']),
            clean(tweet_text),
            user['created_at'],
            _geo_coordinates(tweet),
            user['lang'],
            tweet['in_reply_to_user_id_str'],
            tweet['in_reply_to_screen_name'],
            user['id_str'],
            tweet['in_reply_to_status_id_str'],
            clean(tweet['source']),
            user['profile_image_url_https'],
            user['followers_count'],
            user['friends_count'],
            user['statuses_count'],
            clean(user['location']),
            # Written as the str() of the entities dict by csv.writer.
            tweet['entities'],
        ])
        print('text: ', tweet_text) # just so we can see it running
finally:
    # Close (and thereby flush) the CSV even when the stream ends with an
    # error or the script is interrupted with Ctrl-C.
    csvfile.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment