Skip to content

Instantly share code, notes, and snippets.

@s-fujimoto
Created December 2, 2017 14:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save s-fujimoto/c8f84abdedd78745d350aa38536f0b02 to your computer and use it in GitHub Desktop.
Save s-fujimoto/c8f84abdedd78745d350aa38536f0b02 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import twitter
import boto3
import os
# Search term passed to the Twitter search API by get_tweet_texts().
keyword = '#reinvent'
# Language code; used both as the Twitter search language filter and as the
# LanguageCode sent to Comprehend.
lang = 'en'
# AWS region used when creating the Comprehend client.
region = 'us-east-1'
# Target number of tweets to collect: get_tweet_texts() issues size/100
# search requests asking for 100 tweets each.
size = 100 * 100
def get_tweet_texts():
    """Collect recent tweets matching the module-level ``keyword``.

    Pages backwards through the search results, asking for 100 tweets per
    request, until ``size / 100`` requests have been made or a request
    returns no tweets.

    Despite the name, the returned items are the status objects produced by
    ``api.GetSearch`` (callers read their ``.text`` attribute), not plain
    strings.

    Returns:
        list: The collected status objects, newest first within each page.

    Raises:
        KeyError: If any of the ``consumer_key``, ``consumer_secret``,
            ``access_token_key`` or ``access_token_secret`` environment
            variables is not set.
    """
    api = twitter.Api(
        consumer_key=os.environ['consumer_key'],
        consumer_secret=os.environ['consumer_secret'],
        access_token_key=os.environ['access_token_key'],
        access_token_secret=os.environ['access_token_secret'],
        sleep_on_rate_limit=True,
    )
    page_size = 100
    maxid = None
    corpus = []
    for _ in range(int(size / page_size)):
        results = api.GetSearch(
            term=keyword,
            result_type='recent',
            count=page_size,
            max_id=maxid,
            lang=lang,
        )
        if not results:
            # The search is exhausted; min() on an empty page would raise
            # ValueError, so stop paging and return what we have.
            break
        # Ask the next request for tweets strictly older than this page.
        maxid = min(result.id for result in results) - 1
        corpus.extend(results)
    return corpus
def detect_sentiment(corpus):
    """Tally Amazon Comprehend sentiment results for a list of tweets.

    The tweets are sent to ``batch_detect_sentiment`` in slices of 25, using
    the module-level ``lang`` and ``region``.

    Args:
        corpus: Sliceable sequence of objects exposing a ``.text`` attribute.

    Returns:
        dict: With three groups of keys:
            * ``'MIXED'``/``'NEGATIVE'``/``'NEUTRAL'``/``'POSITIVE'`` - how
              many tweets were assigned each sentiment label.
            * ``'Mixed'``/``'Negative'``/``'Neutral'``/``'Positive'`` - the
              sum of the per-tweet scores reported under each name.
            * ``'MOST_NEGATIVE'``/``'MOST_POSITIVE'`` - ``{'score': ...}``,
              plus a ``'tweet'`` entry once a tweet with that label and a
              score above the current best has been seen.
    """
    result = dict.fromkeys(
        ('Mixed', 'Negative', 'Neutral', 'Positive',
         'MIXED', 'NEGATIVE', 'NEUTRAL', 'POSITIVE'),
        0,
    )
    result['MOST_NEGATIVE'] = {'score': 0}
    result['MOST_POSITIVE'] = {'score': 0}

    # Sentiment label -> (result slot tracking the extreme, score field to compare).
    extremes = {
        'NEGATIVE': ('MOST_NEGATIVE', 'Negative'),
        'POSITIVE': ('MOST_POSITIVE', 'Positive'),
    }

    client = boto3.client('comprehend', region_name=region)
    # NOTE(review): 25 presumably matches the per-request document limit of
    # BatchDetectSentiment - confirm against the Comprehend documentation.
    chunk_len = 25
    for start in range(0, len(corpus), chunk_len):
        chunk = corpus[start:start + chunk_len]
        response = client.batch_detect_sentiment(
            TextList=[tweet.text for tweet in chunk],
            LanguageCode=lang,
        )
        for item in response['ResultList']:
            label = item['Sentiment']
            result[label] += 1

            if label in extremes:
                slot, score_field = extremes[label]
                candidate = item['SentimentScore'][score_field]
                if candidate > result[slot]['score']:
                    # 'Index' is the position within this chunk's TextList.
                    result[slot] = {
                        'score': candidate,
                        'tweet': chunk[item['Index']],
                    }

            # Accumulate every score so the totals can be turned into shares.
            for score_name, value in item['SentimentScore'].items():
                result[score_name] += value
    return result
def stdout(result):
    """Print a sentiment summary produced by ``detect_sentiment``.

    For each sentiment one line is printed with the number of tweets given
    that label and that sentiment's share of the summed scores, shown with
    one decimal place. The most positive and most negative tweets follow
    when they are present in ``result``.

    Args:
        result: Mapping with the count keys ``'POSITIVE'``, ``'NEGATIVE'``,
            ``'MIXED'``, ``'NEUTRAL'``, the score-sum keys ``'Positive'``,
            ``'Negative'``, ``'Mixed'``, ``'Neutral'``, and the
            ``'MOST_POSITIVE'``/``'MOST_NEGATIVE'`` dicts, whose optional
            ``'tweet'`` entry must expose a ``.text`` attribute.
    """
    score_keys = ('Mixed', 'Negative', 'Neutral', 'Positive')
    sum_score = sum(result[key] for key in score_keys)

    # (label, count key, score-sum key) in display order.
    rows = (
        ('Positive', 'POSITIVE', 'Positive'),
        ('Negative', 'NEGATIVE', 'Negative'),
        ('Mixed', 'MIXED', 'Mixed'),
        ('Neutral', 'NEUTRAL', 'Neutral'),
    )
    for label, count_key, score_key in rows:
        # With nothing analysed the total is 0; report 0.0% instead of
        # dividing by zero.
        share = result[score_key] / sum_score * 100 if sum_score else 0.0
        # The '.1f' format does the rounding to one decimal place.
        print('{} : {:4d}件 : {:.1f}%'.format(label, result[count_key], share))

    if result['MOST_POSITIVE'].get('tweet'):
        print('Most positive tweet is "{}"'.format(result['MOST_POSITIVE']['tweet'].text))
    if result['MOST_NEGATIVE'].get('tweet'):
        print('Most negative tweet is "{}"'.format(result['MOST_NEGATIVE']['tweet'].text))
def main():
    """Fetch the tweets, score their sentiment, and print the summary."""
    stdout(detect_sentiment(get_tweet_texts()))
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment