s-fujimoto/detect_sentiment_from_twitter.py

## detect_sentiment_from_twitter.py
#!/usr/bin/env python

import twitter
import boto3
import os

# Search term handed to the Twitter search API.
keyword = '#reinvent'
# Language code; used for the tweet search and as Comprehend's LanguageCode.
lang = 'en'
# AWS region in which the Comprehend client is created.
region = 'us-east-1'
# Upper bound on the number of tweets to collect (fetched in pages of 100).
size = 100 * 100

def get_tweet_texts():
    """Collect recent tweets matching the module-level ``keyword``.

    Pages backwards through the search results 100 tweets at a time, for at
    most ``size // 100`` pages, passing ``max_id`` so that each request only
    returns tweets older than the ones already collected.  Paging stops
    early as soon as a request returns no tweets.

    Credentials are read from the ``consumer_key``, ``consumer_secret``,
    ``access_token_key`` and ``access_token_secret`` environment variables.

    Returns:
        list: the status objects returned by ``api.GetSearch`` (not bare
        text strings, despite the function name), in retrieval order.

    Raises:
        KeyError: if one of the credential environment variables is unset.
    """
    page_size = 100
    api = twitter.Api(consumer_key=os.environ['consumer_key'],
                      consumer_secret=os.environ['consumer_secret'],
                      access_token_key=os.environ['access_token_key'],
                      access_token_secret=os.environ['access_token_secret'],
                      sleep_on_rate_limit=True)

    maxid = None
    corpus = []
    for _ in range(size // page_size):
        results = api.GetSearch(term=keyword, result_type='recent',
                                count=page_size, max_id=maxid, lang=lang)
        if not results:
            # Search exhausted: min() on an empty page would raise
            # ValueError, and further requests would return nothing new.
            break
        corpus.extend(results)
        # Continue with tweets strictly older than the oldest one seen.
        maxid = min(result.id for result in results) - 1

    return corpus


def detect_sentiment(corpus):
    """Score every tweet in *corpus* with Amazon Comprehend and tally the results.

    The tweets are sent to ``batch_detect_sentiment`` in slices of at most
    25 texts (presumably the service's per-call limit -- confirm against the
    Comprehend documentation).

    Args:
        corpus: sequence of objects exposing a ``text`` attribute.

    Returns:
        dict: with these entries

        * ``'MIXED'``/``'NEGATIVE'``/``'NEUTRAL'``/``'POSITIVE'`` -- number
          of tweets given that overall label;
        * ``'Mixed'``/``'Negative'``/``'Neutral'``/``'Positive'`` -- sum of
          the per-tweet scores for that class;
        * ``'MOST_NEGATIVE'``/``'MOST_POSITIVE'`` -- ``{'score', 'tweet'}``
          for the strongest tweet of that label, or just ``{'score': 0}``
          when no tweet received the label.
    """
    tally = {
        'Mixed': 0,
        'Negative': 0,
        'Neutral': 0,
        'Positive': 0,
        'MIXED': 0,
        'NEGATIVE': 0,
        'NEUTRAL': 0,
        'POSITIVE': 0,
        'MOST_NEGATIVE': {'score': 0},
        'MOST_POSITIVE': {'score': 0},
    }
    client = boto3.client('comprehend', region_name=region)
    chunk_len = 25
    chunks = [corpus[start:start + chunk_len]
              for start in range(0, len(corpus), chunk_len)]
    for chunk in chunks:
        response = client.batch_detect_sentiment(
            TextList=[status.text for status in chunk],
            LanguageCode=lang,
        )
        for item in response['ResultList']:
            label = item['Sentiment']
            tally[label] += 1

            # 'Index' is the position of the text inside this chunk.
            if label == 'NEGATIVE':
                strength = item['SentimentScore']['Negative']
                if strength > tally['MOST_NEGATIVE']['score']:
                    tally['MOST_NEGATIVE'] = {'score': strength,
                                              'tweet': chunk[item['Index']]}
            elif label == 'POSITIVE':
                strength = item['SentimentScore']['Positive']
                if strength > tally['MOST_POSITIVE']['score']:
                    tally['MOST_POSITIVE'] = {'score': strength,
                                              'tweet': chunk[item['Index']]}

            # Accumulate the raw per-class scores under the mixed-case keys.
            for name, value in item['SentimentScore'].items():
                tally[name] += value

    return tally


def stdout(result):
    """Print the sentiment summary produced by ``detect_sentiment``.

    For each class one line is printed with the number of tweets carrying
    that label and the class's share of the summed sentiment scores, as a
    percentage with one decimal place.  The most positive and most negative
    tweets are printed afterwards when present.

    Args:
        result: dict with the count keys ``'POSITIVE'``, ``'NEGATIVE'``,
            ``'MIXED'``, ``'NEUTRAL'``, the score-sum keys ``'Positive'``,
            ``'Negative'``, ``'Mixed'``, ``'Neutral'``, and
            ``'MOST_POSITIVE'``/``'MOST_NEGATIVE'`` dicts that may hold a
            ``'tweet'`` object with a ``text`` attribute.
    """
    score_keys = ('Mixed', 'Negative', 'Neutral', 'Positive')
    sum_score = sum(result[key] for key in score_keys)

    # (line template, tweet-count key, score-sum key); the templates keep
    # their hand-aligned labels.  "件" is the Japanese counter for items.
    rows = (
        ('Positive : {:4d}件 : {:.1f}%', 'POSITIVE', 'Positive'),
        ('Negative : {:4d}件 : {:.1f}%', 'NEGATIVE', 'Negative'),
        ('Mixed    : {:4d}件 : {:.1f}%', 'MIXED', 'Mixed'),
        ('Neutral  : {:4d}件 : {:.1f}%', 'NEUTRAL', 'Neutral'),
    )
    for template, count_key, score_key in rows:
        # An empty corpus yields a zero total; report 0.0% instead of
        # dividing by zero.  The format spec does the one-decimal rounding.
        share = result[score_key] / sum_score * 100 if sum_score else 0.0
        print(template.format(result[count_key], share))

    if result['MOST_POSITIVE'].get('tweet'):
        print('Most positive tweet is "{}"'.format(result['MOST_POSITIVE']['tweet'].text))
    if result['MOST_NEGATIVE'].get('tweet'):
        print('Most negative tweet is "{}"'.format(result['MOST_NEGATIVE']['tweet'].text))


def main():
    """Fetch the tweets, score their sentiment and print the summary."""
    stdout(detect_sentiment(get_tweet_texts()))


# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
	#!/usr/bin/env python

	import twitter
	import boto3
	import os

	# Search term handed to the Twitter search API.
	keyword = '#reinvent'
	# Language code; used for the tweet search and as Comprehend's LanguageCode.
	lang = 'en'
	# AWS region in which the Comprehend client is created.
	region = 'us-east-1'
	# Upper bound on the number of tweets to collect (fetched in pages of 100).
	size = 100 * 100

	def get_tweet_texts():
	    """Collect recent tweets matching the module-level ``keyword``.

	    Pages backwards through the search results 100 tweets at a time, for
	    at most ``size // 100`` pages, passing ``max_id`` so that each request
	    only returns tweets older than the ones already collected.  Paging
	    stops early as soon as a request returns no tweets.

	    Credentials are read from the ``consumer_key``, ``consumer_secret``,
	    ``access_token_key`` and ``access_token_secret`` environment variables.

	    Returns:
	        list: the status objects returned by ``api.GetSearch`` (not bare
	        text strings, despite the function name), in retrieval order.

	    Raises:
	        KeyError: if one of the credential environment variables is unset.
	    """
	    page_size = 100
	    api = twitter.Api(consumer_key=os.environ['consumer_key'],
	                      consumer_secret=os.environ['consumer_secret'],
	                      access_token_key=os.environ['access_token_key'],
	                      access_token_secret=os.environ['access_token_secret'],
	                      sleep_on_rate_limit=True)

	    maxid = None
	    corpus = []
	    for _ in range(size // page_size):
	        results = api.GetSearch(term=keyword, result_type='recent',
	                                count=page_size, max_id=maxid, lang=lang)
	        if not results:
	            # Search exhausted: min() on an empty page would raise
	            # ValueError, and further requests would return nothing new.
	            break
	        corpus.extend(results)
	        # Continue with tweets strictly older than the oldest one seen.
	        maxid = min(result.id for result in results) - 1

	    return corpus


	def detect_sentiment(corpus):
	    """Score every tweet in *corpus* with Amazon Comprehend and tally the results.

	    The tweets are sent to ``batch_detect_sentiment`` in slices of at most
	    25 texts (presumably the service's per-call limit -- confirm against
	    the Comprehend documentation).

	    Args:
	        corpus: sequence of objects exposing a ``text`` attribute.

	    Returns:
	        dict: with these entries

	        * ``'MIXED'``/``'NEGATIVE'``/``'NEUTRAL'``/``'POSITIVE'`` -- number
	          of tweets given that overall label;
	        * ``'Mixed'``/``'Negative'``/``'Neutral'``/``'Positive'`` -- sum of
	          the per-tweet scores for that class;
	        * ``'MOST_NEGATIVE'``/``'MOST_POSITIVE'`` -- ``{'score', 'tweet'}``
	          for the strongest tweet of that label, or just ``{'score': 0}``
	          when no tweet received the label.
	    """
	    tally = {
	        'Mixed': 0,
	        'Negative': 0,
	        'Neutral': 0,
	        'Positive': 0,
	        'MIXED': 0,
	        'NEGATIVE': 0,
	        'NEUTRAL': 0,
	        'POSITIVE': 0,
	        'MOST_NEGATIVE': {'score': 0},
	        'MOST_POSITIVE': {'score': 0},
	    }
	    client = boto3.client('comprehend', region_name=region)
	    chunk_len = 25
	    chunks = [corpus[start:start + chunk_len]
	              for start in range(0, len(corpus), chunk_len)]
	    for chunk in chunks:
	        response = client.batch_detect_sentiment(
	            TextList=[status.text for status in chunk],
	            LanguageCode=lang,
	        )
	        for item in response['ResultList']:
	            label = item['Sentiment']
	            tally[label] += 1

	            # 'Index' is the position of the text inside this chunk.
	            if label == 'NEGATIVE':
	                strength = item['SentimentScore']['Negative']
	                if strength > tally['MOST_NEGATIVE']['score']:
	                    tally['MOST_NEGATIVE'] = {'score': strength,
	                                              'tweet': chunk[item['Index']]}
	            elif label == 'POSITIVE':
	                strength = item['SentimentScore']['Positive']
	                if strength > tally['MOST_POSITIVE']['score']:
	                    tally['MOST_POSITIVE'] = {'score': strength,
	                                              'tweet': chunk[item['Index']]}

	            # Accumulate the raw per-class scores under the mixed-case keys.
	            for name, value in item['SentimentScore'].items():
	                tally[name] += value

	    return tally


	def stdout(result):
	    """Print the sentiment summary produced by ``detect_sentiment``.

	    For each class one line is printed with the number of tweets carrying
	    that label and the class's share of the summed sentiment scores, as a
	    percentage with one decimal place.  The most positive and most
	    negative tweets are printed afterwards when present.

	    Args:
	        result: dict with the count keys ``'POSITIVE'``, ``'NEGATIVE'``,
	            ``'MIXED'``, ``'NEUTRAL'``, the score-sum keys ``'Positive'``,
	            ``'Negative'``, ``'Mixed'``, ``'Neutral'``, and
	            ``'MOST_POSITIVE'``/``'MOST_NEGATIVE'`` dicts that may hold a
	            ``'tweet'`` object with a ``text`` attribute.
	    """
	    score_keys = ('Mixed', 'Negative', 'Neutral', 'Positive')
	    sum_score = sum(result[key] for key in score_keys)

	    # (line template, tweet-count key, score-sum key).
	    # "件" is the Japanese counter for items.
	    rows = (
	        ('Positive : {:4d}件 : {:.1f}%', 'POSITIVE', 'Positive'),
	        ('Negative : {:4d}件 : {:.1f}%', 'NEGATIVE', 'Negative'),
	        ('Mixed : {:4d}件 : {:.1f}%', 'MIXED', 'Mixed'),
	        ('Neutral : {:4d}件 : {:.1f}%', 'NEUTRAL', 'Neutral'),
	    )
	    for template, count_key, score_key in rows:
	        # An empty corpus yields a zero total; report 0.0% instead of
	        # dividing by zero.  The format spec does the one-decimal rounding.
	        share = result[score_key] / sum_score * 100 if sum_score else 0.0
	        print(template.format(result[count_key], share))

	    if result['MOST_POSITIVE'].get('tweet'):
	        print('Most positive tweet is "{}"'.format(result['MOST_POSITIVE']['tweet'].text))
	    if result['MOST_NEGATIVE'].get('tweet'):
	        print('Most negative tweet is "{}"'.format(result['MOST_NEGATIVE']['tweet'].text))


	def main():
	    """Fetch the tweets, score their sentiment and print the summary."""
	    stdout(detect_sentiment(get_tweet_texts()))


	# Run the pipeline only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()