Skip to content

Instantly share code, notes, and snippets.

@BenjaminFraser
Last active January 3, 2020 17:43
Show Gist options
  • Save BenjaminFraser/fc4dd29549a75c93336822060d012ec8 to your computer and use it in GitHub Desktop.
Save BenjaminFraser/fc4dd29549a75c93336822060d012ec8 to your computer and use it in GitHub Desktop.
An example of using the NLTK VADER sentiment analyser to perform sentiment analysis on a pandas DataFrame.
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Single shared analyser instance, built once and reused for every call.
sent_i = SentimentIntensityAnalyzer()


def vadar_sentiment(text):
    """Return the NLTK VADER (lexicon method) compound sentiment of *text*.

    Args:
        text: The text to score; passed unchanged to
            ``SentimentIntensityAnalyzer.polarity_scores``.

    Returns:
        The value stored under the ``'compound'`` key of the score
        dictionary produced by the analyser.
    """
    return sent_i.polarity_scores(text)['compound']
# Create a new 'vadar compound' column holding the VADER compound sentiment
# score that vadar_sentiment returns for each value in the 'title' column.
news_sentiments['vadar compound'] = news_sentiments['title'].apply(vadar_sentiment)
def categorise_sentiment(sentiment, neg_threshold=-0.05, pos_threshold=0.05):
    """Categorise a sentiment score as 'negative', 'positive' or 'neutral'.

    Args:
        sentiment: Numeric sentiment score to classify.
        neg_threshold: Scores strictly below this value are 'negative'.
        pos_threshold: Scores strictly above this value are 'positive'.

    Returns:
        The string label ``'negative'``, ``'positive'`` or ``'neutral'``.
        A score equal to either threshold, or lying between them, is
        ``'neutral'``.
    """
    if sentiment < neg_threshold:
        return 'negative'
    if sentiment > pos_threshold:
        return 'positive'
    # Also reached for NaN, because both comparisons above are then False.
    return 'neutral'
# Add a 'vadar sentiment' column holding the label that categorise_sentiment
# (called with its default thresholds) assigns to each 'vadar compound' score.
news_sentiments['vadar sentiment'] = news_sentiments['vadar compound'].apply(categorise_sentiment)
# Plot, for each newspaper, how many articles carry each sentiment label.
# NOTE(review): `plt` and `sns` are not imported in this snippet -- presumably
# matplotlib.pyplot and seaborn; confirm they are imported before this runs.
plt.figure(figsize=(8, 8))
sns.countplot(x='newspaper', data=news_sentiments, hue='vadar sentiment')
plt.title('VADAR Sentiment Analysis', weight='bold')
plt.ylabel('Number of Articles', fontsize=10, weight='bold')
# The x-axis shows the 'newspaper' categories (sentiment is encoded by hue),
# so label it as such rather than as the sentiment.
plt.xlabel('Newspaper', fontsize=10, weight='bold')
# Anchor the legend's upper-left corner (loc=2) just outside the right edge
# of the axes so it does not cover the bars.
plt.legend(bbox_to_anchor=(1.02, 1.0), loc=2, borderaxespad=0.0)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment