Skip to content

Instantly share code, notes, and snippets.

Avatar

Lianne & Justin @ Just into Data liannewriting

View GitHub Profile
@liannewriting
liannewriting / wordcloud.py
Created Sep 6, 2020
Twitter sentiment analysis python
View wordcloud.py
# Words in this set are left out of the word cloud: the library's default
# stop words plus a few extra tokens added for this data set.
stopwords = STOPWORDS.union(['http', 'https', 'co', 'starbuck', 'starbucks'])
# Lowercase every tweet and merge them all into one space-separated string.
all_txt = ' '.join(txt.lower() for txt in df_starbucks['full_text'].to_list())
# Build the word cloud from the combined text (rendering it is done elsewhere).
wordcloud = WordCloud(
    stopwords=stopwords,
    background_color="white",
    width=800,
    height=600,
).generate(all_txt)
@liannewriting
liannewriting / import_wordcloud.py
Created Sep 6, 2020
Twitter sentiment analysis python
View import_wordcloud.py
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
@liannewriting
liannewriting / plot_sentiment_time.py
Created Sep 6, 2020
Twitter sentiment analysis python
View plot_sentiment_time.py
import plotly.express as px

# Line chart of the 'cnt' column over 'created_at_hour', with one line per
# value of 'predicted_sentiment'.
fig = px.line(
    data_frame=df_sentiment_by_time,
    x="created_at_hour",
    y="cnt",
    color="predicted_sentiment",
)
fig.show()
@liannewriting
liannewriting / sentiment_by_hour.py
Created Sep 6, 2020
Twitter sentiment analysis python
View sentiment_by_hour.py
# Bucket every tweet into the nearest whole hour. The lowercase 'h' alias is
# used because the uppercase 'H' spelling is deprecated in recent pandas.
# NOTE(review): round() moves timestamps past the half hour into the *next*
# hour; switch to .dt.floor('h') if buckets should start on the hour.
df_starbucks['created_at_hour'] = df_starbucks['created_at'].dt.round('h')
# Named aggregation: output column 'cnt' counts the non-null 'id' values
# in each (hour, sentiment) group.
aggregation = {'cnt': ('id', 'count')}
# observed=False only matters when a grouping key is categorical; it pins the
# long-standing behaviour (unobserved categories are kept, with a count of 0)
# instead of relying on a default that newer pandas versions change.
df_sentiment_by_time = (
    df_starbucks
    .groupby(['created_at_hour', 'predicted_sentiment'], observed=False)
    .agg(**aggregation)
    .reset_index()
)
# Bare expression: displays the result when run as a notebook cell.
df_sentiment_by_time
@liannewriting
liannewriting / sample_classified_tweets.py
Created Sep 6, 2020
Twitter sentiment analysis python
View sample_classified_tweets.py
# Spot check: show 10 randomly sampled rows with the tweet text, its
# 'textblob_sentiment' score and the 'predicted_sentiment' label.
df_starbucks.loc[
    :, ['full_text', 'textblob_sentiment', 'predicted_sentiment']
].sample(n=10)
@liannewriting
liannewriting / optimal_thresholds_classify.py
Created Sep 6, 2020
Twitter sentiment analysis python
View optimal_thresholds_classify.py
# Turn the numeric sentiment score into a label. With right=False the bins are
# closed on the left and open on the right:
#   [-2, -0.05) -> 'negative', [-0.05, 0.2857) -> 'neutral', [0.2857, 2) -> 'positive'
df_starbucks['predicted_sentiment'] = pd.cut(
    x=df_starbucks['textblob_sentiment'],
    bins=[-2, -0.05, 0.2857, 2],
    right=False,
    labels=['negative', 'neutral', 'positive'],
)
@liannewriting
liannewriting / accuracy_threshold_pos.py
Created Sep 6, 2020
Twitter sentiment analysis python
View accuracy_threshold_pos.py
# Print the accuracy of the rule "score >= t means positive" for every
# candidate threshold in pos_thresholds.
for t in pos_thresholds:
    # Use >= rather than >: roc_curve treats scores equal to a threshold as
    # positive, and the positive bin applied with pd.cut(..., right=False)
    # is closed on the left, so ties must count as positive here as well.
    pos_pred = df_labelled['textblob_sentiment'] >= t
    acc = accuracy_score(df_labelled['is_pos'], pos_pred)
    print('threshold: {}, accuracy: {}'.format(t, acc))
@liannewriting
liannewriting / plot_roc_curve_pos.py
Created Sep 6, 2020
Twitter sentiment analysis python
View plot_roc_curve_pos.py
# ROC analysis for the positive class: the sentiment score itself is the
# ranking score and rows with is_pos == 1 are the positives.
pos_fpr, pos_tpr, pos_thresholds = roc_curve(
    y_true=df_labelled['is_pos'],
    y_score=df_labelled['textblob_sentiment'],
    pos_label=1,
)
# Area under the (fpr, tpr) curve.
pos_roc_auc = auc(pos_fpr, pos_tpr)
# plot_roc_curve is a helper defined outside this snippet; it is called here
# with (fpr, tpr, auc).
plot_roc_curve(pos_fpr, pos_tpr, pos_roc_auc)
@liannewriting
liannewriting / accuracy_thresholds_neg.py
Created Sep 6, 2020
Twitter sentiment analysis python
View accuracy_thresholds_neg.py
# Print the accuracy of the rule "sentiment < -t means negative" for every
# candidate threshold in neg_thresholds.
for t in neg_thresholds:
    # The score is negated so that a larger value means "more negative";
    # -score > t is the same test as score < -t, which matches a negative bin
    # that is open on the right (as in pd.cut(..., right=False)).
    # NOTE(review): roc_curve counts scores equal to the threshold as
    # positive, so ties are handled differently here; confirm this is intended.
    neg_pred = -df_labelled['textblob_sentiment'] > t
    acc = accuracy_score(df_labelled['is_neg'], neg_pred)
    # Report the threshold on the original (un-negated) sentiment scale.
    print('threshold: {}, accuracy: {}'.format(-t, acc))
@liannewriting
liannewriting / plot_roc_curve_neg.py
Created Sep 6, 2020
Twitter sentiment analysis python
View plot_roc_curve_neg.py
# ROC analysis for the negative class: the sentiment score is negated so that
# higher values rank as "more negative"; rows with is_neg == 1 are the positives.
neg_fpr, neg_tpr, neg_thresholds = roc_curve(
    y_true=df_labelled['is_neg'],
    y_score=-df_labelled['textblob_sentiment'],
    pos_label=1,
)
# Area under the (fpr, tpr) curve.
neg_roc_auc = auc(neg_fpr, neg_tpr)
# plot_roc_curve is a helper defined outside this snippet; it is called here
# with (fpr, tpr, auc).
plot_roc_curve(neg_fpr, neg_tpr, neg_roc_auc)
You can’t perform that action at this time.