This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from textblob import TextBlob

# Compute a TextBlob polarity score for each cleaned article, rounded to
# 3 decimals. Polarity lies in [-1.0, 1.0]; its sign drives the label below.
# NOTE(review): assumes news_df['clean_text'] holds preprocessed article strings
# (defined elsewhere in this notebook) — confirm against earlier cells.
sentiment_scores_tb = [round(TextBlob(article).sentiment.polarity, 3)
                       for article in news_df['clean_text']]
# Map each score to a categorical label: > 0 positive, < 0 negative, else neutral.
sentiment_category_tb = ['positive' if score > 0
                         else 'negative' if score < 0
                         else 'neutral'
                         for score in sentiment_scores_tb]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Locate the most positive and most negative technology articles.
# Generalized from the original hard-coded scores (6 and -15, which were
# dataset-specific magic numbers) to idxmax/idxmin so this works on any data.
# NOTE(review): the original mixed `df` (scores) and `news_df` (article text);
# this keeps that split and assumes both share the same row index — confirm.
tech_rows = df[df.news_category == 'technology']
pos_idx = tech_rows.sentiment_score.idxmax()
neg_idx = tech_rows.sentiment_score.idxmin()

print('Most Negative Tech News Article:', news_df.loc[neg_idx, 'news_article'])
print()
print('Most Positive Tech News Article:', news_df.loc[pos_idx, 'news_article'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count of articles per news category, split by sentiment label.
# `sns.factorplot` was renamed `catplot` in seaborn 0.9 and removed in 0.11;
# `kind="count"` draws one bar per (category, sentiment) pair.
fc = sns.catplot(x="news_category", hue="sentiment_category",
                 data=df, kind="count",
                 palette={"negative": "#FE2020",
                          "positive": "#BADD07",
                          "neutral": "#68BFF5"})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Side-by-side strip plot (raw points) and box plot (distribution summary)
# of sentiment score per news category.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
sp = sns.stripplot(x='news_category', y="sentiment_score",
                   hue='news_category', data=df, ax=ax1)
bp = sns.boxplot(x='news_category', y="sentiment_score",
                 hue='news_category', data=df, palette="Set2", ax=ax2)
# Fixed: the original line was missing its closing parenthesis (syntax error).
t = f.suptitle('Visualizing News Sentiment', fontsize=14)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Initialize the AFINN lexicon-based sentiment analyzer.
from afinn import Afinn

af = Afinn()

# Afinn.score returns a signed float (sum of word valences); unlike TextBlob
# polarity it is unbounded, but the sign convention is the same.
# NOTE(review): assumes `corpus` (defined elsewhere) is an iterable of strings.
sentiment_scores = [af.score(article) for article in corpus]
# Map each score to a categorical label: > 0 positive, < 0 negative, else neutral.
sentiment_category = ['positive' if score > 0
                      else 'negative' if score < 0
                      else 'neutral'
                      for score in sentiment_scores]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Accumulate named entities found by spaCy across the corpus by stitching
# consecutive entity-tagged tokens into a single multi-word entity name.
# NOTE(review): this chunk appears truncated — `temp_named_entity` is assigned
# but never read, and nothing is ever appended to `named_entities`; the
# original presumably had an else-branch that flushed the accumulated entity.
# TODO: confirm against the full source before relying on this cell.
named_entities = []
for sentence in corpus:
    temp_entity_name = ''
    temp_named_entity = None
    sentence = nlp(sentence)
    for word in sentence:
        term = word.text
        tag = word.ent_type_  # empty string when the token is not in an entity
        if tag:
            # extend the current entity name with this token
            temp_entity_name = ' '.join([temp_entity_name, term]).strip()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ner(sentence):
    """Print and visualize the named entities spaCy finds in `sentence`.

    Runs the module-level `nlp` pipeline, prints (token, entity-type) pairs
    for every token inside an entity span, then renders the entities inline
    with displaCy (requires a Jupyter environment).
    """
    # Fixed: the original used invalid brace syntax (`ner(sentence) { ... }`),
    # likely mangled in transit; restored as a proper Python function.
    sentence_nlp = nlp(sentence)
    # print named entities in article
    print([(word, word.ent_type_) for word in sentence_nlp if word.ent_type_])
    # visualize named entities
    displacy.render(sentence_nlp, style='ent', jupyter=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pos_tagging_spacy(sentence):
    """Return a DataFrame of spaCy POS tags for `sentence`.

    Columns: 'Word' (token), 'POS tag' (fine-grained `tag_`),
    'Tag type' (coarse universal `pos_`). Uses the module-level `nlp` pipeline.
    """
    # Fixed: restored from invalid brace syntax, and the DataFrame is now
    # returned — the original built it as a bare expression and discarded it.
    sentence_nlp = nlp(sentence)
    spacy_pos_tagged = [(word, word.tag_, word.pos_) for word in sentence_nlp]
    return pd.DataFrame(spacy_pos_tagged, columns=['Word', 'POS tag', 'Tag type'])
pos_tagging_nltk(sentence) { | |
# POS tagging with nltk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def remove_stopwords(text, is_lower_case=False):
    """Return `text` with stopwords removed, rejoined with single spaces.

    Uses the module-level `tokenizer` and `stopword_list`. When
    `is_lower_case` is True the text is assumed to be already lowercased and
    tokens are compared as-is; otherwise each token is lowercased only for
    the membership test, preserving the original casing in the output.
    """
    tokens = tokenizer.tokenize(text)
    tokens = [token.strip() for token in tokens]
    if is_lower_case:
        filtered_tokens = [token for token in tokens if token not in stopword_list]
    else:
        filtered_tokens = [token for token in tokens if token.lower() not in stopword_list]
    filtered_text = ' '.join(filtered_tokens)
    return filtered_text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def simple_stemmer(text):
    """Return `text` with each whitespace-separated word Porter-stemmed."""
    # Note: whitespace splitting means punctuation stays attached to words.
    ps = nltk.porter.PorterStemmer()
    text = ' '.join([ps.stem(word) for word in text.split()])
    return text
def lemmatize_text(text):
    """Return `text` with each token replaced by its spaCy lemma.

    Pronouns are kept as their surface form: spaCy 2.x lemmatizes pronouns
    to the placeholder '-PRON-', which this guards against. (spaCy 3.x no
    longer emits '-PRON-', in which case the guard is a harmless no-op.)
    Uses the module-level `nlp` pipeline.
    """
    text = nlp(text)
    text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
    return text