This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pos_tagging_spacy(sentence):
    """Tag every token of *sentence* with spaCy and tabulate the result.

    Relies on two module-level names: the spaCy pipeline ``nlp`` and
    pandas imported as ``pd``.

    Args:
        sentence: Raw text passed to ``nlp``.

    Returns:
        A DataFrame with one row per token and the columns ``Word``
        (the token), ``POS tag`` (the token's ``tag_``) and ``Tag type``
        (the token's ``pos_``).
    """
    sentence_nlp = nlp(sentence)
    # POS tagging with spaCy
    spacy_pos_tagged = [(word, word.tag_, word.pos_) for word in sentence_nlp]
    # Return the frame explicitly: a bare expression inside a function is
    # evaluated and thrown away, so callers would otherwise get None.
    return pd.DataFrame(spacy_pos_tagged, columns=['Word', 'POS tag', 'Tag type'])
pos_tagging_nltk(sentence) { | |
# POS tagging with nltk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def ner(sentence):
    """Print the named entities found in *sentence* and render them inline.

    Relies on two module-level names: the spaCy pipeline ``nlp`` and
    spaCy's ``displacy`` visualizer.

    Args:
        sentence: Raw text passed to ``nlp``.

    Returns:
        None. The function prints a list of ``(token, entity type)`` pairs
        for every token whose ``ent_type_`` is non-empty, then asks
        displaCy to render the entities with ``jupyter=True``.
    """
    sentence_nlp = nlp(sentence)
    # print named entities in article
    print([(word, word.ent_type_) for word in sentence_nlp if word.ent_type_])
    # visualize named entities
    displacy.render(sentence_nlp, style='ent', jupyter=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
named_entities = [] | |
for sentence in corpus: | |
temp_entity_name = '' | |
temp_named_entity = None | |
sentence = nlp(sentence) | |
for word in sentence: | |
term = word.text | |
tag = word.ent_type_ | |
if tag: | |
temp_entity_name = ' '.join([temp_entity_name, term]).strip() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# initialize afinn sentiment analyzer
from afinn import Afinn

af = Afinn()

# compute sentiment scores (polarity) and labels
# `corpus` is the iterable of article texts defined earlier in the notebook.
sentiment_scores = [af.score(article) for article in corpus]
# A strictly positive score is 'positive', strictly negative is 'negative',
# and exactly zero is 'neutral'.
sentiment_category = ['positive' if score > 0
                      else 'negative' if score < 0
                      else 'neutral'
                      for score in sentiment_scores]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Two side-by-side views of sentiment_score per news_category, drawn from
# `df`: a strip plot on the left axis and a box plot on the right axis.
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
sp = sns.stripplot(x='news_category', y="sentiment_score",
                   hue='news_category', data=df, ax=ax1)
bp = sns.boxplot(x='news_category', y="sentiment_score",
                 hue='news_category', data=df, palette="Set2", ax=ax2)
# The closing parenthesis was missing here, leaving the call unterminated.
t = f.suptitle('Visualizing News Sentiment', fontsize=14)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count of articles per news category, split by sentiment label.
# `factorplot` was renamed to `catplot` in seaborn 0.9 and later removed;
# the arguments used here carry over unchanged.
fc = sns.catplot(x="news_category", hue="sentiment_category",
                 data=df, kind="count",
                 palette={"negative": "#FE2020",
                          "positive": "#BADD07",
                          "neutral": "#68BFF5"})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Locate the technology articles with the highest and lowest sentiment score.
# idxmax/idxmin return the index label of the first row holding the extreme
# value, so nothing breaks when the data (and thus the extremes) changes.
tech_scores = df.loc[df.news_category == 'technology', 'sentiment_score']
pos_idx = tech_scores.idxmax()
neg_idx = tech_scores.idxmin()
# NOTE(review): .iloc is positional while pos_idx/neg_idx are index labels of
# `df`; this assumes `df` has a default 0..n-1 index aligned with the rows of
# `news_df` -- confirm against where both frames are built.
print('Most Negative Tech News Article:', news_df.iloc[neg_idx]['news_article'])
print()
print('Most Positive Tech News Article:', news_df.iloc[pos_idx]['news_article'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from textblob import TextBlob

# compute sentiment scores (polarity) and labels
# Each polarity is rounded to 3 decimals before it is labelled, so a value
# that rounds to 0.0 is classed as 'neutral'.
sentiment_scores_tb = [round(TextBlob(article).sentiment.polarity, 3)
                       for article in news_df['clean_text']]
sentiment_category_tb = ['positive' if score > 0
                         else 'negative' if score < 0
                         else 'neutral'
                         for score in sentiment_scores_tb]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def normalize_corpus(corpus, html_stripping=True, contraction_expansion=True, | |
accented_char_removal=True, text_lower_case=True, | |
text_lemmatization=True, special_char_removal=True, | |
stopword_removal=True, remove_digits=True): | |
normalized_corpus = [] | |
# normalize each document in the corpus | |
for doc in corpus: | |
# strip HTML | |
if html_stripping: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# nat gw
# Elastic IP consumed by a NAT gateway via its allocation_id.
# NOTE(review): recent AWS provider releases deprecate `vpc = true` in favour
# of `domain = "vpc"` -- confirm the provider version before switching.
resource "aws_eip" "nat" {
  vpc = true
}
# NAT gateway placed in subnet main-public-1, using the Elastic IP above.
# References are written as first-class expressions: interpolation-only
# strings ("${...}") and quoted depends_on entries are deprecated since
# Terraform 0.12.
resource "aws_nat_gateway" "nat-gw" {
  allocation_id = aws_eip.nat.id
  subnet_id     = aws_subnet.main-public-1.id
  # Explicit ordering: create the gateway only after the internet gateway.
  depends_on = [aws_internet_gateway.main-gw]
}