# look up the most positive and most negative technology articles
# (6 and -15 are the extreme AFINN scores observed for this category)
pos_idx = df[(df.news_category == 'technology') & (df.sentiment_score == 6)].index[0]
neg_idx = df[(df.news_category == 'technology') & (df.sentiment_score == -15)].index[0]

print('Most Negative Tech News Article:', news_df.iloc[neg_idx]['news_article'])
print()
print('Most Positive Tech News Article:', news_df.iloc[pos_idx]['news_article'])
# count of articles per sentiment label in each news category
# (factorplot was renamed catplot in newer seaborn releases)
fc = sns.catplot(x="news_category", hue="sentiment_category",
                 data=df, kind="count",
                 palette={"negative": "#FE2020",
                          "positive": "#BADD07",
                          "neutral": "#68BFF5"})
# strip plot and box plot of sentiment score distributions per category
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 4))
sp = sns.stripplot(x='news_category', y="sentiment_score",
                   hue='news_category', data=df, ax=ax1)
bp = sns.boxplot(x='news_category', y="sentiment_score",
                 hue='news_category', data=df, palette="Set2", ax=ax2)
t = f.suptitle('Visualizing News Sentiment', fontsize=14)
# initialize afinn sentiment analyzer
from afinn import Afinn

af = Afinn()

# compute sentiment scores (polarity) and labels
sentiment_scores = [af.score(article) for article in corpus]
sentiment_category = ['positive' if score > 0
                      else 'negative' if score < 0
                      else 'neutral'
                      for score in sentiment_scores]
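
# A minimal sketch, assuming news_df carries a 'news_category' column aligned
# row-for-row with corpus: assemble the frame `df` used by the lookup and plotting code above.
import pandas as pd

df = pd.DataFrame({'news_category': news_df['news_category'].values,
                   'sentiment_score': sentiment_scores,
                   'sentiment_category': sentiment_category})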
# collect (entity, tag) pairs across the corpus by merging consecutive entity tokens
# nlp is assumed to be a loaded spaCy pipeline, e.g. nlp = spacy.load('en_core_web_sm')
named_entities = []
for sentence in corpus:
    temp_entity_name = ''
    temp_named_entity = None
    sentence = nlp(sentence)
    for word in sentence:
        term = word.text
        tag = word.ent_type_
        if tag:
            temp_entity_name = ' '.join([temp_entity_name, term]).strip()
            temp_named_entity = (temp_entity_name, tag)
        else:
            # token outside an entity: flush any pending (entity, tag) pair
            if temp_named_entity:
                named_entities.append(temp_named_entity)
                temp_entity_name = ''
                temp_named_entity = None
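
# A short sketch of one way to inspect the collected entities with pandas
# (the column names here are illustrative, not from the original snippet)
entity_frame = pd.DataFrame(named_entities, columns=['Entity Name', 'Entity Type'])
top_entities = (entity_frame.groupby(['Entity Name', 'Entity Type'])
                            .size()
                            .sort_values(ascending=False)
                            .head(15))
print(top_entities)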
from spacy import displacy

def ner(sentence):
    sentence_nlp = nlp(sentence)
    # print named entities in article
    print([(word, word.ent_type_) for word in sentence_nlp if word.ent_type_])
    # visualize named entities
    displacy.render(sentence_nlp, style='ent', jupyter=True)
def pos_tagging_spacy(sentence):
    sentence_nlp = nlp(sentence)
    # POS tagging with spaCy
    spacy_pos_tagged = [(word, word.tag_, word.pos_) for word in sentence_nlp]
    return pd.DataFrame(spacy_pos_tagged, columns=['Word', 'POS tag', 'Tag type'])
def pos_tagging_nltk(sentence):
    # POS tagging with nltk (needs the punkt and averaged_perceptron_tagger resources)
    nltk_pos_tagged = nltk.pos_tag(nltk.word_tokenize(sentence))
    return pd.DataFrame(nltk_pos_tagged, columns=['Word', 'POS tag'])
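
# Quick usage sketch for the two taggers above; the sample sentence is purely illustrative
sample_sentence = "US unveils world's most powerful supercomputer, beats China."
print(pos_tagging_nltk(sample_sentence))
print(pos_tagging_spacy(sample_sentence))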
def normalize_corpus(corpus, html_stripping=True, contraction_expansion=True,
                     accented_char_removal=True, text_lower_case=True,
                     text_lemmatization=True, special_char_removal=True,
                     stopword_removal=True, remove_digits=True):
    normalized_corpus = []
    # normalize each document in the corpus
    for doc in corpus:
        # strip HTML; the remaining flags (contractions, accents, special chars, digits)
        # follow the same pattern with their own helpers, assumed defined elsewhere
        if html_stripping:
            doc = strip_html_tags(doc)
        if text_lower_case:
            doc = doc.lower()
        if text_lemmatization:
            doc = lemmatize_text(doc)
        if stopword_removal:
            doc = remove_stopwords(doc, is_lower_case=text_lower_case)
        normalized_corpus.append(doc)
    return normalized_corpus
def remove_stopwords(text, is_lower_case=False):
    # tokenizer and stopword_list are assumed from setup, e.g.
    # tokenizer = ToktokTokenizer() and stopword_list = nltk.corpus.stopwords.words('english')
    tokens = tokenizer.tokenize(text)
    tokens = [token.strip() for token in tokens]
    if is_lower_case:
        filtered_tokens = [token for token in tokens if token not in stopword_list]
    else:
        filtered_tokens = [token for token in tokens if token.lower() not in stopword_list]
    filtered_text = ' '.join(filtered_tokens)
    return filtered_text
def simple_stemmer(text):
    ps = nltk.porter.PorterStemmer()
    text = ' '.join([ps.stem(word) for word in text.split()])
    return text
def lemmatize_text(text):
    text = nlp(text)
    text = ' '.join([word.lemma_ if word.lemma_ != '-PRON-' else word.text for word in text])
    return text
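
# A small usage sketch of the normalization pipeline above; the setup names, model and
# sample corpus are assumptions, not from the original snippet
import nltk
import spacy
from nltk.tokenize.toktok import ToktokTokenizer

nltk.download('stopwords')
nlp = spacy.load('en_core_web_sm')
tokenizer = ToktokTokenizer()
stopword_list = nltk.corpus.stopwords.words('english')

sample_corpus = ["The <b>markets</b> were quite volatile today, weren't they?",
                 "US unveils world's most powerful supercomputer."]
print(normalize_corpus(sample_corpus, html_stripping=False))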