# parulnith/Listing most common words in text.py (secret gist)

## Listing most common words in text.py
#source of code : https://medium.com/@cristhianboujon/how-to-list-the-most-common-words-from-text-corpus-using-scikit-learn-dad4d0cab41d

from sklearn.feature_extraction.text import CountVectorizer

def get_top_n_gram(corpus, ngram_range, n=None):
    """Return the most frequent n-grams in ``corpus`` as ``(ngram, count)`` pairs.

    Args:
        corpus: Iterable of text documents handed to ``CountVectorizer``.
        ngram_range: ``(min_n, max_n)`` tuple forwarded to ``CountVectorizer``,
            e.g. ``(2, 2)`` to count bigrams only.
        n: Number of leading pairs to return; ``None`` returns every n-gram.

    Returns:
        A list of ``(ngram, count)`` tuples sorted by count, highest first.
        The vectorizer is built with ``stop_words='english'``.
    """
    vec = CountVectorizer(ngram_range=ngram_range, stop_words='english')
    # fit_transform learns the vocabulary and builds the document-term matrix
    # in a single pass instead of tokenizing the corpus twice (fit + transform).
    bag_of_words = vec.fit_transform(corpus)
    # Summing over axis 0 gives one total per vocabulary column.
    sum_words = bag_of_words.sum(axis=0)
    words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
    words_freq.sort(key=lambda pair: pair[1], reverse=True)
    return words_freq[:n]

# Select the 'text' column of the training rows for each emotion label (0-5)
sadness = train.loc[train['label'] == 0, 'text']
joy = train.loc[train['label'] == 1, 'text']
love = train.loc[train['label'] == 2, 'text']
anger = train.loc[train['label'] == 3, 'text']
fear = train.loc[train['label'] == 4, 'text']
surprise = train.loc[train['label'] == 5, 'text']

# Calculating unigrams, bigrams and trigrams for different emotions.
# Every call goes through get_top_n_gram; unigrams are the ngram_range=(1, 1) case.
# The [2:] slice drops the two most frequent entries from each result.
sadness_unigrams = get_top_n_gram(sadness.values, (1, 1), 15)[2:]  # leaving the top 2 as they are feel and feeling
sad_bigrams = get_top_n_gram(sadness.values, (2, 2), 7)[2:]
sad_trigrams = get_top_n_gram(sadness.values, (3, 3), 7)[2:]

# Repeat the above code for other emotions
	#source of code : https://medium.com/@cristhianboujon/how-to-list-the-most-common-words-from-text-corpus-using-scikit-learn-dad4d0cab41d

	from sklearn.feature_extraction.text import CountVectorizer

	def get_top_n_gram(corpus, ngram_range, n=None):
		"""Return the most frequent n-grams in ``corpus`` as ``(ngram, count)`` pairs.

		Args:
			corpus: Iterable of text documents handed to ``CountVectorizer``.
			ngram_range: ``(min_n, max_n)`` tuple forwarded to ``CountVectorizer``,
				e.g. ``(2, 2)`` to count bigrams only.
			n: Number of leading pairs to return; ``None`` returns every n-gram.

		Returns:
			A list of ``(ngram, count)`` tuples sorted by count, highest first.
			The vectorizer is built with ``stop_words='english'``.
		"""
		vec = CountVectorizer(ngram_range=ngram_range, stop_words='english')
		# fit_transform learns the vocabulary and builds the document-term matrix
		# in a single pass instead of tokenizing the corpus twice (fit + transform).
		bag_of_words = vec.fit_transform(corpus)
		# Summing over axis 0 gives one total per vocabulary column.
		sum_words = bag_of_words.sum(axis=0)
		words_freq = [(word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()]
		words_freq.sort(key=lambda pair: pair[1], reverse=True)
		return words_freq[:n]

	# Select the 'text' column of the training rows for each emotion label (0-5)
	sadness = train.loc[train['label'] == 0, 'text']
	joy = train.loc[train['label'] == 1, 'text']
	love = train.loc[train['label'] == 2, 'text']
	anger = train.loc[train['label'] == 3, 'text']
	fear = train.loc[train['label'] == 4, 'text']
	surprise = train.loc[train['label'] == 5, 'text']

	# Calculating unigrams, bigrams and trigrams for different emotions.
	# Every call goes through get_top_n_gram; unigrams are the ngram_range=(1, 1) case.
	# The [2:] slice drops the two most frequent entries from each result.
	sadness_unigrams = get_top_n_gram(sadness.values, (1, 1), 15)[2:]  # leaving the top 2 as they are feel and feeling
	sad_bigrams = get_top_n_gram(sadness.values, (2, 2), 7)[2:]
	sad_trigrams = get_top_n_gram(sadness.values, (3, 3), 7)[2:]

	# Repeat the above code for other emotions