pragatibaheti/feature.py

## feature.py
import nltk
from nltk.tokenize import word_tokenize

# Build the bag of words: one flat list holding every token of every message.
# NOTE(review): `processed` is not defined in this snippet; it is assumed to
# be an iterable of message strings prepared earlier in the pipeline.
all_words = []
for message in processed:
    all_words.extend(word_tokenize(message))

# FreqDist encodes a "frequency distribution": it counts the number of times
# each token occurs. From here on `all_words` is that distribution, not the
# raw token list.
all_words = nltk.FreqDist(all_words)

# Use the 1500 most common words as features.
# most_common() ranks tokens by descending count. Slicing keys() does not:
# in NLTK 3 FreqDist is a Counter subclass, so keys() comes back in
# first-seen order and would select the first 1500 distinct tokens instead.
word_features = [word for word, _count in all_words.most_common(1500)]