Skip to content

Instantly share code, notes, and snippets.


amankharwal/ Secret

Created Nov 18, 2020
What would you like to do?
# Distinct values of the 'name' column, materialized as a plain Python list.
restaurant_names = [*zomato['name'].unique()]
def get_top_words(column, top_nu_of_words, nu_of_word):
    """Return the most frequent n-grams found in a collection of documents.

    English stop words are excluded by the vectorizer before counting.

    Args:
        column: Iterable of text documents (e.g. a dataframe column of
            strings); passed straight to ``CountVectorizer.fit_transform``.
        top_nu_of_words: Maximum number of ``(term, count)`` pairs to return.
        nu_of_word: ``(min_n, max_n)`` tuple forwarded as ``ngram_range``.

    Returns:
        A list of ``(term, count)`` tuples ordered by descending count,
        truncated to at most ``top_nu_of_words`` entries.
    """
    vec = CountVectorizer(ngram_range=nu_of_word, stop_words='english')
    bag_of_words = vec.fit_transform(column)
    # Summing over axis 0 collapses the document-term matrix into a single
    # row holding the total occurrence count of every term.
    sum_words = bag_of_words.sum(axis=0)
    # vocabulary_ maps each term to its column index in that matrix.
    words_freq = [
        (word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()
    ]
    # Stable in-place sort: terms with equal counts keep vocabulary order.
    words_freq.sort(key=lambda pair: pair[1], reverse=True)
    return words_freq[:top_nu_of_words]
# Remove the listed columns from the dataframe.
zomato = zomato.drop(
    columns=['address', 'rest_type', 'type', 'menu_item', 'votes']
)
import pandas
# Randomly sample 50% of the dataframe rows (frac=0.5); no random_state is
# given, so the selected rows differ from run to run.
df_percent = zomato.sample(frac=0.5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment