Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created November 18, 2020 06:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amankharwal/8633fe4268c38bf7c308e2c9dfdbe9c2 to your computer and use it in GitHub Desktop.
Save amankharwal/8633fe4268c38bf7c308e2c9dfdbe9c2 to your computer and use it in GitHub Desktop.
# Restaurant names: every distinct value of the 'name' column, in order of
# first appearance, collected into a plain Python list.
restaurant_names = [*zomato['name'].unique()]
def get_top_words(column, top_nu_of_words, nu_of_word):
    """Return the most frequent n-grams found in a collection of texts.

    Args:
        column: Iterable of text documents (e.g. a pandas Series of strings);
            passed unchanged to ``CountVectorizer.fit_transform``.
        top_nu_of_words: Maximum number of ``(term, count)`` pairs to return.
        nu_of_word: ``(min_n, max_n)`` tuple forwarded to the vectorizer as
            its ``ngram_range``.

    Returns:
        A list of ``(term, count)`` tuples ordered by descending count and
        truncated to at most ``top_nu_of_words`` entries.
    """
    # English stop words are excluded by the vectorizer itself.
    vec = CountVectorizer(ngram_range=nu_of_word, stop_words='english')
    bag_of_words = vec.fit_transform(column)

    # Summing over axis 0 collapses the document-term matrix into a single
    # row holding the total count of each vocabulary entry.
    sum_words = bag_of_words.sum(axis=0)

    # vocabulary_ maps each term to its column index in that row.
    words_freq = [
        (word, sum_words[0, idx]) for word, idx in vec.vocabulary_.items()
    ]
    words_freq.sort(key=lambda pair: pair[1], reverse=True)
    return words_freq[:top_nu_of_words]
# Remove the listed columns from the dataset.
zomato = zomato.drop(
    columns=['address', 'rest_type', 'type', 'menu_item', 'votes']
)
import pandas
# Randomly sample 50% of the dataframe's rows (frac=0.5). No random_state is
# passed, so a different subset is drawn on every run.
df_percent = zomato.sample(frac=0.5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment