Skip to content

Instantly share code, notes, and snippets.

@amankharwal
Created November 18, 2020 06:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amankharwal/a7b3097e711e703abd90a2bc68e91046 to your computer and use it in GitHub Desktop.
Save amankharwal/a7b3097e711e703abd90a2bc68e91046 to your computer and use it in GitHub Desktop.
## Lower Casing
# Overwrite the "reviews_list" column with a lower-cased copy of itself, using
# the pandas string accessor.
# NOTE(review): presumably done so the later stop-word and URL matching does
# not depend on letter case — confirm.
zomato["reviews_list"] = zomato["reviews_list"].str.lower()
## Removal of Punctuation
import string

PUNCT_TO_REMOVE = string.punctuation
# Deletion table derived from PUNCT_TO_REMOVE once, at definition time, instead
# of being rebuilt for every row. With two empty strings and a third argument,
# str.maketrans maps each character of the third argument to None (delete).
_PUNCT_TABLE = str.maketrans('', '', PUNCT_TO_REMOVE)


def remove_punctuation(text):
    """Return ``text`` with every character in ``PUNCT_TO_REMOVE`` deleted.

    Characters are removed outright (not replaced by a space), so tokens that
    were joined only by punctuation are fused: ``"a.b"`` becomes ``"ab"``.
    Only the ASCII punctuation listed in ``string.punctuation`` is affected.

    Args:
        text: A ``str``; other types raise ``AttributeError`` because they
            have no ``translate`` method.

    Returns:
        A new string without the punctuation characters.
    """
    return text.translate(_PUNCT_TABLE)
# Strip URLs *before* punctuation. Punctuation removal deletes ':', '/' and
# '.', so once it has run no URL pattern can match any more and links survive
# as fused tokens such as "httpsexamplecom". The regex is the same one used by
# remove_urls further down in this script.
zomato["reviews_list"] = (
    zomato["reviews_list"]
    .str.replace(r'https?://\S+|www\.\S+', '', regex=True)
    .apply(remove_punctuation)
)
## Removal of Stopwords
from nltk.corpus import stopwords
# Hold NLTK's English stop-word list as a set so each membership test in
# remove_stopwords is a hash lookup rather than a list scan.
# NOTE(review): assumes the NLTK "stopwords" corpus is already downloaded —
# TODO confirm.
# NOTE(review): punctuation is stripped from the reviews before this list is
# applied, so any entry that itself contains punctuation (e.g. a contraction
# with an apostrophe) could no longer match — verify against the corpus.
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text, *, stop_words=None):
    """Return ``text`` with stop words dropped and whitespace normalised.

    ``text`` is coerced with ``str()`` and split on runs of whitespace; the
    surviving tokens are re-joined with single spaces, so leading, trailing
    and repeated whitespace is collapsed as a side effect. Matching is exact
    and case-sensitive.

    Args:
        text: Value to clean; non-string input is converted with ``str()``.
        stop_words: Optional container of tokens to drop. When omitted, the
            module-level ``STOPWORDS`` set is used, so existing
            single-argument callers behave exactly as before.

    Returns:
        The remaining tokens joined by single spaces.
    """
    if stop_words is None:
        # Looked up at call time so the default always reflects the
        # module-level set.
        stop_words = STOPWORDS
    return " ".join(
        word for word in str(text).split() if word not in stop_words
    )
# Run the stop-word filter over every review and store the result back in the
# same column. The function is passed directly; no lambda wrapper is needed.
zomato["reviews_list"] = zomato["reviews_list"].apply(remove_stopwords)
## Removal of URLs
import re

# Compiled once at definition time rather than on every call. Matches either
# an http:// or https:// link, or a bare "www." host, up to the next
# whitespace character.
URL_PATTERN = re.compile(r'https?://\S+|www\.\S+')


def remove_urls(text):
    """Return ``text`` with every URL-looking substring deleted.

    A match runs to the next whitespace, so punctuation glued to the end of a
    link is removed with it. The whitespace around a removed link is kept,
    which can leave two consecutive spaces. Matching is case-sensitive
    (lower-case ``http``/``www`` only).

    Args:
        text: The string to clean.

    Returns:
        A new string with the matched substrings replaced by ``''``.
    """
    return URL_PATTERN.sub('', text)
# Apply the URL filter to every review and write the result back.
# NOTE(review): punctuation removal earlier in this script already deletes
# ':', '/' and '.', so no text reaching this line can still contain "http://"
# or "www."; as positioned, this pass cannot match anything.
zomato["reviews_list"] = zomato["reviews_list"].apply(remove_urls)
# Notebook-style preview: five randomly chosen rows of the cleaned reviews
# alongside their cuisines. The result is not assigned to anything.
zomato[['reviews_list', 'cuisines']].sample(n=5)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment