-
-
Save amankharwal/a7b3097e711e703abd90a2bc68e91046 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Lower Casing
# Lower-case every entry of the "reviews_list" column through the .str
# accessor. `zomato` is assumed to be a pandas DataFrame created earlier
# (its construction is not part of this snippet).
zomato["reviews_list"] = zomato["reviews_list"].str.lower()
## Removal of Punctuation
import string

# Every character contained in this string is deleted from the text.
PUNCT_TO_REMOVE = string.punctuation

# Translation table that maps each punctuation character to None. It is
# built once here because it does not change between calls.
_PUNCT_TABLE = str.maketrans('', '', PUNCT_TO_REMOVE)


def remove_punctuation(text):
    """Return *text* with every ``PUNCT_TO_REMOVE`` character deleted.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` without the punctuation characters. All other
        characters, including whitespace, are left untouched.
    """
    return text.translate(_PUNCT_TABLE)
# Strip punctuation from every review. The function is passed directly;
# wrapping it in a lambda would only add an extra call per row.
zomato["reviews_list"] = zomato["reviews_list"].apply(remove_punctuation)
## Removal of Stopwords
from nltk.corpus import stopwords

# English stop-word list from NLTK, stored as a set so that membership
# tests are constant-time.
# NOTE: stopwords.words() raises LookupError when the NLTK "stopwords"
# corpus is not installed; run nltk.download("stopwords") once beforehand.
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text, stop_words=None):
    """Return *text* with stop words dropped and whitespace normalised.

    ``text`` is converted with ``str()`` and split on whitespace; the words
    that survive are re-joined with single spaces. Matching is exact, so a
    word is only dropped when it equals an entry of ``stop_words``
    character for character (including case).

    Args:
        text: Value to clean.
        stop_words: Optional container of words to drop. When omitted, the
            module-level ``STOPWORDS`` set is used.

    Returns:
        The remaining words joined by single spaces.
    """
    if stop_words is None:
        stop_words = STOPWORDS
    return " ".join(word for word in str(text).split() if word not in stop_words)
# Drop stop words from every review. The function is passed directly
# instead of through a pass-through lambda.
zomato["reviews_list"] = zomato["reviews_list"].apply(remove_stopwords)
## Removal of URLs
import re

# Matches "http://..." / "https://..." and "www...." up to the next
# whitespace character. Compiled once instead of on every call.
_URL_PATTERN = re.compile(r'https?://\S+|www\.\S+')


def remove_urls(text):
    """Return *text* with every URL-like token deleted.

    A token counts as a URL when it starts with ``http://``, ``https://``
    or ``www.``; the match runs up to the next whitespace character. The
    whitespace around a removed URL is kept as is.

    The pattern relies on the literal ``://`` and ``.`` characters, so this
    must be applied before any step that strips punctuation.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` with the matched URLs removed.
    """
    return _URL_PATTERN.sub('', text)
# Remove URLs from every review.
# NOTE(review): punctuation has already been stripped from this column by
# the remove_punctuation pass above, so the "://" and "." that the URL
# pattern requires are gone and this pass cannot match anything. Move the
# URL removal ahead of the punctuation removal for it to take effect.
zomato["reviews_list"] = zomato["reviews_list"].apply(remove_urls)

# Preview five randomly chosen rows of the cleaned reviews with their
# cuisines (the value is only displayed when this is the last expression
# of a notebook cell).
zomato[['reviews_list', 'cuisines']].sample(5)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment