Skip to content

Instantly share code, notes, and snippets.

@AyishaR
Created January 22, 2021 16:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AyishaR/3d3fbfcadb28612af9f2be2f4b590a2f to your computer and use it in GitHub Desktop.
Save AyishaR/3d3fbfcadb28612af9f2be2f4b590a2f to your computer and use it in GitHub Desktop.
# Clean and stem every message, collect the stemmed words per class, and store
# the cleaned text as a new DataFrame column.
# Relies on names defined outside this snippet: `nltk`, the DataFrame `df`
# (columns 'Message' and 'Category'), and the text helpers removeURL,
# removeHTML, onlyAlphabets and removeRecurring.
sno = nltk.stem.SnowballStemmer('english')  # English Snowball stemmer

spam = []  # Stemmed words from messages whose Category == 1
ham = []  # Stemmed words from every other message
all_sentences = []  # One cleaned, stemmed string per message, in row order

# Walk both columns in lockstep instead of indexing them by position.
for message, category in zip(df['Message'].values, df['Category'].values):
    # Cleaning pipeline, applied in the same order as the original script.
    sentence = removeURL(message)
    sentence = removeHTML(sentence)
    sentence = onlyAlphabets(sentence)
    sentence = sentence.lower()
    sentence = removeRecurring(sentence)

    # Stop-word filtering is disabled here: every whitespace-separated token
    # is stemmed and kept.
    stems = [sno.stem(word) for word in sentence.split()]

    # Category 1 feeds the spam word list; anything else counts as ham.
    if category == 1:
        spam.extend(stems)
    else:
        ham.extend(stems)

    all_sentences.append(' '.join(stems))

# Add the cleaned text as a column in the DataFrame.
df['Cleaned'] = all_sentences
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment