Skip to content

Instantly share code, notes, and snippets.

Last active December 15, 2020 11:25
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
# Tokenisation, lemmatising, stemming, corpus — small NLTK demo script.
#
# Pipeline demonstrated:
#   1. word-tokenize a sentence and strip English stop words
#   2. stem a few related words with the Porter stemmer
#   3. lemmatise words with the WordNet lemmatiser (incl. a POS-tagged call)
#
# Requires the NLTK data packages: punkt, stopwords, wordnet.
import nltk
import nltk.corpus
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer

sentence1 = "Ravi and Raj went for a walk"
sentence2 = "Ravi and Raj went for a ride. Ravi and Raj went for a small ride"

# --- Stop-word removal -------------------------------------------------
# set() gives O(1) membership tests versus O(n) on the raw word list.
stop = set(stopwords.words('english'))
Word = word_tokenize(sentence2)
Filtered_words = []
for i in Word:
    if i not in stop:
        # Loop body restored: the original paste was truncated here, but
        # Filtered_words is initialised above and never filled otherwise,
        # so collecting the non-stop-word tokens is the evident intent.
        Filtered_words.append(i)

# --- Stemming ----------------------------------------------------------
Words = ["cheer", "cheering", "cheerful"]
PS = PorterStemmer()
for word in Words:
    # Loop body restored: PS was constructed but never used in the
    # truncated original; printing each stem is the evident intent.
    print(word, ":", PS.stem(word))

# --- Lemmatising -------------------------------------------------------
Lemmatise = WordNetLemmatizer()
print("scare :", Lemmatise.lemmatize("scare"))
print("hate :", Lemmatise.lemmatize("hate"))
# a means adjective in pos.
# Bug fix: original referenced lowercase `lemmatise`, which is undefined
# (NameError) — the lemmatiser is bound as `Lemmatise` above.
print("worse :", Lemmatise.lemmatize("worse", pos ="a"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment