Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Tokenisation, Lemmatising, Stemming, Corpus — small NLTK demo script.
# Each numbered example (eg1..eg6) shows one basic NLTK operation.
import nltk

# Opens the interactive NLTK data downloader; the corpora below
# (punkt, stopwords, wordnet) must be installed for the script to run.
nltk.download()

# eg1: split a sentence into word tokens
from nltk.tokenize import sent_tokenize, word_tokenize

sentence1 = "Ravi and Raj went for a walk"
print(word_tokenize(sentence1))

# eg2: split text into sentence tokens
print(sent_tokenize(sentence1))

# eg3: remove English stop words ("and", "for", "a", ...) from a text
from nltk.corpus import stopwords

sentence2 = "Ravi and Raj went for a ride. Ravi and Raj went for a small ride"
stop = set(stopwords.words('english'))  # set gives O(1) membership tests
filtered_words = [token for token in word_tokenize(sentence2) if token not in stop]
print(filtered_words)

# eg4: reduce words to their stem with the Porter algorithm
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()
for word in ["cheer", "cheering", "cheerful"]:
    print(stemmer.stem(word))

# eg5: lemmatise words (dictionary-based, unlike stemming)
from nltk.stem import WordNetLemmatizer

lemmatise = WordNetLemmatizer()
print("scare :", lemmatise.lemmatize("scare"))
print("hate :", lemmatise.lemmatize("hate"))
# pos="a" tells the lemmatiser to treat the word as an adjective,
# so "worse" maps to its lemma "bad".
# (Original code raised NameError here: it used `lemmatise` while the
# variable had been bound as `Lemmatise` — fixed by consistent naming.)
print("worse :", lemmatise.lemmatize("worse", pos="a"))

# eg6: list the corpora/readers available under nltk.corpus
import nltk.corpus
print(dir(nltk.corpus))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment