Example of stemming, lemmatisation and POS-tagging in NLTK
from nltk import pos_tag
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer, WordNetLemmatizer
stemmer = PorterStemmer()
lemmatiser = WordNetLemmatizer()
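# One-time setup: the tokenizer, tagger and lemmatiser each need an NLTK
# data package (exact package names may vary slightly by NLTK version):
#   import nltk
#   nltk.download("punkt")                       # for word_tokenize
#   nltk.download("averaged_perceptron_tagger")  # for pos_tag
#   nltk.download("wordnet")                     # for WordNetLemmatizer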
print("Stem %s: %s" % ("going", stemmer.stem("going")))
print("Stem %s: %s" % ("gone", stemmer.stem("gone")))
print("Stem %s: %s" % ("goes", stemmer.stem("goes")))
print("Stem %s: %s" % ("went", stemmer.stem("went")))
"""
Stem going: go
Stem gone: gone
Stem goes: goe
Stem went: went
"""
print("Without context")
print("Lemmatise %s: %s" % ("going", lemmatiser.lemmatize("going")))
print("Lemmatise %s: %s" % ("gone", lemmatiser.lemmatize("gone")))
print("Lemmatise %s: %s" % ("goes", lemmatiser.lemmatize("goes")))
print("Lemmatise %s: %s" % ("went", lemmatiser.lemmatize("went")))
"""
Without context
Lemmatise going: going
Lemmatise gone: gone
Lemmatise goes: go
Lemmatise went: went
"""
print("With context")
print("Lemmatise %s: %s" % ("going", lemmatiser.lemmatize("going", pos="v")))
print("Lemmatise %s: %s" % ("gone", lemmatiser.lemmatize("gone", pos="v")))
print("Lemmatise %s: %s" % ("goes", lemmatiser.lemmatize("goes", pos="v")))
print("Lemmatise %s: %s" % ("went", lemmatiser.lemmatize("went", pos="v")))
"""
With context
Lemmatise going: go
Lemmatise gone: go
Lemmatise goes: go
Lemmatise went: go
"""
print("Stem %s: %s" % ("studying", stemmer.stem("studying")))
print("Stem %s: %s" % ("study", stemmer.stem("study")))
print("Stem %s: %s" % ("studies", stemmer.stem("studies")))
print("Stem %s: %s" % ("studied", stemmer.stem("studied")))
"""
Stem studying: studi
Stem study: studi
Stem studies: studi
Stem studied: studi
"""
print("Without context")
print("Lemmatise %s: %s" % ("studying", lemmatiser.lemmatize("studying")))
print("Lemmatise %s: %s" % ("study", lemmatiser.lemmatize("study")))
print("Lemmatise %s: %s" % ("studies", lemmatiser.lemmatize("studies")))
print("Lemmatise %s: %s" % ("studied", lemmatiser.lemmatize("studied")))
"""
Without context
Lemmatise studying: studying
Lemmatise study: study
Lemmatise studies: study
Lemmatise studied: studied
"""
print("With context")
print("Lemmatise %s: %s" % ("studying", lemmatiser.lemmatize("studying", pos="v")))
print("Lemmatise %s: %s" % ("study", lemmatiser.lemmatize("study", pos="v")))
print("Lemmatise %s: %s" % ("studies", lemmatiser.lemmatize("studies", pos="v")))
print("Lemmatise %s: %s" % ("studied", lemmatiser.lemmatize("studied", pos="v")))
"""
With context
Lemmatise studying: study
Lemmatise study: study
Lemmatise studies: study
Lemmatise studied: study
"""
s = "This is a simple sentence"
tokens = word_tokenize(s)
tokens_pos = pos_tag(tokens)
print(tokens_pos)
"""
[('This', 'DT'), ('is', 'VBZ'), ('a', 'DT'), ('simple', 'JJ'), ('sentence', 'NN')]
"""