Skip to content

Instantly share code, notes, and snippets.

@alvations
Created January 9, 2016 23:27
Show Gist options
  • Save alvations/07758d02412d928414bb to your computer and use it in GitHub Desktop.
Save alvations/07758d02412d928414bb to your computer and use it in GitHub Desktop.
from nltk.corpus import wordnet as wn
from nltk.stem import PorterStemmer, WordNetLemmatizer
# The stock NLTK helpers are left commented out: this file binds its own
# `pos_tag` just below and defines its own `word_tokenize` further down.
#from nltk import pos_tag, word_tokenize
# Pywsd's Lemmatizer.
# Shared stemmer / lemmatizer instances, created once at import time and used
# as the default `stemmer` / `lemmatizer` arguments of the functions below.
porter = PorterStemmer()
wnl = WordNetLemmatizer()
from nltk.tag import PerceptronTagger
# One tagger instance is created at import time; `pos_tag` is its bound `tag`
# method, which is called on the tokenizer's output (a list of tokens) in
# `lemmatize_sentence`.
tagger = PerceptronTagger()
pos_tag = tagger.tag
def lemmatize(ambiguous_word, pos=None, neverstem=True,
              lemmatizer=wnl, stemmer=porter):
    """
    Convert a surface word into a form that WordNet knows about.

    The word is first lemmatized. If the lemma has at least one WordNet
    synset it is returned. Otherwise, unless `neverstem` is set, the stem of
    the surface word is tried, and it is returned only if WordNet has synsets
    for it. When neither the lemma nor the stem is usable, the surface word
    is returned unchanged.

    This handles the case where a user passes a surface word that is not
    itself a lemma.

    :param ambiguous_word: the surface word to normalize.
    :param pos: optional part-of-speech passed to the lemmatizer; any falsy
        value (None, '') makes the lemmatizer use its own default.
    :param neverstem: if true, never fall back to the stem.
    :param lemmatizer: object exposing `lemmatize(word[, pos=...])`.
    :param stemmer: object exposing `stem(word)`.
    :return: the lemma, the stem, or the original word (see above).
    """
    if pos:
        lemma = lemmatizer.lemmatize(ambiguous_word, pos=pos)
    else:
        lemma = lemmatizer.lemmatize(ambiguous_word)

    # A lemma that WordNet recognizes is the preferred answer.
    if wn.synsets(lemma):
        return lemma
    if neverstem:
        return ambiguous_word

    # Stem lazily: it is only needed once the lemma lookup has failed and
    # stemming is allowed, so the common paths skip the stemmer entirely.
    stem = stemmer.stem(ambiguous_word)
    return stem if wn.synsets(stem) else ambiguous_word
def penn2morphy(penntag, returnNone=False):
    """
    Map a Penn Treebank POS tag onto the corresponding WordNet POS constant.

    Only the first two characters of `penntag` are looked at, so e.g. 'NNS'
    and 'NNP' are both treated as 'NN'.

    :param penntag: the Penn Treebank tag (normally a string).
    :param returnNone: selects the fallback value for tags without a mapping.
    :return: `wn.NOUN`, `wn.ADJ`, `wn.VERB` or `wn.ADV` for tags starting
        with 'NN', 'JJ', 'VB' or 'RB'; otherwise None if `returnNone` is
        true, else the empty string.
    """
    morphy_tag = {'NN': wn.NOUN, 'JJ': wn.ADJ,
                  'VB': wn.VERB, 'RB': wn.ADV}
    try:
        return morphy_tag[penntag[:2]]
    # KeyError: tag prefix has no mapping. TypeError: `penntag` cannot be
    # sliced (e.g. None) or its slice is unhashable. Anything else, such as
    # KeyboardInterrupt, is deliberately allowed to propagate.
    except (KeyError, TypeError):
        return None if returnNone else ''
def word_tokenize(text):
    """
    Minimal tokenizer: break `text` on runs of whitespace and return the
    resulting pieces as a list. Punctuation is not separated from words.
    """
    tokens = text.split()
    return tokens
def lemmatize_sentence(sentence, neverstem=False, keepWordPOS=False,
                       tokenizer=word_tokenize, postagger=pos_tag,
                       lemmatizer=wnl, stemmer=porter):
    """
    Tokenize, POS-tag and lemmatize every token of `sentence`.

    Each token is tagged, its tag is converted with `penn2morphy`, and the
    lowercased token is passed to `lemmatize` together with that POS.

    :param sentence: the input handed to `tokenizer`.
    :param neverstem: forwarded to `lemmatize`.
    :param keepWordPOS: if true, also return the original tokens and POS.
    :param tokenizer: callable turning `sentence` into tokens.
    :param postagger: callable turning tokens into (token, tag) pairs.
    :param lemmatizer: forwarded to `lemmatize`.
    :param stemmer: forwarded to `lemmatize`.
    :return: the list of lemmas; or, with `keepWordPOS`, a tuple
        `(words, lemmas, poss)` where unmapped POS values are None.
    """
    # One pass over the tagged tokens, keeping (token, lemma, pos) together.
    triples = []
    for token, penn_tag in postagger(tokenizer(sentence)):
        morphy_pos = penn2morphy(penn_tag)
        lemma = lemmatize(token.lower(), morphy_pos, neverstem,
                          lemmatizer, stemmer)
        triples.append((token, lemma, morphy_pos))

    lemmas = [lemma for _, lemma, _ in triples]
    if not keepWordPOS:
        return lemmas

    words = [token for token, _, _ in triples]
    # penn2morphy signals "no mapping" with '', reported to callers as None.
    poss = [None if morphy_pos == '' else morphy_pos
            for _, _, morphy_pos in triples]
    return words, lemmas, poss
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment