Skip to content

Instantly share code, notes, and snippets.

@dandiep
Created November 25, 2021 02:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dandiep/dbb37db2bfc37e09969d3b75ad69a84e to your computer and use it in GitHub Desktop.
Save dandiep/dbb37db2bfc37e09969d3b75ad69a84e to your computer and use it in GitHub Desktop.
spaCy integration for the German IWNLP lemmatizer
from spacy import Language
from spacy.lang.de import German
from spacy.pipeline.lemmatizer import *
from spacy.tokens import Doc
from iwnlp.iwnlp_wrapper import IWNLPWrapper
class IWNLPLemmatizer:
    """Pipeline component that overwrites token lemmas with IWNLP lookups.

    The component is a callable taking a ``Doc`` and returning the same
    ``Doc``. For every token it asks the wrapped ``IWNLPWrapper`` for lemma
    candidates (passing the token text and its ``pos_`` tag) and, when at
    least one candidate comes back, stores the first one in ``token.lemma_``.
    Tokens without candidates are left untouched.
    """

    # Location of the IWNLP dump used when the caller does not supply one.
    DEFAULT_LEMMATIZER_PATH = './zata/language_data/IWNLP.Lemmatizer_20181001.json'

    def __init__(self, lemmatizer_path: str = DEFAULT_LEMMATIZER_PATH):
        """Load the IWNLP lemmatizer data.

        Args:
            lemmatizer_path: Path of the IWNLP JSON dump handed to
                ``IWNLPWrapper``. Defaults to ``DEFAULT_LEMMATIZER_PATH``,
                which is resolved relative to the current working directory.
        """
        self.lemmatizer = IWNLPWrapper(lemmatizer_path=lemmatizer_path)

    def __call__(self, doc: "Doc") -> "Doc":
        """Set ``lemma_`` on each token that IWNLP has a lemma for.

        Args:
            doc: The document to process; its tokens are modified in place.

        Returns:
            The same ``doc`` object, so the component can be chained in a
            pipeline.
        """
        for token in doc:
            lemmas = self.lemmatizer.lemmatize(
                token.text, pos_universal_google=token.pos_
            )
            # The lookup may yield None or an empty result; in both cases the
            # lemma already present on the token is kept.
            if lemmas:
                # Several candidates are possible; the first one is used.
                token.lemma_ = lemmas[0]
        return doc
@German.factory("iwnlp_lemmatizer")
def create_en_normalizer(nlp, name):
    """Factory registered on ``German`` under the name ``"iwnlp_lemmatizer"``.

    Both arguments are accepted but not used; a fresh ``IWNLPLemmatizer``
    built with its default settings is returned on every call.

    NOTE(review): the function name mentions "en_normalizer" although it
    builds the IWNLP lemmatizer; the name is left unchanged so existing
    references keep working.
    """
    component = IWNLPLemmatizer()
    return component
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment