Skip to content

Instantly share code, notes, and snippets.

@danieldk
Created March 30, 2022 14:00
Show Gist options
  • Save danieldk/06c62241a176b459c043f2ed9d280295 to your computer and use it in GitHub Desktop.
--- training/pt_core_news_lg_rule/model-best/config.cfg 2021-11-22 10:28:51.610899763 +0100
+++ training-edit-tree-bench/pt_core_news_lg/model-best/config.cfg 2021-11-21 17:40:53.559460807 +0100
@@ -12,7 +12,7 @@
[nlp]
lang = "pt"
-pipeline = ["tok2vec","morphologizer","parser","senter","attribute_ruler","lemmatizer","ner"]
+pipeline = ["tok2vec","morphologizer","parser","senter","attribute_ruler","edit_tree_lemmatizer","ner"]
disabled = ["parser","senter","attribute_ruler","ner"]
before_creation = null
after_creation = null
@@ -27,12 +27,21 @@
scorer = {"@scorers":"spacy.attribute_ruler_scorer.v1"}
validate = false
-[components.lemmatizer]
-factory = "lemmatizer"
-mode = "lookup"
-model = null
+[components.edit_tree_lemmatizer]
+factory = "edit_tree_lemmatizer"
+backoff = "form"
+min_tree_freq = 3
overwrite = false
-scorer = {"@scorers":"spacy.lemmatizer_scorer.v1"}
+top_k = 1
+
+[components.edit_tree_lemmatizer.model]
+@architectures = "spacy.Tagger.v1"
+nO = null
+
+[components.edit_tree_lemmatizer.model.tok2vec]
+@architectures = "spacy.Tok2VecListener.v1"
+width = ${components.tok2vec.model.encode.width}
+upstream = "tok2vec"
[components.morphologizer]
factory = "morphologizer"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment