Skip to content

Instantly share code, notes, and snippets.

@mariastefan
mariastefan / 1. train_new_tagger.py
Last active July 17, 2020 16:39
train new tagger after custom tokenizer
import random
from pathlib import Path
import spacy
import sys
import os
sys.path.append('.')
from resolution_coreferences_pronominales.custom_model_training.custom_tokenizer import nlp_loader
# Absolute path of the 'customPOS/' folder located beside this script
# (trailing slash kept). Presumably the trained tagger is written here --
# the code that uses it is not visible in this fragment.
output_dir = f"{os.path.abspath(os.path.dirname(__file__))}/customPOS/"
# Base model left disabled in this variant of the script:
# base_model = 'fr_core_news_sm'
Adrien voudrait plus de gateau. Il est culotté celui-là.
[['Adrien', 'ADJ'], ['voudrait', 'VERB'], ['plus', 'ADV'], ['de', 'ADP'], ['gateau', 'NOUN'], ['.', 'PUNCT'], ['Il', 'PRON'], ['est', 'AUX'], ['culotté', 'VERB'], ['celui-là', 'ADJ'], ['.', 'PUNCT'], ['\n', 'SPACE']]
Adrien voudrait plus de gateau. Il est culotté celui-ci.
[['Adrien', 'ADJ'], ['voudrait', 'VERB'], ['plus', 'ADV'], ['de', 'ADP'], ['gateau', 'NOUN'], ['.', 'PUNCT'], ['Il', 'PRON'], ['est', 'AUX'], ['culotté', 'VERB'], ['celui-ci', 'PRON'], ['.', 'PUNCT'], ['\n', 'SPACE']]
import random
from pathlib import Path
import spacy
import sys
import os
sys.path.append(".")
from resolution_coreferences_pronominales.coreferences import analyses_texte
# Absolute path of the 'customPOS/' folder located beside this script
# (trailing slash kept). Presumably the custom tagger lives here -- the
# code that uses it is not visible in this fragment.
output_dir = f"{os.path.abspath(os.path.dirname(__file__))}/customPOS/"
# Model identifier; looks like the spaCy French small pipeline name --
# confirm against the code that loads it.
base_model = "fr_core_news_sm"