# DataTurks/train_spacy.py

## train_spacy.py
def train_spacy(training_pickle_file, n_iter=1, dropout=0.2):
    """Train the NER component of a blank English spaCy pipeline.

    The training data is unpickled from ``training_pickle_file`` and is
    iterated as ``(text, annotations)`` pairs, where ``annotations`` is a
    dict whose ``'entities'`` value is a sequence of entities.  The label of
    each entity is read from index 2 (presumably ``(start, end, label)``
    tuples, as in spaCy's training format -- confirm against the producer of
    the pickle file).

    Relies on ``pickle``, ``random`` and ``spacy`` being imported at module
    level.

    Args:
        training_pickle_file: Path of the pickle file holding the training
            examples.  The unpickled object must be a mutable sequence,
            because it is shuffled in place before every iteration.
        n_iter: Number of passes over the training data.  Defaults to 1.
        dropout: Dropout rate forwarded to ``nlp.update`` as ``drop``.
            Defaults to 0.2.

    Returns:
        The ``nlp`` object after training, so the caller can use or save it.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising -- only pass files that come from a trusted source.
    with open(training_pickle_file, 'rb') as pickle_file:
        train_data = pickle.load(pickle_file)

    nlp = spacy.blank('en')  # create blank Language class

    # Reuse an existing 'ner' pipe if there is one, otherwise create it and
    # append it to the pipeline; either way ``ner`` is always bound below.
    if 'ner' in nlp.pipe_names:
        ner = nlp.get_pipe('ner')
    else:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)

    # Register every entity label that occurs in the training data.
    # ``or ()`` tolerates examples whose 'entities' key is missing or None.
    for _, annotations in train_data:
        for ent in annotations.get('entities') or ():
            ner.add_label(ent[2])

    # Disable every other pipe so that only the NER component is trained.
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            print("Statring iteration " + str(itn))
            random.shuffle(train_data)
            losses = {}  # filled in by nlp.update during this iteration
            for text, annotations in train_data:
                nlp.update(
                    [text],  # batch of texts
                    [annotations],  # batch of annotations
                    drop=dropout,  # dropout - make it harder to memorise data
                    sgd=optimizer,  # callable to update weights
                    losses=losses)
            print(losses)

    return nlp