@DataTurks
Created May 27, 2018 10:35
Train spaCy NER example
import random

import spacy


################### Train spaCy NER ###################
def train_spacy():
    TRAIN_DATA = convert_dataturks_to_spacy("dataturks_downloaded.json")
    nlp = spacy.blank('en')  # create a blank Language class

    # create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)

    # add labels
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get('entities'):
            ner.add_label(ent[2])

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):  # only train NER
        optimizer = nlp.begin_training()
        for itn in range(1):
            print("Starting iteration " + str(itn))
            random.shuffle(TRAIN_DATA)
            losses = {}
            for text, annotations in TRAIN_DATA:
                nlp.update(
                    [text],  # batch of texts
                    [annotations],  # batch of annotations
                    drop=0.2,  # dropout - make it harder to memorise data
                    sgd=optimizer,  # callable to update weights
                    losses=losses)
            print(losses)

    # do prediction
    doc = nlp("Samsing mobiles below $100")
    print("Entities= " + str([str(ent.text) + "_" + str(ent.label_) for ent in doc.ents]))
@MarkHeatzig

what loss function is used here?
