Last active
June 8, 2022 16:25
-
-
Save pranavraikote/2227b5e4e6d92a33fefe5a4fcc33d064 to your computer and use it in GitHub Desktop.
NLP Tutorials - Part 24: Named Entity Recognition
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# !pip install -U spacy
# !python -m spacy download en_core_web_sm

# Importing the libraries
import random

import spacy
from spacy.training import Example

# Training data: a list of (text, annotations) pairs. Each entity is given as
# (start_char, end_char, label) where end_char is EXCLUSIVE, i.e.
# text[start_char:end_char] must be exactly the entity text and fall on token
# boundaries, otherwise spaCy cannot align the span and drops it.
train_data = [
    ("The pizza in Naples it the best", {'entities': [(4, 9, 'FOOD'), (13, 19, 'LOC')]}),
    ("India is famous for Taj Mahal and Infosys", {'entities': [(0, 5, 'LOC'), (20, 29, 'LOC'), (34, 41, 'ORG')]}),
    ("Germany is one of the best countries in Europe", {'entities': [(0, 7, 'LOC'), (40, 46, 'LOC')]}),
]

# Instantiating the NLP pipeline
nlp = spacy.blank('en')
# add_pipe returns the component that is actually attached to the pipeline;
# labels must be registered on this instance (a component built separately
# with create_pipe is never used by nlp).
ner = nlp.add_pipe("ner", last=True)

# Binding the entities to the NER pipeline
for _, annot in train_data:
    for _start, _end, label in annot.get('entities'):
        ner.add_label(label)

# Converting the raw annotations into Example objects
examples = [
    Example.from_dict(nlp.make_doc(text), annotations)
    for text, annotations in train_data
]

# The model weights must be initialized before the first update; initialize()
# returns the optimizer used for the training steps below.
optimizer = nlp.initialize(lambda: examples)

losses = {}

# Training the NER model (a single pass over the data; repeat this loop for
# several epochs to get meaningful predictions)
for batch in spacy.util.minibatch(examples, size=1):
    # Update the model
    nlp.update(batch, sgd=optimizer, losses=losses, drop=0.5)

print(losses)

# Testing the output
for text, _ in train_data:
    parsed = nlp(text)
    # label_ is the human-readable label string (label is its integer ID)
    print('Entities', [(ent.text, ent.label_) for ent in parsed.ents])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment