@mayhewsw
Last active December 22, 2018 23:11
Use AllenNLP for NER programmatically, and test the runtime.
from allennlp.predictors.predictor import Predictor
import time
model = "https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.04.26.tar.gz"
print("Loading model...")
predictor = Predictor.from_path(model)
print("Done loading model.")
# This lets us pass pre-tokenized (space-separated) text straight through.
# Remove the next 3 lines if you want AllenNLP to tokenize the text itself.
from allennlp.data.tokenizers.word_splitter import JustSpacesWordSplitter
spacestok = JustSpacesWordSplitter()
predictor._tokenizer = spacestok
# Pre-tokenized text: tokens separated by single spaces.
sent = "Stephen Mayhew is a person who lives in the North Pole ."
# Tag the sentence 100 times, one call per sentence.
start = time.time()
for i in range(100):
    res = predictor.predict_json({"sentence": sent})
end = time.time()
print(end - start)
# Create a batch of 100 sentences and tag them in a single call.
start = time.time()
sents = []
for i in range(100):
    sents.append({"sentence": sent})
res = predictor.predict_batch_json(sents)
end = time.time()
print(end - start)
# Output (with GPU):
# Loading model...
# Done loading model.
# 19.09427809715271
# 2.3905272483825684
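
# For reference, a minimal sketch of how to read the predictions. With this
# NER model the batch call should return a list of dicts, each with "words"
# and "tags" keys (BIO-style tags); that key layout is an assumption here,
# so check res[0].keys() on your AllenNLP version before relying on it.
for word, tag in zip(res[0]["words"], res[0]["tags"]):
    print(word, tag)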