@mayhewsw
Last active December 22, 2018 23:11
Use AllenNLP for NER programmatically, and test the runtime.
from allennlp.predictors.predictor import Predictor
import time
model = "https://s3-us-west-2.amazonaws.com/allennlp/models/ner-model-2018.04.26.tar.gz"
print("Loading model...")
predictor = Predictor.from_path(model)
print("Done loading model.")
# This lets us pass pre-tokenized (space-separated) text straight through.
# Remove the next 3 lines if you want AllenNLP to tokenize the text itself.
from allennlp.data.tokenizers.word_splitter import JustSpacesWordSplitter
spacestok = JustSpacesWordSplitter()
predictor._tokenizer = spacestok
# Pre-tokenized text: tokens separated by single spaces.
sent = "Stephen Mayhew is a person who lives in the North Pole ."
# Tag the sentence 100 times, one call per sentence.
start = time.time()
for i in range(100):
    res = predictor.predict_json({"sentence": sent})
end = time.time()
print(end - start)
# Create a batch of 100 sentences and tag them in a single call.
start = time.time()
sents = []
for i in range(100):
    sents.append({"sentence": sent})
res = predictor.predict_batch_json(sents)
end = time.time()
print(end - start)
# Output (with GPU):
# Loading model...
# Done loading model.
# 19.09427809715271
# 2.3905272483825684
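
# For reference, a minimal sketch of how to read the predictions. With this
# NER model the batch call should return a list of dicts, each with "words"
# and "tags" keys (BIO-style tags); that key layout is an assumption here,
# so check res[0].keys() on your AllenNLP version before relying on it.
for word, tag in zip(res[0]["words"], res[0]["tags"]):
    print(word, tag)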