Skip to content

Instantly share code, notes, and snippets.

@Hironsan
Created June 30, 2020 08:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Hironsan/534b99ddafa40fb1c677b7cb6ce4e89d to your computer and use it in GitHub Desktop.
Save Hironsan/534b99ddafa40fb1c677b7cb6ce4e89d to your computer and use it in GitHub Desktop.
spaCy for Lambda
import os
import urllib.request
import spacy
import tarfile
from pathlib import Path
def maybe_download(model, dest):
    """Return the on-disk path of a spaCy model, downloading it if absent.

    The model is treated as already installed when ``dest/model`` exists.
    Otherwise the ``{model}.tar.gz`` release archive is fetched from the
    explosion/spacy-models GitHub releases and unpacked into ``dest``.

    Args:
        model: Versioned model name, e.g. ``'en_core_web_sm-2.3.0'``.
        dest: Directory that holds (or will hold) the unpacked model.

    Returns:
        A ``pathlib.Path`` of the form ``dest/model/<package>/model``, where
        ``<package>`` is the part of ``model`` before the first ``'-'``.

    Raises:
        urllib.error.URLError: If the archive cannot be downloaded.
        tarfile.TarError: If the downloaded archive cannot be read.
    """
    dest_dir = Path(dest)
    save_path = dest_dir / model
    if not save_path.exists():
        print('Downloading...')
        # urlretrieve opens the target file directly and does not create
        # missing parent directories, so make sure dest exists first.
        dest_dir.mkdir(parents=True, exist_ok=True)
        url = f'https://github.com/explosion/spacy-models/releases/download/{model}/{model}.tar.gz'
        archive = dest_dir / f'{model}.tar.gz'
        urllib.request.urlretrieve(url, archive)
        # NOTE(review): extractall trusts the member paths stored in the
        # archive; that is tolerable only because the URL is a fixed GitHub
        # release. Consider an extraction filter where the runtime has one.
        with tarfile.open(archive) as tar:
            tar.extractall(path=dest_dir)
    # Path that is handed to spacy.load(): dest/model/<package>/model.
    package = model.split('-')[0]
    return save_path / package / model
def lambda_handler(event, context):
    """Run the module-level ``nlp`` pipeline on ``event['text']``.

    Args:
        event: Mapping that must contain a ``'text'`` entry to analyse.
        context: Unused by this handler.

    Returns:
        A list with one dict per entity in ``doc.ents``, each holding the
        entity's ``text``, ``label``, and ``start``/``end`` character offsets.
    """
    parsed = nlp(event['text'])
    entities = []
    for span in parsed.ents:
        entities.append({
            'text': span.text,
            'label': span.label_,
            'start': span.start_char,
            'end': span.end_char,
        })
    return entities
# Module-level setup: runs once when this module is imported, not on every
# call to lambda_handler.
# Versioned model name; it is also used to build the release download URL.
model = 'en_core_web_sm-2.3.0'
# Directory where the model is stored.
# NOTE(review): presumably a persistent mount attached to the function - confirm.
mnt_path = '/mnt/models'
# Downloads and unpacks the model on first use, then returns its directory.
model_path = maybe_download(model, mnt_path)
# Pipeline object that lambda_handler reads as a module-level name.
nlp = spacy.load(model_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment