Skip to content

Instantly share code, notes, and snippets.

@b2m
Last active April 24, 2023 15:22
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save b2m/6e2697ce182548a98320e4b7b7b885b6 to your computer and use it in GitHub Desktop.
Save b2m/6e2697ce182548a98320e4b7b7b885b6 to your computer and use it in GitHub Desktop.
Documented FastAPI wrapper around the NER component of the de_core_news_sm model from spaCy.
from typing import List
import spacy
import uvicorn
from fastapi import FastAPI
from pydantic import BaseModel, Field
# Markdown text displayed at the top of the generated API documentation.
_API_DESCRIPTION = (
    "\n"
    "Provides the NER component from [spaCy](https://spacy.io/) as web service.\n"
    "- spaCy: 3.1.1\n"
    "- Model: [de_core_news_sm](https://spacy.io/models/de#de_core_news_sm)\n"
)

# The interactive documentation is served from the site root ("/").
app = FastAPI(
    docs_url="/",
    title="NER service based on spaCy",
    description=_API_DESCRIPTION,
)
# Load the trained German pipeline with only the NER component.
# `exclude` is used instead of `disable`: disabled components are still
# loaded into memory (they are merely skipped at run time), whereas excluded
# components are not loaded at all, which is what this service wants.
nlp = spacy.load(
    "de_core_news_sm",
    exclude=[
        "tok2vec",
        "tagger",
        "morphologizer",
        "parser",
        "attribute_ruler",
        "lemmatizer",
    ],
)
# NOTE: the class docstring is kept verbatim because pydantic publishes it as
# the model description in the generated schema.
class NER_Request(BaseModel):
    """
    Request with text to perform NER.
    """

    # Required field (Ellipsis default means "no default value").
    text: str = Field(
        default=...,
        example="Martin Luther war in Wittenberg.",
        description="Text to extract entities from.",
        title="Text",
    )
# NOTE: the class docstring is kept verbatim because pydantic publishes it as
# the model description in the generated schema.
class Entity(BaseModel):
    """
    Named Entity found in the text.
    """

    # `start` and `end` are filled by the /ner endpoint from spaCy's
    # `Span.start` / `Span.end`, which are TOKEN indices (end exclusive), not
    # character offsets.  The published descriptions say so explicitly; the
    # examples (0 and 2 for the two-token entity "Martin Luther") match.
    start: int = Field(
        ...,
        title="Start",
        description="Index of the first token of the entity in the tokenized text.",
        ge=0,
        example=0,
    )
    end: int = Field(
        ...,
        title="End",
        description="Index of the first token after the entity in the tokenized text.",
        ge=1,
        example=2,
    )
    # Surface form of the entity exactly as it appears in the input.
    text: str = Field(
        ...,
        title="Text",
        description="The text of the Named Entity.",
        min_length=1,
        example="Martin Luther",
    )
    # Entity type as reported by the model (e.g. "PER").
    label: str = Field(
        ...,
        title="Label",
        description="The label (type) for the Named Entity.",
        example="PER",
    )
@app.post(
    "/ner",
    summary="Perform NER on text.",
    response_description="List of found entities.",
    response_model=List[Entity],
)
def ner(ner_request: NER_Request):
    """
    Performs a Named Entity Recognition on the given `text`.
    Will return the found entities in a list.
    """
    # The docstring above is published as the endpoint description, so it is
    # kept word for word.
    entities = []
    # Run the pipeline once and convert every recognised span into the
    # response model.
    for span in nlp(ner_request.text).ents:
        entities.append(
            Entity(
                label=span.label_,
                text=span.text,
                start=span.start,
                end=span.end,
            )
        )
    return entities
if __name__ == "__main__":
    # Pass the application object itself instead of the import string
    # "ner-service:app".  With the string form uvicorn imports this file a
    # second time under the name "ner-service", which loads the spaCy model
    # twice and only works while the file keeps exactly that name.
    uvicorn.run(app, host="127.0.0.1", port=5000)
fastapi==0.67.0
pydantic==1.8.2
python-multipart==0.0.5
spacy==3.1.1
uvicorn==0.14.0
@b2m
Copy link
Author

b2m commented Aug 5, 2021

This is a simple version without comments using form-urlencoded values.

import spacy
import uvicorn
from fastapi import FastAPI, Form

app = FastAPI()
# The endpoint below only reads `doc.ents`, so the components that are not
# needed for NER are excluded; they are then neither loaded nor run on every
# request.
nlp = spacy.load(
    "de_core_news_sm",
    exclude=[
        "tok2vec",
        "tagger",
        "morphologizer",
        "parser",
        "attribute_ruler",
        "lemmatizer",
    ],
)


@app.post("/ner")
def ner(text: str = Form(...)):
    # `text` arrives as a required form field; the result is one plain dict
    # per entity found by the pipeline.
    found = []
    for span in nlp(text).ents:
        found.append({"text": span.text, "label": span.label_})
    return found


if __name__ == "__main__":
    # Pass the application object itself instead of the import string
    # "simple-ner-service:app".  With the string form uvicorn imports this
    # file a second time, which loads the spaCy model twice and only works
    # while the file keeps exactly that name.
    uvicorn.run(app, host="127.0.0.1", port=5000)

Jython expression in OpenRefine:

import json, urllib, urllib2
url = 'http://localhost:5000/ner'
request_data = urllib.urlencode({"text": value.encode('utf-8')})
response = urllib2.urlopen(url, request_data)
return json.dumps(json.load(response), ensure_ascii=False)

@ericleasemorgan
Copy link

Nice integration of many technologies ++

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment