# Last active
# February 12, 2021 08:53
# -
# -
# Save tagtog-gists/ae22aed9de8819dc5e6d016631e6b3c3 to your computer and use it in GitHub Desktop.
# Example of how to connect your models to tagtog using webhooks. Find the article explaining this gist's context at https://tagtog.medium.com/connect-your-nlp-models-to-tagtog-using-webhooks-13d422ae4dff
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
import functools
import json
import os

import requests
import spacy
from flask import Flask, request, Response
def get_class_id(label):
    """
    Translates the spaCy label id into the tagtog entity type id

    - label: spaCy label id (e.g. 'PERSON')

    Returns the tagtog entity type id ('e_1', 'e_2' or 'e_3'), or None when the
    label has no tagtog counterpart.
    """
    choices = {'PERSON': 'e_1', 'ORG': 'e_2', 'MONEY': 'e_3'}
    # dict.get already returns None for keys that are not in the mapping
    return choices.get(label)
def get_entities(spans, pipeline):
    """
    Translates a tuple of named entity Span objects (https://spacy.io/api/span) into a
    list of tagtog entities (https://docs.tagtog.net/anndoc.html#ann-json). Each entity is
    defined by the entity type ID (classId), the part name where the annotation is (part),
    the entity offsets and the confidence (annotation status, who created it and probability).

    - spans: the named entities in the spaCy doc
    - pipeline: trained pipeline name

    Returns a list of dicts, one per span whose label has a tagtog entity type.
    Spans with a label that get_class_id does not map are left out.
    """
    default_prob = 1
    default_part_id = 's1v1'
    default_state = 'pre-added'
    tagtog_entities = []
    for span in spans:
        class_id = get_class_id(span.label_)
        if class_id is None:
            # No tagtog entity type for this spaCy label: nothing to annotate
            continue
        tagtog_entities.append({
            'classId': class_id,
            'part': default_part_id,
            'offsets': [{'start': span.start_char, 'text': span.text}],
            'confidence': {
                'state': default_state,
                # The annotation author is recorded as the ML pipeline
                'who': ['ml:' + pipeline],
                'prob': default_prob,
            },
            'fields': {},
            # this is related to the kb_id (knowledge base ID) field from the Span spaCy object
            'normalizations': {},
        })
    return tagtog_entities
# Set your credentials at tagtog
# Both variables are mandatory: os.environ[...] raises KeyError at import time
# when either one is not set.
MY_USERNAME = os.environ['MY_TAGTOG_USERNAME']
MY_PASSWORD = os.environ['MY_TAGTOG_PASSWORD']

# Set tagtog project name
MY_PROJECT = 'demo-webhook'

# API authentication
tagtog_API_endpoint = "https://www.tagtog.net/-api/documents/v1"
auth = requests.auth.HTTPBasicAuth(username=MY_USERNAME, password=MY_PASSWORD)

# Parameters for the GET API call to get a document
# (see https://docs.tagtog.net/API_documents_v1.html#examples-get-the-original-document-by-document-id)
get_params_doc = {'owner': MY_USERNAME, 'project': MY_PROJECT, 'output': 'text'}

# Parameters for the POST API call to import a pre-annotated document
# (see https://docs.tagtog.net/API_documents_v1.html#examples-import-pre-annotated-plain-text-file)
post_params_doc = {'owner': MY_USERNAME, 'project': MY_PROJECT, 'output': 'null', 'format': 'default-plus-annjson'}

app = Flask(__name__)
# Seconds to wait for tagtog before giving up; without a timeout `requests`
# can block the webhook handler indefinitely.
_REQUEST_TIMEOUT = 30


@functools.lru_cache(maxsize=None)
def _load_pipeline(name):
    """Load the spaCy trained pipeline `name` once and reuse it on later calls."""
    return spacy.load(name)


# Handle any POST request coming to the app root path
@app.route('/', methods=['POST'])
def respond():
    """
    Webhook handler: pre-annotate the tagtog document named in the request.

    Reads the 'tagtogID' field of the JSON body, downloads the document text
    from tagtog, runs the spaCy pipeline on it and uploads the text again
    together with the detected entities as an ann.json.

    Returns an empty 200 response when the document was processed or when the
    request carries no 'tagtogID', and an empty 502 response when a call to the
    tagtog API fails.
    """
    # silent=True yields None instead of an error for a missing/invalid JSON body
    payload = request.get_json(silent=True)
    print(payload)
    doc_id = payload.get('tagtogID') if isinstance(payload, dict) else None
    if not doc_id:
        return Response()
    print(doc_id)

    # Build the query per request; the module-level dict is shared by all
    # requests and must not be modified here.
    get_params = dict(get_params_doc, ids=doc_id)
    try:
        get_response = requests.get(tagtog_API_endpoint, params=get_params, auth=auth,
                                    timeout=_REQUEST_TIMEOUT)
        # An error body must not be annotated and uploaded as document content
        get_response.raise_for_status()
    except requests.RequestException as err:
        print('Could not fetch document from tagtog:', err)
        return Response(status=502)

    # Load the spaCy trained pipeline (https://spacy.io/models/en#en_core_web_sm) and apply it to text
    pipeline = 'en_core_web_sm'
    nlp = _load_pipeline(pipeline)
    doc = nlp(get_response.text)

    # Initialize ann.json (specification: https://docs.tagtog.net/anndoc.html#ann-json)
    annjson = {
        # Set the document as not confirmed, an annotator will manually confirm whether the annotations are correct
        'anncomplete': False,
        'metas': {},
        'relations': [],
        # Transform the spaCy entities into tagtog entities
        'entities': get_entities(doc.ents, pipeline),
    }

    # Pre-annotated document composed of the content and the annotations
    files = [('text.txt', get_response.text), ('text.ann.json', json.dumps(annjson))]
    try:
        post_response = requests.post(tagtog_API_endpoint, params=post_params_doc, auth=auth,
                                      files=files, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print('Could not upload annotations to tagtog:', err)
        return Response(status=502)
    print(post_response.text)
    return Response()
if __name__ == "__main__":
    # Start the Flask server with its default settings when run as a script
    app.run()
# Sign up for free
# to join this conversation on GitHub.
# Already have an account?
# Sign in to comment