# tagtog-gists/tagtog-spaCy.py

## tagtog-spaCy.py
import spacy
import json
import requests
import os

def get_class_id(label):
  """
  Look up the tagtog entity type id that corresponds to a spaCy entity label.
  - label: spaCy label id (one of 'PERSON', 'ORG' or 'MONEY' is mapped)
  Returns the tagtog entity type id, or None when the label has no mapping.
  """
  label_to_class = dict(PERSON='e_1', ORG='e_2', MONEY='e_3')
  try:
    return label_to_class[label]
  except KeyError:
    # Labels outside the mapping are reported as None so callers can skip them
    return None

def get_entities(spans, pipeline):
  """
  Convert named entity Span objects (https://spacy.io/api/span) into tagtog
  entities (https://docs.tagtog.net/anndoc.html#ann-json). Every entity carries the
  entity type ID (classId), the part name where the annotation is (part), the entity
  offsets and the confidence (annotation status, who created it and probability).
  Spans whose label has no tagtog entity type (get_class_id returns None) are skipped.
  - spans: the named entities in the spaCy doc
  - pipeline: trained pipeline name, recorded as the annotation author ('ml:' + pipeline)
  Returns the list of tagtog entity dictionaries, in the order of the input spans.
  """
  # Pair each span with its tagtog class id lazily, so the lookup happens once per span
  labelled_spans = ((get_class_id(span.label_), span) for span in spans)
  return [
    {
      'classId': class_id,
      # every annotation is placed in the same part
      'part': 's1v1',
      'offsets': [{'start': span.start_char, 'text': span.text}],
      # fixed confidence: state 'pre-added', attributed to the pipeline, probability 1
      'confidence': {'state': 'pre-added', 'who': ['ml:' + pipeline], 'prob': 1},
      'fields': {},
      # this is related to the kb_id (knowledge base ID) field from the Span spaCy object
      'normalizations': {},
    }
    for class_id, span in labelled_spans
    if class_id is not None
  ]

# Set the credentials at tagtog and project name.
# Both environment variables must be defined: os.environ[...] raises KeyError otherwise.
MY_USERNAME = os.environ['MY_TAGTOG_USERNAME']
MY_PASSWORD = os.environ['MY_TAGTOG_PASSWORD']
MY_PROJECT = 'demo-spaCy'

# API authentication
tagtogAPIUrl = "https://www.tagtog.net/-api/documents/v1"
auth = requests.auth.HTTPBasicAuth(username=MY_USERNAME, password=MY_PASSWORD)
# Seconds to wait for the server. Without an explicit timeout, requests waits
# indefinitely and the script would hang on an unresponsive server.
REQUEST_TIMEOUT_SECONDS = 60

text = "Paypal Holdings Inc (PYPL) President and CEO Daniel Schulman Sold $2.7 million of Shares"
# Load the spaCy trained pipeline (https://spacy.io/models/en#en_core_web_sm) and apply it to text
pipeline = 'en_core_web_sm'
nlp = spacy.load(pipeline)
doc = nlp(text)

# Initialize ann.json (specification: https://docs.tagtog.net/anndoc.html#ann-json)
annjson = {}
# Set the document as not confirmed, an annotator will manually confirm whether the annotations are correct
annjson['anncomplete'] = False
annjson['metas'] = {}
annjson['relations'] = []
# Transform the spaCy entities into tagtog entities
annjson['entities'] = get_entities(doc.ents, pipeline)

# Parameters for the API call
# see https://docs.tagtog.net/API_documents_v1.html#examples-import-pre-annotated-plain-text-file
params = {'owner': MY_USERNAME, 'project': MY_PROJECT, 'output': 'null', 'format': 'default-plus-annjson'}
# Pre-annotated document composed of the content and the annotations
files=[('doc1.txt', text), ('doc1.ann.json', json.dumps(annjson))]
# POST request to send the pre-annotated document.
# A requests timeout exception is raised if the server does not answer in time.
response = requests.post(
  tagtogAPIUrl,
  params=params,
  auth=auth,
  files=files,
  timeout=REQUEST_TIMEOUT_SECONDS,
)

print(response.text)
	import spacy
	import json
	import requests
	import os

	def get_class_id(label):
	  """
	  Translates the spaCy label id into the tagtog entity type id
	  - label: spaCy label id
	  Returns the tagtog entity type id, or None when the label is not mapped.
	  """
	  choices = {'PERSON': 'e_1', 'ORG': 'e_2', 'MONEY': 'e_3'}
	  return choices.get(label, None)

	def get_entities(spans, pipeline):
	  """
	  Translates a tuple of named entity Span objects (https://spacy.io/api/span) into a
	  list of tagtog entities (https://docs.tagtog.net/anndoc.html#ann-json). Each entity is
	  defined by the entity type ID (classId), the part name where the annotation is (part),
	  the entity offsets and the confidence (annotation status, who created it and probability).
	  Spans whose label has no tagtog entity type (get_class_id returns None) are skipped.
	  - spans: the named entities in the spaCy doc
	  - pipeline: trained pipeline name
	  Returns the list of tagtog entity dictionaries.
	  """
	  default_prob = 1
	  default_part_id = 's1v1'
	  default_state = 'pre-added'
	  tagtog_entities = []
	  for span in spans:
	    class_id = get_class_id(span.label_)
	    if class_id is not None:
	      tagtog_entities.append( {
	        'classId': class_id,
	        'part': default_part_id,
	        'offsets':[{'start': span.start_char, 'text': span.text}],
	        'confidence': {'state': default_state,'who': ['ml:' + pipeline],'prob': default_prob},
	        'fields':{},
	        # this is related to the kb_id (knowledge base ID) field from the Span spaCy object
	        'normalizations': {}} )
	  return tagtog_entities

	# Set the credentials at tagtog and project name.
	# Both environment variables must be defined: os.environ[...] raises KeyError otherwise.
	MY_USERNAME = os.environ['MY_TAGTOG_USERNAME']
	MY_PASSWORD = os.environ['MY_TAGTOG_PASSWORD']
	MY_PROJECT = 'demo-spaCy'

	# API authentication
	tagtogAPIUrl = "https://www.tagtog.net/-api/documents/v1"
	auth = requests.auth.HTTPBasicAuth(username=MY_USERNAME, password=MY_PASSWORD)
	# Seconds to wait for the server. Without an explicit timeout, requests waits
	# indefinitely and the script would hang on an unresponsive server.
	REQUEST_TIMEOUT_SECONDS = 60

	text = "Paypal Holdings Inc (PYPL) President and CEO Daniel Schulman Sold $2.7 million of Shares"
	# Load the spaCy trained pipeline (https://spacy.io/models/en#en_core_web_sm) and apply it to text
	pipeline = 'en_core_web_sm'
	nlp = spacy.load(pipeline)
	doc = nlp(text)

	# Initialize ann.json (specification: https://docs.tagtog.net/anndoc.html#ann-json)
	annjson = {}
	# Set the document as not confirmed, an annotator will manually confirm whether the annotations are correct
	annjson['anncomplete'] = False
	annjson['metas'] = {}
	annjson['relations'] = []
	# Transform the spaCy entities into tagtog entities
	annjson['entities'] = get_entities(doc.ents, pipeline)

	# Parameters for the API call
	# see https://docs.tagtog.net/API_documents_v1.html#examples-import-pre-annotated-plain-text-file
	params = {'owner': MY_USERNAME, 'project': MY_PROJECT, 'output': 'null', 'format': 'default-plus-annjson'}
	# Pre-annotated document composed of the content and the annotations
	files=[('doc1.txt', text), ('doc1.ann.json', json.dumps(annjson))]
	# POST request to send the pre-annotated document.
	# A requests timeout exception is raised if the server does not answer in time.
	response = requests.post(tagtogAPIUrl, params=params, auth=auth, files=files, timeout=REQUEST_TIMEOUT_SECONDS)

	print(response.text)