Skip to content

Instantly share code, notes, and snippets.

@pebbie
Last active February 1, 2022 19:29
Show Gist options
  • Save pebbie/1c19721b824892b47743aa470396942a to your computer and use it in GitHub Desktop.
Save pebbie/1c19721b824892b47743aa470396942a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# KnowGraphs Winter School 2022 Data Challenge\n",
"**Group 2** : \n",
"+ Efstratios Koulierakis\n",
"+ Pere-Lluis Huguet Cabot\n",
"+ Yang Lu\n",
"+ Peb Ruswono Aryan\n",
"+ Bo Xiong"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from rdflib import Graph, Namespace, RDF, OWL, RDFS, XSD"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os, glob"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import json"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import f1_score"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import MultiLabelBinarizer"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"from IPython.display import display"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# NIF Core ontology namespace; load_dataset reads its Context, isString, referenceContext, beginIndex, endIndex and anchorOf terms\n",
"NIF = Namespace('http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# ITS RDF vocabulary namespace; load_dataset reads its taIdentRef, taClassRef and taConfidence terms\n",
"ITS = Namespace('http://www.w3.org/2005/11/its/rdf#')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Helper functions"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def load_dataset(filename):\n",
"    \"\"\"\n",
"    Read a NIF document in RDF format and return its annotated sentences.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    filename : str\n",
"        Location of the NIF file; passed unchanged to ``Graph().parse``.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of dict\n",
"        One entry per ``nif:Context`` resource, ordered by URI, with keys\n",
"        ``uri``, ``text`` and ``mentions``. ``mentions`` is a list of dicts\n",
"        (keys ``uri``, ``ctxUri``, ``begin``, ``end``, ``anchor``,\n",
"        ``identRef``, ``classRef``, ``confidence``) ordered by ``begin``.\n",
"        ``begin`` and ``end`` are plain ints.\n",
"\n",
"    Raises\n",
"    ------\n",
"    IndexError\n",
"        If a context has no ``nif:isString`` or a mention lacks\n",
"        ``nif:beginIndex``, ``nif:endIndex`` or ``nif:anchorOf``.\n",
"    \"\"\"\n",
"    g = Graph().parse(filename)\n",
"\n",
"    def first(subject, predicate):\n",
"        # first object of (subject, predicate); IndexError when there is none\n",
"        return list(g.objects(subject, predicate))[0]\n",
"\n",
"    sents = []\n",
"    for c in sorted(g.subjects(RDF.type, NIF.Context)):\n",
"        chunks = []\n",
"        for item in g.subjects(NIF.referenceContext, c):\n",
"            conf = list(g.objects(item, ITS.taConfidence))\n",
"            chunks.append({\n",
"                # mention URI\n",
"                'uri': item,\n",
"                # doc URI\n",
"                'ctxUri': c,\n",
"                # span (begin & end) as plain ints, so that sorting and equality\n",
"                # are numeric regardless of how the RDF literal was typed\n",
"                'begin': int(first(item, NIF.beginIndex).toPython()),\n",
"                'end': int(first(item, NIF.endIndex).toPython()),\n",
"                # anchor (surface form of the mention)\n",
"                'anchor': first(item, NIF.anchorOf),\n",
"                # identRef / classRef: empty list when the mention has none\n",
"                'identRef': list(g.objects(item, ITS.taIdentRef)),\n",
"                'classRef': list(g.objects(item, ITS.taClassRef)),\n",
"                # a mention without a confidence value gets confidence 1.0\n",
"                'confidence': float(conf[0].toPython()) if conf else 1.,\n",
"            })\n",
"\n",
"        sents.append({\n",
"            'uri': c,\n",
"            'text': first(c, NIF.isString).toPython(),\n",
"            'mentions': sorted(chunks, key=lambda x: x['begin']),\n",
"        })\n",
"    return sents"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"def intersection(lst1, lst2):\n",
"    \"\"\"\n",
"    Return the distinct elements that occur in both lists.\n",
"\n",
"    The result is a list built from a set, so its order is unspecified\n",
"    and duplicates are removed.\n",
"    \"\"\"\n",
"    first, second = set(lst1), set(lst2)\n",
"    common = first & second\n",
"    return list(common)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"def me_strong_entity_matching(mention1, mention2):\n",
"    \"\"\"\n",
"    Strong entity match: return 1 when the 'identRef' lists of the two\n",
"    mentions share at least one URI, 0 otherwise.\n",
"    \"\"\"\n",
"    shared_uris = intersection(mention1['identRef'], mention2['identRef'])\n",
"    if shared_uris:\n",
"        return 1\n",
"    return 0"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def ma_strong_annotation_matching(mention1, mention2):\n",
"    \"\"\"\n",
"    Strong annotation match: return 1 when both mentions have the same\n",
"    'begin' and 'end' and their 'identRef' lists share a URI, else 0.\n",
"    \"\"\"\n",
"    span1 = (mention1['begin'], mention1['end'])\n",
"    span2 = (mention2['begin'], mention2['end'])\n",
"    shared_uris = intersection(mention1['identRef'], mention2['identRef'])\n",
"    return int(span1 == span2 and len(shared_uris) > 0)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def is_mention_overlap(m1, m2):\n",
"    \"\"\"\n",
"    Check whether the spans of mentions m1 and m2 share at least one character.\n",
"\n",
"    Spans are treated as half-open intervals [begin, end): in the data used by\n",
"    this notebook the 20-character anchor 'Florence May Harding' has begin=0\n",
"    and end=20, i.e. 'end' is exclusive. Two spans that merely touch (one ends\n",
"    where the other begins) therefore do not overlap.\n",
"\n",
"    Offsets are passed through int() so that string-like offsets are compared\n",
"    numerically rather than character by character.\n",
"    \"\"\"\n",
"    latest_begin = max(int(m1['begin']), int(m2['begin']))\n",
"    earliest_end = min(int(m1['end']), int(m2['end']))\n",
"    return latest_begin < earliest_end"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"def mw_weak_annotation_matching(mention1, mention2):\n",
"    \"\"\"\n",
"    Weak annotation match: return 1 when the spans of the two mentions\n",
"    overlap and their 'identRef' lists share a URI, else 0.\n",
"    \"\"\"\n",
"    spans_overlap = is_mention_overlap(mention1, mention2)\n",
"    shared_uris = intersection(mention1['identRef'], mention2['identRef'])\n",
"    return int(bool(spans_overlap and shared_uris))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load Dataset\n",
"\n",
"Please run this notebook from the `data-challenge` folder, so that the relative paths used below (e.g. `datasets/`) resolve."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['datasets/MSNBC.ttl',\n",
" 'datasets/KORE50.ttl',\n",
" 'datasets/IITB.ttl',\n",
" 'datasets/OKE_2015_Task_1_example_set.ttl']"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# glob returns paths in a file-system dependent order; sort them so that\n",
"# positional picks such as dslist[-1] select the same file on every machine\n",
"dslist = sorted(glob.glob('datasets/*.*'))\n",
"dslist"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Select one of the gold-standard datasets to evaluate against."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Use the last path in dslist as the gold-standard file\n",
"# NOTE(review): which file this is depends on the ordering of dslist\n",
"dataset = dslist[-1]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,146\",\n",
" \"text\": \"Florence May Harding studied at a school in Sydney, and with Douglas Robert Dundas , but in effect had no formal training in either botany or art.\",\n",
" \"mentions\": [\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,20\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,146\",\n",
" \"begin\": \"0\",\n",
" \"end\": \"20\",\n",
" \"anchor\": \"Florence May Harding\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Florence_May_Harding\",\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Florence_May_Harding\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person\",\n",
" \"http://www.w3.org/2002/07/owl#Individual\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=34,40\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,146\",\n",
" \"begin\": \"34\",\n",
" \"end\": \"40\",\n",
" \"anchor\": \"school\",\n",
" \"identRef\": [\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/National_Art_School\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=44,50\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,146\",\n",
" \"begin\": \"44\",\n",
" \"end\": \"50\",\n",
" \"anchor\": \"Sydney\",\n",
" \"identRef\": [\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Sydney\",\n",
" \"http://dbpedia.org/resource/Sydney\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://ontologydesignpatterns.org/ont/wikipedia/d0.owl#Location\",\n",
" \"http://www.w3.org/2002/07/owl#Individual\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=61,82\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,146\",\n",
" \"begin\": \"61\",\n",
" \"end\": \"82\",\n",
" \"anchor\": \"Douglas Robert Dundas\",\n",
" \"identRef\": [\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Douglas_Robert_Dundas\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person\",\n",
" \"http://www.w3.org/2002/07/owl#Individual\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"text\": \"Such notables include James Carville, who was the senior political adviser to Bill Clinton, and Donna Brazile, the campaign manager of the 2000 presidential campaign of Vice-President Al Gore.\",\n",
" \"mentions\": [\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=22,36\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"22\",\n",
" \"end\": \"36\",\n",
" \"anchor\": \"James Carville\",\n",
" \"identRef\": [\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/James_Carville\",\n",
" \"http://dbpedia.org/resource/James_Carville\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.w3.org/2002/07/owl#Individual\",\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=57,74\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"57\",\n",
" \"end\": \"74\",\n",
" \"anchor\": \"political adviser\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Political_consulting\",\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Political_adviser\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Role\",\n",
" \"http://www.w3.org/2002/07/owl#Individual\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=78,90\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"78\",\n",
" \"end\": \"90\",\n",
" \"anchor\": \"Bill Clinton\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Bill_Clinton\",\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Bill_Clinton\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.w3.org/2002/07/owl#Individual\",\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=96,109\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"96\",\n",
" \"end\": \"109\",\n",
" \"anchor\": \"Donna Brazile\",\n",
" \"identRef\": [\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Donna_Brazile\",\n",
" \"http://dbpedia.org/resource/Donna_Brazile\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.w3.org/2002/07/owl#Individual\",\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=115,131\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"115\",\n",
" \"end\": \"131\",\n",
" \"anchor\": \"campaign manager\",\n",
" \"identRef\": [\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Campaign_manager\",\n",
" \"http://dbpedia.org/resource/Campaign_manager\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Role\",\n",
" \"http://www.w3.org/2002/07/owl#Individual\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=184,191\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"184\",\n",
" \"end\": \"191\",\n",
" \"anchor\": \"Al Gore\",\n",
" \"identRef\": [\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Al_Gore\",\n",
" \"http://dbpedia.org/resource/Al_Gore\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person\",\n",
" \"http://www.w3.org/2002/07/owl#Individual\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=0,69\",\n",
" \"text\": \"The senator received a Bachelor of Laws from the Columbia University.\",\n",
" \"mentions\": [\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=4,11\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=0,69\",\n",
" \"begin\": \"4\",\n",
" \"end\": \"11\",\n",
" \"anchor\": \"senator\",\n",
" \"identRef\": [\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Senator_1\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.w3.org/2002/07/owl#Individual\",\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Person\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=49,68\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=0,69\",\n",
" \"begin\": \"49\",\n",
" \"end\": \"68\",\n",
" \"anchor\": \"Columbia University\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Columbia_University\",\n",
" \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/Columbia_University\"\n",
" ],\n",
" \"classRef\": [\n",
" \"http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#Organization\",\n",
" \"http://www.w3.org/2002/07/owl#Individual\"\n",
" ],\n",
" \"confidence\": 1.0\n",
" }\n",
" ]\n",
" }\n",
"]\n"
]
}
],
"source": [
"# Parse the selected gold-standard file and print every sentence with its mentions as indented JSON\n",
"gold = load_dataset(dataset)\n",
"print(json.dumps(gold, indent=2))"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'OKE_2015_Task_1_example_set'"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# File name of the selected dataset without directory and extension\n",
"datasetname, _ = os.path.splitext(os.path.basename(dataset))\n",
"datasetname"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"List the system outputs"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['OKE_2015_Task_1_example_set/Babelfy.ttl',\n",
" 'OKE_2015_Task_1_example_set/DBpedia_Spotlight.ttl',\n",
" 'OKE_2015_Task_1_example_set/WAT.ttl']"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# System outputs are looked up in a folder named after the dataset; sorted for a stable order\n",
"samples = sorted(glob.glob(datasetname+'/*.*'))\n",
"samples"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,146\",\n",
" \"text\": \"Florence May Harding studied at a school in Sydney, and with Douglas Robert Dundas , but in effect had no formal training in either botany or art.\",\n",
" \"mentions\": [\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,20\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,146\",\n",
" \"begin\": \"0\",\n",
" \"end\": \"20\",\n",
" \"anchor\": \"Florence May Harding\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Florence_May_Harding\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=34,40\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,146\",\n",
" \"begin\": \"34\",\n",
" \"end\": \"40\",\n",
" \"anchor\": \"school\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/University\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 0.7177966101694915\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=44,50\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-1#char=0,146\",\n",
" \"begin\": \"44\",\n",
" \"end\": \"50\",\n",
" \"anchor\": \"Sydney\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Australia\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 0.7792207792207793\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"text\": \"Such notables include James Carville, who was the senior political adviser to Bill Clinton, and Donna Brazile, the campaign manager of the 2000 presidential campaign of Vice-President Al Gore.\",\n",
" \"mentions\": [\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=22,36\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"22\",\n",
" \"end\": \"36\",\n",
" \"anchor\": \"James Carville\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/James_Carville\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=57,74\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"57\",\n",
" \"end\": \"74\",\n",
" \"anchor\": \"political adviser\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Political_consulting\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 0.9238578680203046\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=78,90\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"78\",\n",
" \"end\": \"90\",\n",
" \"anchor\": \"Bill Clinton\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Bill_Clinton\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 0.5225653206650831\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=96,109\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"96\",\n",
" \"end\": \"109\",\n",
" \"anchor\": \"Donna Brazile\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Donna_Brazile\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=115,131\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"115\",\n",
" \"end\": \"131\",\n",
" \"anchor\": \"campaign manager\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Campaign_manager\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 1.0\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=184,191\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-2#char=0,192\",\n",
" \"begin\": \"184\",\n",
" \"end\": \"191\",\n",
" \"anchor\": \"Al Gore\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Al_Gore\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 1.0\n",
" }\n",
" ]\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=0,69\",\n",
" \"text\": \"The senator received a Bachelor of Laws from the Columbia University.\",\n",
" \"mentions\": [\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=4,11\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=0,69\",\n",
" \"begin\": \"4\",\n",
" \"end\": \"11\",\n",
" \"anchor\": \"senator\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Senator_of_the_College_of_Justice\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 0.004651162790697674\n",
" },\n",
" {\n",
" \"uri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=49,68\",\n",
" \"ctxUri\": \"http://www.ontologydesignpatterns.org/data/oke-challenge/task-1/sentence-3#char=0,69\",\n",
" \"begin\": \"49\",\n",
" \"end\": \"68\",\n",
" \"anchor\": \"Columbia University\",\n",
" \"identRef\": [\n",
" \"http://dbpedia.org/resource/Columbia_University\"\n",
" ],\n",
" \"classRef\": [],\n",
" \"confidence\": 0.423728813559322\n",
" }\n",
" ]\n",
" }\n",
"]\n"
]
}
],
"source": [
"# Load the first system output in the sorted samples list and print it as indented JSON\n",
"sample = load_dataset(samples[0])\n",
"print(json.dumps(sample, indent=2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Evaluate"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"accuracy\": 0.6666666666666666,\n",
" \"error\": 0.3333333333333333,\n",
" \"Macro-F1\": 0.6888888888888888,\n",
" \"Macro-Recall\": 0.8333333333333334,\n",
" \"Macro-Precision\": 0.611111111111111,\n",
" \"Micro-F1\": 0.7999999999999999,\n",
" \"Micro-Recall\": 0.8888888888888888,\n",
" \"Micro-Precision\": 0.7272727272727273,\n",
" \"Macro-F1-conf\": 0.8130936014542468,\n",
" \"Macro-Recall-conf\": 0.8333333333333334,\n",
" \"Macro-Precision-conf\": 0.7965400747434342,\n",
" \"Micro-F1-conf\": 0.8459750782454338,\n",
" \"Micro-Recall-conf\": 0.8729376510498422,\n",
" \"Micro-Precision-conf\": 0.8206281964097848,\n",
" \"Micro-F1-sklearn\": 0.8421052631578948,\n",
" \"Macro-F1-sklearn\": 0.7272727272727273\n",
"}\n"
]
}
],
"source": [
"def _ratio(numerator, denominator):\n",
"    \"\"\"Return numerator / denominator, or 0. when the denominator is not positive.\"\"\"\n",
"    return numerator / denominator if denominator > 0 else 0.\n",
"\n",
"\n",
"def _precision_recall_f1(tp, fp, fn):\n",
"    \"\"\"Return (precision, recall, F1) for true-positive, false-positive and false-negative totals.\"\"\"\n",
"    precision = _ratio(tp, tp + fp)\n",
"    recall = _ratio(tp, tp + fn)\n",
"    return precision, recall, _ratio(2 * precision * recall, precision + recall)\n",
"\n",
"\n",
"def _store_macro_micro(result, suffix, tps, fps, fns):\n",
"    \"\"\"Add macro (mean of per-sentence scores) and micro (pooled totals) P/R/F1 to result; keys end with suffix.\"\"\"\n",
"    per_sentence = [_precision_recall_f1(tp, fp, fn) for tp, fp, fn in zip(tps, fps, fns)]\n",
"    n_sents = len(per_sentence)\n",
"    result['Macro-F1' + suffix] = _ratio(sum(s[2] for s in per_sentence), n_sents)\n",
"    result['Macro-Recall' + suffix] = _ratio(sum(s[1] for s in per_sentence), n_sents)\n",
"    result['Macro-Precision' + suffix] = _ratio(sum(s[0] for s in per_sentence), n_sents)\n",
"\n",
"    precision, recall, f1 = _precision_recall_f1(sum(tps), sum(fps), sum(fns))\n",
"    result['Micro-F1' + suffix] = f1\n",
"    result['Micro-Recall' + suffix] = recall\n",
"    result['Micro-Precision' + suffix] = precision\n",
"\n",
"\n",
"def evaluate_dataset(gold_ds, system_ds, include_logs=False):\n",
"    \"\"\"\n",
"    Score a system's entity-linking output against a gold-standard dataset.\n",
"\n",
"    Every gold mention is paired with the first system mention of the same\n",
"    sentence (matched by sentence URI) whose span overlaps it, and is counted as\n",
"      * true positive  - the pair passes ma_strong_annotation_matching,\n",
"      * false positive - a span overlaps but the strong match fails,\n",
"      * false negative - no system mention overlaps the gold mention.\n",
"    System mentions that overlap no gold mention are not counted. Gold\n",
"    sentences missing from the system output only add their mentions to the\n",
"    denominator of 'accuracy' and 'error'.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    gold_ds : list of dict\n",
"        Gold-standard sentences as returned by load_dataset.\n",
"    system_ds : list of dict\n",
"        System sentences as returned by load_dataset.\n",
"    include_logs : bool, optional\n",
"        When True, the per-mention trace is returned under the 'logs' key.\n",
"\n",
"    Returns\n",
"    -------\n",
"    dict\n",
"        'accuracy', 'error', macro/micro precision, recall and F1 from plain\n",
"        counts, the same scores weighted by mention confidence ('-conf' keys)\n",
"        and scikit-learn F1 scores for comparison ('-sklearn' keys). A score\n",
"        whose denominator is zero is reported as 0.\n",
"    \"\"\"\n",
"    result = {}\n",
"    logs = []\n",
"\n",
"    # label sets for the scikit-learn comparison: every gold mention that was\n",
"    # paired with a system mention gets its own label id (mctr)\n",
"    y_pred = []\n",
"    y_true = []\n",
"    mctr = 1\n",
"\n",
"    # per-sentence totals weighted by mention confidence\n",
"    true_pos = []\n",
"    false_pos = []\n",
"    false_neg = []\n",
"    # per-sentence plain counts\n",
"    num_fp = []  # detected span overlap but wrong entity URI\n",
"    num_fn = []  # entity not detected\n",
"    num_tp = []  # correct prediction (strong matching)\n",
"    num_ent = 0\n",
"\n",
"    # index the system sentences by URI, keeping the first one per URI\n",
"    system_by_uri = {}\n",
"    for system_sent in system_ds:\n",
"        system_by_uri.setdefault(system_sent['uri'], system_sent)\n",
"\n",
"    for gold_sent in gold_ds:\n",
"        sample_sent = system_by_uri.get(gold_sent['uri'])\n",
"        if sample_sent is None:\n",
"            # system did not handle the sentence\n",
"            logs.append('missing prediction : ' + gold_sent['uri'])\n",
"            num_ent += len(gold_sent['mentions'])\n",
"            continue\n",
"\n",
"        logs.append('detected %d out of %d mention(s)' % (len(sample_sent['mentions']), len(gold_sent['mentions'])))\n",
"        yp = []\n",
"        yt = []\n",
"        num_tp_s = num_fp_s = num_fn_s = 0\n",
"        tp_s = fp_s = fn_s = 0\n",
"        for gold_mention in gold_sent['mentions']:\n",
"            # first system mention whose span overlaps the gold span, if any\n",
"            sample_mention = next(\n",
"                (m for m in sample_sent['mentions'] if is_mention_overlap(m, gold_mention)),\n",
"                None,\n",
"            )\n",
"\n",
"            if sample_mention is None:\n",
"                # entity not detected\n",
"                logs.append('mention not detected : ' + repr(gold_mention['anchor'].toPython()))\n",
"                num_fn_s += 1\n",
"                fn_s += gold_mention['confidence']\n",
"                continue\n",
"\n",
"            ma_score = ma_strong_annotation_matching(sample_mention, gold_mention)\n",
"\n",
"            logs.append('begin : ' + repr(sample_mention['begin']) + ' ' + repr(gold_mention['begin']))\n",
"            logs.append('end : ' + repr(sample_mention['end']) + ' ' + repr(gold_mention['end']))\n",
"            logs.append('anchor : ' + repr(sample_mention['anchor'].toPython()) + ' ' + repr(gold_mention['anchor'].toPython()))\n",
"            logs.append('identRef sample : ' + repr(sample_mention['identRef']))\n",
"            logs.append('identRef gold : ' + repr(gold_mention['identRef']))\n",
"            logs.append('match score (ma): ' + repr(ma_score))\n",
"            logs.append('confidence: ' + repr(sample_mention['confidence']))\n",
"\n",
"            yt.append(mctr)\n",
"            if ma_score == 1:\n",
"                # strong match\n",
"                yp.append(mctr)\n",
"                num_tp_s += 1\n",
"                tp_s += sample_mention['confidence']\n",
"            else:\n",
"                # false match\n",
"                num_fp_s += 1\n",
"                fp_s += sample_mention['confidence']\n",
"            mctr += 1\n",
"\n",
"        num_ent += len(gold_sent['mentions'])\n",
"        num_tp.append(num_tp_s)\n",
"        num_fp.append(num_fp_s)\n",
"        num_fn.append(num_fn_s)\n",
"        true_pos.append(tp_s)\n",
"        false_pos.append(fp_s)\n",
"        false_neg.append(fn_s)\n",
"\n",
"        y_true.append(yt)\n",
"        y_pred.append(yp)\n",
"        logs.append('--')\n",
"\n",
"    result['accuracy'] = _ratio(sum(num_tp), num_ent)\n",
"    result['error'] = _ratio(sum(num_fp) + sum(num_fn), num_ent)\n",
"\n",
"    # unweighted by confidence\n",
"    _store_macro_micro(result, '', num_tp, num_fp, num_fn)\n",
"\n",
"    # weighted with confidence\n",
"    _store_macro_micro(result, '-conf', true_pos, false_pos, false_neg)\n",
"\n",
"    # f1 score from scikit-learn as comparison\n",
"    if mctr > 1:\n",
"        mlb = MultiLabelBinarizer()\n",
"        y_true_b = mlb.fit_transform(y_true)\n",
"        y_pred_b = mlb.transform(y_pred)\n",
"        result['Micro-F1-sklearn'] = f1_score(y_true_b, y_pred_b, average='micro')\n",
"        result['Macro-F1-sklearn'] = f1_score(y_true_b, y_pred_b, average='macro')\n",
"    else:\n",
"        # no gold mention was paired with a system mention: nothing to binarize\n",
"        result['Micro-F1-sklearn'] = 0.\n",
"        result['Macro-F1-sklearn'] = 0.\n",
"\n",
"    if include_logs:\n",
"        result['logs'] = logs\n",
"\n",
"    return result\n",
"# Score the loaded system output against the loaded gold standard and print the metrics as indented JSON\n",
"res = evaluate_dataset(gold, sample)\n",
"print(json.dumps(res, indent=2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Evaluate all"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"benchmark dataset : IITB\n",
"system being evaluated: IITB/Babelfy.ttl\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"http://dbpedia.org/resource/Say_\"I_Gotta_Believe!\" does not look like a valid URI, trying to serialize this will break.\n",
"http://dbpedia.org/resource/Robert_\"Say\"_McIntosh does not look like a valid URI, trying to serialize this will break.\n",
"http://dbpedia.org/resource/John_\"Rookie\"_Wright does not look like a valid URI, trying to serialize this will break.\n",
"http://dbpedia.org/resource/Say_\"I_Gotta_Believe!\" does not look like a valid URI, trying to serialize this will break.\n",
"http://dbpedia.org/resource/Running_with_Scissors_(\"Weird_Al\"_Yankovic_album) does not look like a valid URI, trying to serialize this will break.\n",
"http://dbpedia.org/resource/The_Unreleased_\"D.C._Tapes\" does not look like a valid URI, trying to serialize this will break.\n",
"http://dbpedia.org/resource/\"A\"_Device does not look like a valid URI, trying to serialize this will break.\n",
"http://dbpedia.org/resource/Sacred_Heart_\"The_Video\" does not look like a valid URI, trying to serialize this will break.\n",
"http://dbpedia.org/resource/Robert_\"Say\"_McIntosh does not look like a valid URI, trying to serialize this will break.\n",
"http://dbpedia.org/resource/Say_\"I_Gotta_Believe!\" does not look like a valid URI, trying to serialize this will break.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"system being evaluated: IITB/DBpedia_Spotlight.ttl\n",
"system being evaluated: IITB/WAT.ttl\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>system</th>\n",
" <th>accuracy</th>\n",
" <th>error</th>\n",
" <th>Macro-F1</th>\n",
" <th>Macro-Recall</th>\n",
" <th>Macro-Precision</th>\n",
" <th>Micro-F1</th>\n",
" <th>Micro-Recall</th>\n",
" <th>Micro-Precision</th>\n",
" <th>Macro-F1-conf</th>\n",
" <th>Macro-Recall-conf</th>\n",
" <th>Macro-Precision-conf</th>\n",
" <th>Micro-F1-conf</th>\n",
" <th>Micro-Recall-conf</th>\n",
" <th>Micro-Precision-conf</th>\n",
" <th>Micro-F1-sklearn</th>\n",
" <th>Macro-F1-sklearn</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Babelfy</td>\n",
" <td>0.305713</td>\n",
" <td>0.694287</td>\n",
" <td>0.449039</td>\n",
" <td>0.701616</td>\n",
" <td>0.336072</td>\n",
" <td>0.468270</td>\n",
" <td>0.721449</td>\n",
" <td>0.346628</td>\n",
" <td>0.490400</td>\n",
" <td>0.641623</td>\n",
" <td>0.403131</td>\n",
" <td>0.514160</td>\n",
" <td>0.662756</td>\n",
" <td>0.419993</td>\n",
" <td>0.514809</td>\n",
" <td>0.346628</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>DBpedia_Spotlight</td>\n",
" <td>0.322591</td>\n",
" <td>0.677409</td>\n",
" <td>0.467552</td>\n",
" <td>0.808242</td>\n",
" <td>0.333896</td>\n",
" <td>0.487817</td>\n",
" <td>0.802991</td>\n",
" <td>0.350317</td>\n",
" <td>0.467552</td>\n",
" <td>0.808242</td>\n",
" <td>0.333896</td>\n",
" <td>0.487817</td>\n",
" <td>0.802991</td>\n",
" <td>0.350317</td>\n",
" <td>0.518867</td>\n",
" <td>0.350317</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>WAT</td>\n",
" <td>0.324940</td>\n",
" <td>0.675060</td>\n",
" <td>0.470418</td>\n",
" <td>0.607975</td>\n",
" <td>0.389413</td>\n",
" <td>0.490498</td>\n",
" <td>0.618142</td>\n",
" <td>0.406547</td>\n",
" <td>0.390630</td>\n",
" <td>0.297625</td>\n",
" <td>0.598235</td>\n",
" <td>0.395742</td>\n",
" <td>0.286696</td>\n",
" <td>0.638660</td>\n",
" <td>0.578078</td>\n",
" <td>0.406547</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" system accuracy error Macro-F1 Macro-Recall \\\n",
"0 Babelfy 0.305713 0.694287 0.449039 0.701616 \n",
"1 DBpedia_Spotlight 0.322591 0.677409 0.467552 0.808242 \n",
"2 WAT 0.324940 0.675060 0.470418 0.607975 \n",
"\n",
" Macro-Precision Micro-F1 Micro-Recall Micro-Precision Macro-F1-conf \\\n",
"0 0.336072 0.468270 0.721449 0.346628 0.490400 \n",
"1 0.333896 0.487817 0.802991 0.350317 0.467552 \n",
"2 0.389413 0.490498 0.618142 0.406547 0.390630 \n",
"\n",
" Macro-Recall-conf Macro-Precision-conf Micro-F1-conf Micro-Recall-conf \\\n",
"0 0.641623 0.403131 0.514160 0.662756 \n",
"1 0.808242 0.333896 0.487817 0.802991 \n",
"2 0.297625 0.598235 0.395742 0.286696 \n",
"\n",
" Micro-Precision-conf Micro-F1-sklearn Macro-F1-sklearn \n",
"0 0.419993 0.514809 0.346628 \n",
"1 0.350317 0.518867 0.350317 \n",
"2 0.638660 0.578078 0.406547 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"benchmark dataset : KORE50\n",
"system being evaluated: KORE50/Babelfy.ttl\n",
"system being evaluated: KORE50/DBpedia_Spotlight.ttl\n",
"system being evaluated: KORE50/WAT.ttl\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>system</th>\n",
" <th>accuracy</th>\n",
" <th>error</th>\n",
" <th>Macro-F1</th>\n",
" <th>Macro-Recall</th>\n",
" <th>Macro-Precision</th>\n",
" <th>Micro-F1</th>\n",
" <th>Micro-Recall</th>\n",
" <th>Micro-Precision</th>\n",
" <th>Macro-F1-conf</th>\n",
" <th>Macro-Recall-conf</th>\n",
" <th>Macro-Precision-conf</th>\n",
" <th>Micro-F1-conf</th>\n",
" <th>Micro-Recall-conf</th>\n",
" <th>Micro-Precision-conf</th>\n",
" <th>Micro-F1-sklearn</th>\n",
" <th>Macro-F1-sklearn</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Babelfy</td>\n",
" <td>0.666667</td>\n",
" <td>0.333333</td>\n",
" <td>0.724944</td>\n",
" <td>0.860000</td>\n",
" <td>0.661333</td>\n",
" <td>0.800000</td>\n",
" <td>0.932039</td>\n",
" <td>0.700730</td>\n",
" <td>0.728991</td>\n",
" <td>0.860000</td>\n",
" <td>0.678332</td>\n",
" <td>0.815881</td>\n",
" <td>0.892825</td>\n",
" <td>0.751147</td>\n",
" <td>0.824034</td>\n",
" <td>0.700730</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>DBpedia_Spotlight</td>\n",
" <td>0.430556</td>\n",
" <td>0.569444</td>\n",
" <td>0.498159</td>\n",
" <td>0.626000</td>\n",
" <td>0.453000</td>\n",
" <td>0.601942</td>\n",
" <td>0.815789</td>\n",
" <td>0.476923</td>\n",
" <td>0.498159</td>\n",
" <td>0.626000</td>\n",
" <td>0.453000</td>\n",
" <td>0.601942</td>\n",
" <td>0.815789</td>\n",
" <td>0.476923</td>\n",
" <td>0.645833</td>\n",
" <td>0.476923</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>WAT</td>\n",
" <td>0.520833</td>\n",
" <td>0.479167</td>\n",
" <td>0.578254</td>\n",
" <td>0.746667</td>\n",
" <td>0.497000</td>\n",
" <td>0.684932</td>\n",
" <td>0.903614</td>\n",
" <td>0.551471</td>\n",
" <td>0.593953</td>\n",
" <td>0.743413</td>\n",
" <td>0.531099</td>\n",
" <td>0.683245</td>\n",
" <td>0.765355</td>\n",
" <td>0.617047</td>\n",
" <td>0.710900</td>\n",
" <td>0.551471</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" system accuracy error Macro-F1 Macro-Recall \\\n",
"0 Babelfy 0.666667 0.333333 0.724944 0.860000 \n",
"1 DBpedia_Spotlight 0.430556 0.569444 0.498159 0.626000 \n",
"2 WAT 0.520833 0.479167 0.578254 0.746667 \n",
"\n",
" Macro-Precision Micro-F1 Micro-Recall Micro-Precision Macro-F1-conf \\\n",
"0 0.661333 0.800000 0.932039 0.700730 0.728991 \n",
"1 0.453000 0.601942 0.815789 0.476923 0.498159 \n",
"2 0.497000 0.684932 0.903614 0.551471 0.593953 \n",
"\n",
" Macro-Recall-conf Macro-Precision-conf Micro-F1-conf Micro-Recall-conf \\\n",
"0 0.860000 0.678332 0.815881 0.892825 \n",
"1 0.626000 0.453000 0.601942 0.815789 \n",
"2 0.743413 0.531099 0.683245 0.765355 \n",
"\n",
" Micro-Precision-conf Micro-F1-sklearn Macro-F1-sklearn \n",
"0 0.751147 0.824034 0.700730 \n",
"1 0.476923 0.645833 0.476923 \n",
"2 0.617047 0.710900 0.551471 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"benchmark dataset : MSNBC\n",
"system being evaluated: MSNBC/Babelfy.ttl\n",
"system being evaluated: MSNBC/DBpedia_Spotlight.ttl\n",
"system being evaluated: MSNBC/WAT.ttl\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>system</th>\n",
" <th>accuracy</th>\n",
" <th>error</th>\n",
" <th>Macro-F1</th>\n",
" <th>Macro-Recall</th>\n",
" <th>Macro-Precision</th>\n",
" <th>Micro-F1</th>\n",
" <th>Micro-Recall</th>\n",
" <th>Micro-Precision</th>\n",
" <th>Macro-F1-conf</th>\n",
" <th>Macro-Recall-conf</th>\n",
" <th>Macro-Precision-conf</th>\n",
" <th>Micro-F1-conf</th>\n",
" <th>Micro-Recall-conf</th>\n",
" <th>Micro-Precision-conf</th>\n",
" <th>Micro-F1-sklearn</th>\n",
" <th>Macro-F1-sklearn</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Babelfy</td>\n",
" <td>0.531459</td>\n",
" <td>0.468541</td>\n",
" <td>0.613726</td>\n",
" <td>0.743991</td>\n",
" <td>0.543233</td>\n",
" <td>0.694056</td>\n",
" <td>0.815195</td>\n",
" <td>0.604262</td>\n",
" <td>0.620300</td>\n",
" <td>0.701066</td>\n",
" <td>0.580370</td>\n",
" <td>0.691166</td>\n",
" <td>0.756431</td>\n",
" <td>0.636269</td>\n",
" <td>0.753321</td>\n",
" <td>0.604262</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>DBpedia_Spotlight</td>\n",
" <td>0.441767</td>\n",
" <td>0.558233</td>\n",
" <td>0.576075</td>\n",
" <td>0.650875</td>\n",
" <td>0.533669</td>\n",
" <td>0.612813</td>\n",
" <td>0.638298</td>\n",
" <td>0.589286</td>\n",
" <td>0.576075</td>\n",
" <td>0.650875</td>\n",
" <td>0.533669</td>\n",
" <td>0.612813</td>\n",
" <td>0.638298</td>\n",
" <td>0.589286</td>\n",
" <td>0.741573</td>\n",
" <td>0.589286</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>WAT</td>\n",
" <td>0.507363</td>\n",
" <td>0.492637</td>\n",
" <td>0.618954</td>\n",
" <td>0.706914</td>\n",
" <td>0.570966</td>\n",
" <td>0.673179</td>\n",
" <td>0.697974</td>\n",
" <td>0.650086</td>\n",
" <td>0.573834</td>\n",
" <td>0.537680</td>\n",
" <td>0.661128</td>\n",
" <td>0.586712</td>\n",
" <td>0.484312</td>\n",
" <td>0.744024</td>\n",
" <td>0.787942</td>\n",
" <td>0.650086</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" system accuracy error Macro-F1 Macro-Recall \\\n",
"0 Babelfy 0.531459 0.468541 0.613726 0.743991 \n",
"1 DBpedia_Spotlight 0.441767 0.558233 0.576075 0.650875 \n",
"2 WAT 0.507363 0.492637 0.618954 0.706914 \n",
"\n",
" Macro-Precision Micro-F1 Micro-Recall Micro-Precision Macro-F1-conf \\\n",
"0 0.543233 0.694056 0.815195 0.604262 0.620300 \n",
"1 0.533669 0.612813 0.638298 0.589286 0.576075 \n",
"2 0.570966 0.673179 0.697974 0.650086 0.573834 \n",
"\n",
" Macro-Recall-conf Macro-Precision-conf Micro-F1-conf Micro-Recall-conf \\\n",
"0 0.701066 0.580370 0.691166 0.756431 \n",
"1 0.650875 0.533669 0.612813 0.638298 \n",
"2 0.537680 0.661128 0.586712 0.484312 \n",
"\n",
" Micro-Precision-conf Micro-F1-sklearn Macro-F1-sklearn \n",
"0 0.636269 0.753321 0.604262 \n",
"1 0.589286 0.741573 0.589286 \n",
"2 0.744024 0.787942 0.650086 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"benchmark dataset : OKE_2015_Task_1_example_set\n",
"system being evaluated: OKE_2015_Task_1_example_set/Babelfy.ttl\n",
"system being evaluated: OKE_2015_Task_1_example_set/DBpedia_Spotlight.ttl\n",
"system being evaluated: OKE_2015_Task_1_example_set/WAT.ttl\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>system</th>\n",
" <th>accuracy</th>\n",
" <th>error</th>\n",
" <th>Macro-F1</th>\n",
" <th>Macro-Recall</th>\n",
" <th>Macro-Precision</th>\n",
" <th>Micro-F1</th>\n",
" <th>Micro-Recall</th>\n",
" <th>Micro-Precision</th>\n",
" <th>Macro-F1-conf</th>\n",
" <th>Macro-Recall-conf</th>\n",
" <th>Macro-Precision-conf</th>\n",
" <th>Micro-F1-conf</th>\n",
" <th>Micro-Recall-conf</th>\n",
" <th>Micro-Precision-conf</th>\n",
" <th>Micro-F1-sklearn</th>\n",
" <th>Macro-F1-sklearn</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Babelfy</td>\n",
" <td>0.666667</td>\n",
" <td>0.333333</td>\n",
" <td>0.688889</td>\n",
" <td>0.833333</td>\n",
" <td>0.611111</td>\n",
" <td>0.800000</td>\n",
" <td>0.888889</td>\n",
" <td>0.727273</td>\n",
" <td>0.813094</td>\n",
" <td>0.833333</td>\n",
" <td>0.796540</td>\n",
" <td>0.845975</td>\n",
" <td>0.872938</td>\n",
" <td>0.820628</td>\n",
" <td>0.842105</td>\n",
" <td>0.727273</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>DBpedia_Spotlight</td>\n",
" <td>0.083333</td>\n",
" <td>0.916667</td>\n",
" <td>0.133333</td>\n",
" <td>0.111111</td>\n",
" <td>0.166667</td>\n",
" <td>0.153846</td>\n",
" <td>0.100000</td>\n",
" <td>0.333333</td>\n",
" <td>0.133333</td>\n",
" <td>0.111111</td>\n",
" <td>0.166667</td>\n",
" <td>0.153846</td>\n",
" <td>0.100000</td>\n",
" <td>0.333333</td>\n",
" <td>0.500000</td>\n",
" <td>0.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>WAT</td>\n",
" <td>0.750000</td>\n",
" <td>0.250000</td>\n",
" <td>0.777778</td>\n",
" <td>1.000000</td>\n",
" <td>0.666667</td>\n",
" <td>0.857143</td>\n",
" <td>1.000000</td>\n",
" <td>0.750000</td>\n",
" <td>0.823177</td>\n",
" <td>1.000000</td>\n",
" <td>0.721286</td>\n",
" <td>0.940835</td>\n",
" <td>1.000000</td>\n",
" <td>0.888279</td>\n",
" <td>0.857143</td>\n",
" <td>0.750000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" system accuracy error Macro-F1 Macro-Recall \\\n",
"0 Babelfy 0.666667 0.333333 0.688889 0.833333 \n",
"1 DBpedia_Spotlight 0.083333 0.916667 0.133333 0.111111 \n",
"2 WAT 0.750000 0.250000 0.777778 1.000000 \n",
"\n",
" Macro-Precision Micro-F1 Micro-Recall Micro-Precision Macro-F1-conf \\\n",
"0 0.611111 0.800000 0.888889 0.727273 0.813094 \n",
"1 0.166667 0.153846 0.100000 0.333333 0.133333 \n",
"2 0.666667 0.857143 1.000000 0.750000 0.823177 \n",
"\n",
" Macro-Recall-conf Macro-Precision-conf Micro-F1-conf Micro-Recall-conf \\\n",
"0 0.833333 0.796540 0.845975 0.872938 \n",
"1 0.111111 0.166667 0.153846 0.100000 \n",
"2 1.000000 0.721286 0.940835 1.000000 \n",
"\n",
" Micro-Precision-conf Micro-F1-sklearn Macro-F1-sklearn \n",
"0 0.820628 0.842105 0.727273 \n",
"1 0.333333 0.500000 0.333333 \n",
"2 0.888279 0.857143 0.750000 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
},
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Evaluate every system output against every gold-standard benchmark dataset.\n",
"# Gold files are read from 'datasets/'; the outputs of the systems for a dataset\n",
"# are read from a folder named after that dataset (file name without extension).\n",
"dslist = sorted(glob.glob('datasets/*.*'))\n",
"all_dataset = []\n",
"for dataset in dslist:\n",
"    gold = load_dataset(dataset)\n",
"    datasetname, _ = os.path.splitext(os.path.basename(dataset))\n",
"    print('benchmark dataset : ', datasetname)\n",
"\n",
"    # One result row per system: the system name followed by its metrics.\n",
"    all_systems = []\n",
"    for system in sorted(glob.glob(datasetname+'/*.*')):\n",
"        sys_name, _ = os.path.splitext(os.path.basename(system))\n",
"        print('system being evaluated: ', system)\n",
"        sample = load_dataset(system)\n",
"        benchmark = evaluate_dataset(gold, sample)\n",
"        all_systems.append({'system': sys_name, **benchmark})\n",
"\n",
"    df = pd.DataFrame(all_systems)\n",
"    display(df)\n",
"    df.plot(x=\"system\", y=['Micro-F1','Micro-F1-conf'], title=datasetname, ylim=(0., 1.))\n",
"    # Render the figure now so it appears next to its table instead of being\n",
"    # flushed together with all other figures at the end of the cell.\n",
"    plt.show()\n",
"    all_dataset.append({'dataset': datasetname, 'result': all_systems})\n",
"\n",
"# print(json.dumps(all_dataset, indent=2))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
@LittlePea13
Copy link

Code to compute the confidence-weighted metrics:

# Confidence-weighted counts, one entry per (sample, gold) sentence pair:
#   true_pos  - summed confidence of predictions that strongly match a gold mention
#   false_pos - summed confidence of predictions that do not
#   false_neg - number of gold mentions whose URI was never predicted (unweighted)
# Expects `sample`, `gold` and `ma_strong_annotation_matching` to be defined already;
# the two datasets are paired positionally by zip().
true_pos = []
false_pos = []
false_neg = []
for sample_sent, gold_sent in zip(sample, gold):
    pred_id = [str(instance['uri']) for instance in sample_sent['mentions']]
    gold_id = [str(instance['uri']) for instance in gold_sent['mentions']]
    intersect = []
    false_preds = []
    # Pair each predicted mention with the FIRST gold mention that has the same URI.
    for idx, uri in enumerate(pred_id):
        if uri in gold_id:
            intersect.append([idx, gold_id.index(uri)])
        else:
            # False Predictions from system
            false_preds.append(idx)
    # Missing preds from system. Several predictions can pair with the same gold
    # mention, so the raw difference can drop below zero; clamp it so that the
    # recall denominator (tp + fn) never shrinks or turns negative.
    # NOTE(review): a pair that fails the strong match below is counted as a false
    # positive but its gold mention is not added to the false negatives - confirm
    # that this is the intended scoring.
    missing_preds = max(0, len(gold_id) - len(intersect))

    print(len(sample_sent['mentions']), len(gold_sent['mentions']))

    tp = 0.0
    # Start with false preds from system for False Positives
    fp = sum(float(sample_sent['mentions'][fal_pred]['confidence']) for fal_pred in false_preds)
    fn = missing_preds
    for sample_mention_id, gold_mention_id in intersect:
        print(sample_mention_id, gold_mention_id)
        sample_mention = sample_sent['mentions'][sample_mention_id]
        gold_mention = gold_sent['mentions'][gold_mention_id]
        # Evaluate the match once and reuse it for both the log line and the decision.
        match_score = ma_strong_annotation_matching(sample_mention, gold_mention)

        print('begin : ',sample_mention['begin'], gold_mention['begin'])
        print('end : ',sample_mention['end'], gold_mention['end'])
        print('anchor : ', repr(sample_mention['anchor'].toPython()), repr(gold_mention['anchor'].toPython()))
        print('identRef sample : ',sample_mention['identRef'])
        print('identRef gold : ', gold_mention['identRef'])
        print('confidence : ', sample_mention['confidence'])
        print('match score (ma): ', match_score)
        # strong match: the prediction's confidence goes to the true positives,
        # otherwise to the false positives.
        if match_score==1:
            tp += float(sample_mention['confidence'])
        else:
            fp += float(sample_mention['confidence'])
        print()
    true_pos.append(tp)
    false_pos.append(fp)
    false_neg.append(fn)
    print('--')

# Macro-averaged metrics: precision, recall and F1 are computed per sentence from
# true_pos / false_pos / false_neg and then averaged over all sentences.
F1_scores = []
pres_scores = []
recall_scores = []

for tp, fp, fn in zip(true_pos, false_pos, false_neg):
    # A sentence without predictions (tp + fp == 0), without gold mentions
    # (tp + fn == 0) or without any correct prediction (precision + recall == 0)
    # has an undefined ratio; score it 0.0 instead of raising ZeroDivisionError.
    pres_scores.append(tp / (tp + fp) if (tp + fp) > 0 else 0.0)
    recall_scores.append(tp / (tp + fn) if (tp + fn) > 0 else 0.0)
    pr_sum = pres_scores[-1] + recall_scores[-1]
    F1_scores.append((2 * pres_scores[-1] * recall_scores[-1]) / pr_sum if pr_sum > 0 else 0.0)

# An empty corpus yields 0.0 for every average rather than dividing by zero.
print('Macro-F1', sum(F1_scores)/len(F1_scores) if F1_scores else 0.0)
print('Macro-Recall', sum(recall_scores)/len(recall_scores) if recall_scores else 0.0)
print('Macro-Precision', sum(pres_scores)/len(pres_scores) if pres_scores else 0.0)


# Micro-averaged metrics: the per-sentence counts are pooled over the whole
# corpus before precision, recall and F1 are computed.
total_tp = sum(true_pos)
total_fp = sum(false_pos)
total_fn = sum(false_neg)
# Undefined ratios (zero denominator) are scored 0.0 instead of raising ZeroDivisionError.
precision = total_tp / (total_tp + total_fp) if (total_tp + total_fp) > 0 else 0.0
recall = total_tp / (total_tp + total_fn) if (total_tp + total_fn) > 0 else 0.0
F1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
print('Micro-F1', F1)
# These are the pooled (micro) values; they were previously printed under "Macro-" labels.
print('Micro-Recall', recall)
print('Micro-Precision', precision)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment