Skip to content

Instantly share code, notes, and snippets.

@cthoyt
Last active January 23, 2024 23:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cthoyt/d26df3ec12f6a15f3157546c6ebee3a2 to your computer and use it in GitHub Desktop.
Save cthoyt/d26df3ec12f6a15f3157546c6ebee3a2 to your computer and use it in GitHub Desktop.
Find matches between terms in the proposed CaroliO ontology and existing OBO Foundry ontologies
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "86692f7c-5cdc-4caa-9e4a-7712f99190f2",
"metadata": {},
"outputs": [],
"source": [
"import bioontologies\n",
"import ols_client\n",
"from pyobo.gilda_utils import get_grounder\n",
"import gilda\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "dfd80e63-9c10-4ba7-8ac5-e49176b9ab1d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO: [2024-01-24 00:21:26] pyobo.api.species - [hso] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:26] pyobo.api.species - [htn] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:27] pyobo.api.species - [ido] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:28] pyobo.api.species - [mf] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:28] pyobo.api.species - [mfoem] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:29] pyobo.api.species - [mfomd] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:32] pyobo.api.species - [mpio] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:38] pyobo.api.species - [oae] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:40] pyobo.api.species - [ohd] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:41] pyobo.api.species - [omrse] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:41] pystow.utils - downloading with urllib from https://github.com/ufbmi/OMRSE/raw/master/omrse-full.obo to /Users/cthoyt/.data/pyobo/raw/omrse/2024-01-10/omrse-full.obo\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Downloading omrse-full.obo: 0.00B [00:00, ?B/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO: [2024-01-24 00:21:42] pyobo.api.species - [ontoneo] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:47] pyobo.api.species - [oostt] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:49] pyobo.api.species - [ovae] no cached species found. getting from OBO loader\n",
"INFO: [2024-01-24 00:21:52] gilda.term - Filtering 2404074 terms for uniqueness...\n",
"INFO: [2024-01-24 00:21:55] gilda.term - Got 2091018 unique terms...\n"
]
}
],
"source": [
"grounder = get_grounder([\n",
" # add the subset of OBO ontologies about \"health\"\n",
" 'doid', \n",
" 'mondo', 'hp', 'symp', 'maxo', 'obi', 'cmo', \n",
" # 'cto', # trash\n",
" 'disdriv', 'dron', 'exo', \n",
" # 'genepio', # can't be loaded\n",
" 'hso', 'htn', 'ido', 'mf', 'mfoem', 'mfomd', 'mmo',\n",
" 'mpath', 'mpio', 'ncit', 'oae', 'ogms', 'ohd', 'omrse',\n",
" 'ontoneo', 'oostt', 'ovae', 'trans', 'vo', 'xco',\n",
" 'scdo', 'zp', \n",
" # extras\n",
" \"chebi\",\n",
"], strict=False, progress=False)"
]
},
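{
"cell_type": "code",
"execution_count": 3,
"id": "16b595ff-dfb5-4ffd-8881-1d17c87daba7",
"metadata": {},
"outputs": [],
"source": [
"# Ideally the ontology would be loaded directly from its URL (see the commented-out lines below), but ROBOT fails to parse it as a remote file for an unknown reason, so a local JSON copy is read instead.\n",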
"# url = \"https://raw.githubusercontent.com/TootooniLab/CaroliO/main/CaroliO.owl\"\n",
"# g = bioontologies.get_obograph_by_iri(url)\n",
"\n",
"with open(\"/Users/cthoyt/Desktop/CaroliO.json\") as file:\n",
" data = json.load(file)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9e352482-7645-4741-b1cd-c2877b66df13",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"## Lexical matching returned results\n",
"\n",
"- `CAROLIO:0000411` mild pain\n",
" - [`ncit:C136549`](https://bioregistry.io/ncit:C136549) Neck Pain Score 2 (0.54)\n",
"- `CAROLIO:0000412` moderate pain\n",
" - [`ncit:C121394`](https://bioregistry.io/ncit:C121394) Moderate Extremity Pain (0.54)\n",
" - [`ncit:C136551`](https://bioregistry.io/ncit:C136551) Neck Pain Score 4 (0.54)\n",
"- `CAROLIO:0000413` no pain\n",
" - [`ncit:C119987`](https://bioregistry.io/ncit:C119987) Had No Pain (0.54)\n",
" - [`ncit:C121390`](https://bioregistry.io/ncit:C121390) No Extremity Pain (0.54)\n",
" - [`ncit:C136547`](https://bioregistry.io/ncit:C136547) Neck Pain Score 0 (0.54)\n",
"- `CAROLIO:0000414` severe pain\n",
" - [`ncit:C121395`](https://bioregistry.io/ncit:C121395) Severe Extremity Pain (0.54)\n",
" - [`ncit:C136553`](https://bioregistry.io/ncit:C136553) Neck Pain Score 6 (0.54)\n",
"- `CAROLIO:0001000` caroli syndrome\n",
" - [`doid:0081394`](https://bioregistry.io/doid:0081394) Caroli syndrome (0.772)\n",
" - [`mondo:0018808`](https://bioregistry.io/mondo:0018808) Caroli syndrome (0.772)\n",
"- `CAROLIO:0003100` endoscopic treatment\n",
" - [`ncit:C16546`](https://bioregistry.io/ncit:C16546) Endoscopic Procedure (0.54)\n",
"- `CAROLIO:0003120` endoscopic retrograde cholangiopancreatography\n",
" - [`maxo:0035049`](https://bioregistry.io/maxo:0035049) endoscopic retrograde cholangiopancreatography (0.778)\n",
" - [`ncit:C16430`](https://bioregistry.io/ncit:C16430) Endoscopic Retrograde Cholangiopancreatography (0.762)\n",
"- `CAROLIO:0003200` interventional radiology procedure\n",
" - [`ncit:C63334`](https://bioregistry.io/ncit:C63334) Interventional Radiology Procedure (0.762)\n",
"- `CAROLIO:0003210` locoregional therapy\n",
" - [`ncit:C25388`](https://bioregistry.io/ncit:C25388) Local-Regional (0.54)\n",
" - [`ncit:C94796`](https://bioregistry.io/ncit:C94796) Locally Recurrent Malignant Neoplasm (0.54)\n",
"- `CAROLIO:0003220` paracentesis\n",
" - [`maxo:0035106`](https://bioregistry.io/maxo:0035106) paracentesis (0.778)\n",
" - [`ncit:C15310`](https://bioregistry.io/ncit:C15310) Paracentesis (0.762)\n",
"- `CAROLIO:0003250` transjugular intrahepatic portosystemic shunt\n",
" - [`ncit:C126288`](https://bioregistry.io/ncit:C126288) Transjugular Intrahepatic Portosystemic Shunt (0.762)\n",
"- `CAROLIO:0003300` pharmaceutical treatment\n",
" - [`maxo:0000058`](https://bioregistry.io/maxo:0000058) pharmacotherapy (0.556)\n",
"- `CAROLIO:0003310` antibiotic treatment\n",
" - [`ncit:C258`](https://bioregistry.io/ncit:C258) Antibiotic (0.762)\n",
" - [`chebi:33281`](https://bioregistry.io/chebi:33281) antimicrobial agent (0.556)\n",
" - [`xco:0000482`](https://bioregistry.io/xco:0000482) antimicrobial agent (0.556)\n",
"- `CAROLIO:0003320` antiemetic treatment\n",
" - [`chebi:50919`](https://bioregistry.io/chebi:50919) antiemetic (0.778)\n",
" - [`xco:0001245`](https://bioregistry.io/xco:0001245) antiemetic (0.778)\n",
" - [`ncit:C267`](https://bioregistry.io/ncit:C267) Antiemetic Agent (0.556)\n",
"- `CAROLIO:0003330` bile acid treatment\n",
" - [`chebi:3098`](https://bioregistry.io/chebi:3098) bile acid (0.778)\n",
" - [`chebi:22868`](https://bioregistry.io/chebi:22868) bile salt (0.549)\n",
" - [`ncit:C74800`](https://bioregistry.io/ncit:C74800) Bile Acid Measurement (0.54)\n",
"- `CAROLIO:0003340` chemotherapy\n",
" - [`maxo:0000647`](https://bioregistry.io/maxo:0000647) chemotherapy (0.778)\n",
" - [`ncit:C15632`](https://bioregistry.io/ncit:C15632) Chemotherapy (0.762)\n",
"- `CAROLIO:0003350` diuretics treatment\n",
" - [`chebi:35498`](https://bioregistry.io/chebi:35498) diuretic (0.778)\n",
" - [`xco:0000122`](https://bioregistry.io/xco:0000122) diuretic (0.778)\n",
" - [`ncit:C448`](https://bioregistry.io/ncit:C448) Diuretic (0.762)\n",
"- `CAROLIO:0003360` octreotide treatment\n",
" - [`chebi:7726`](https://bioregistry.io/chebi:7726) octreotide (0.778)\n",
" - [`ncit:C711`](https://bioregistry.io/ncit:C711) Octreotide (0.762)\n",
"- `CAROLIO:0003370` proton pump inhibitor treatment\n",
" - [`xco:0000577`](https://bioregistry.io/xco:0000577) proton pump inhibitor (0.778)\n",
" - [`ncit:C29723`](https://bioregistry.io/ncit:C29723) Proton Pump Inhibitor (0.762)\n",
" - [`chebi:49200`](https://bioregistry.io/chebi:49200) EC 3.6.3.10 (H(+)/K(+)-exchanging ATPase) inhibitor (0.556)\n",
"- `CAROLIO:0003380` pruritus treatment\n",
" - [`hp:0000989`](https://bioregistry.io/hp:0000989) Pruritus (0.762)\n",
" - [`ncit:C3344`](https://bioregistry.io/ncit:C3344) Pruritus (0.762)\n",
" - [`scdo:0000935`](https://bioregistry.io/scdo:0000935) Pruritus (0.762)\n",
" - [`symp:0000432`](https://bioregistry.io/symp:0000432) itching (0.556)\n",
" - [`ncit:C58006`](https://bioregistry.io/ncit:C58006) Pruritus, CTCAE (0.54)\n",
"- `CAROLIO:0003400` radiation therapy\n",
" - [`maxo:0000014`](https://bioregistry.io/maxo:0000014) radiation therapy (0.778)\n",
" - [`ncit:C15313`](https://bioregistry.io/ncit:C15313) Radiation Therapy (0.762)\n",
"- `CAROLIO:0003500` surgical treatment\n",
" - [`ncit:C15329`](https://bioregistry.io/ncit:C15329) Surgical Procedure (0.54)\n",
"- `CAROLIO:0003510` organ transplant\n",
" - [`ncit:C122934`](https://bioregistry.io/ncit:C122934) Organ Graft (0.54)\n",
"- `CAROLIO:0003520` roux-en-y\n",
" - [`ncit:C51756`](https://bioregistry.io/ncit:C51756) Roux-en-Y Anastomosis (0.549)\n",
"- `CAROLIO:0003530` surgical resection\n",
" - [`maxo:0000448`](https://bioregistry.io/maxo:0000448) surgical resection (0.778)\n",
" - [`ncit:C158758`](https://bioregistry.io/ncit:C158758) Resection (0.54)\n",
"\n",
"## Lexical matching returned no results\n",
"\n",
"- `CAROLIO:0000400` value partition\n",
"- `CAROLIO:0000410` pain scale\n",
"- `CAROLIO:0000420` symptom recurrence status\n",
"- `CAROLIO:0000421` non-recurrent symptom status\n",
"- `CAROLIO:0000422` recurrent symptom status\n",
"- `CAROLIO:0002000` variceal bleeding\n",
"- `CAROLIO:0003110` endoscopic band ligation\n",
"- `CAROLIO:0003121` biliary drainage\n",
"- `CAROLIO:0003122` biliary dilatation\n",
"- `CAROLIO:0003123` biliary stent placement\n",
"- `CAROLIO:0003124` gallstones removal\n",
"- `CAROLIO:0003230` percutaneous aspiration and drainage\n",
"- `CAROLIO:0003240` percutaneous transhepatic cholangiogram\n"
]
}
],
"source": [
"safe = []\n",
"\n",
"print(\"## Lexical matching returned results\\n\")\n",
"\n",
"for graph in data['graphs']:\n",
" for node in sorted(graph['nodes'], key=lambda n: n['id']):\n",
" id = node['id']\n",
" if not id.startswith(\"http://purl.obolibrary.org/obo/CAROLIO_\"):\n",
" continue\n",
" luid = id.removeprefix(\"http://purl.obolibrary.org/obo/CAROLIO_\")\n",
" lbl = node['lbl']\n",
" meta = node.get(\"meta\", {})\n",
" synonyms = [s['val'] for s in meta.get(\"synonyms\", [])]\n",
" #print(node)\n",
" node_type = node['type']\n",
" if node_type == \"PROPERTY\":\n",
" continue\n",
" #xx = client.search(lbl)\n",
"\n",
" results = grounder.ground(lbl)\n",
" if not results and lbl.endswith(\"treatment\"):\n",
" results = grounder.ground(lbl.removesuffix(\"treatment\").strip())\n",
" if not results and lbl.endswith(\"therapy\"):\n",
" results = grounder.ground(lbl.removesuffix(\"therapy\").strip())\n",
" if not results:\n",
" safe.append((luid, lbl)) \n",
" else:\n",
" print(f'- `CAROLIO:{luid}`', lbl)\n",
" for res in results:\n",
" curie = res.term.get_curie()\n",
" print(f' - [`{curie}`](https://bioregistry.io/{curie}) {res.term.entry_name} ({round(res.score, 3)})')\n",
"\n",
"print(\"\\n## Lexical matching returned no results\\n\")\n",
"for luid, lbl in safe:\n",
" print(f'- `CAROLIO:{luid}`', lbl)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment