Created
May 3, 2025 13:44
-
-
Save Vishnu-add/fc32b15070ca37502d4265980905a0e7 to your computer and use it in GitHub Desktop.
NER_with_Spacy_main.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyO7IqYxgmIhLcNdqf47/4cR", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/Vishnu-add/fc32b15070ca37502d4265980905a0e7/ner_with_spacy_main.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "Ua7LR5q-UoIb" | |
}, | |
"outputs": [], | |
"source": [ | |
"!pip install spacy==3.4.4\n", | |
"!pip install transformers==4.34.0\n", | |
"!pip install spacy-transformers==1.3.1\n", | |
"!pip install spacy[cuda] # Install spaCy with CUDA support\n", | |
"!pip install spacy[cuda100] # Install spaCy with CUDA support\n", | |
"!pip install -U scikit-learn # Upgrade scikit-learn for compatibility\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from google.colab import drive\n", | |
"drive.mount('/content/drive')" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "BvMFjFkV77r3", | |
"outputId": "4a0ed753-d596-41d5-a6c8-3f93ce136c48" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "w58yMGUX7Ssd" | |
}, | |
"source": [ | |
"Creating a custom Named Entity Recognition model with spaCy, which can recognize the disease names from clinical text\n", | |
"\n", | |
"Once you download and unzip the files you get 8 datasets with each dataset having the following files:\n", | |
"train.tsv, test.tsv , dev.tsv and devel.tsv\n", | |
"In These tsv files each word is annotated using the BIO format.\n", | |
"A few lines from train.tsv in BC5CDR-disease dataset looks like:\n", | |
"Selegiline\tO\n", | |
"-\tO\n", | |
"induced\tO\n", | |
"postural\tB\n", | |
"hypotension\tI\n", | |
"in\tO\n", | |
"Parkinson\tB\n", | |
"'\tI\n", | |
"s\tI\n", | |
"disease\tI\n", | |
":\tO\n", | |
"a\tO\n", | |
"longitudinal\tO\n", | |
"study\tO\n", | |
"on\tO\n", | |
"the\tO\n", | |
"effects\tO\n", | |
"of\tO\n", | |
"drug\tO\n", | |
"withdrawal\tO\n", | |
".\tO\n", | |
"Here it is of the format:\n", | |
"word \\t label\\n\n", | |
"for instance:\n", | |
"postural\tB\n", | |
"hypotension\tI\n", | |
"\n", | |
"here B-> Begin entity, I-> inside entity and O-> outside entity\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "LeOQqsr07Ssk" | |
}, | |
"outputs": [], | |
"source": [ | |
"#Import all required libraries\n", | |
"import spacy\n", | |
"import random\n", | |
"import time\n", | |
"import numpy as np\n", | |
"import sys\n", | |
"from spacy import displacy\n", | |
"from itertools import chain\n", | |
"import matplotlib.pyplot as plt\n", | |
"from matplotlib.ticker import MaxNLocator" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Mqdv1zvg7Ssm" | |
}, | |
"source": [ | |
"We have to convert tsv file to the format accepted by spaCy for training.\n", | |
"One of the format supported by spaCy is:\n", | |
"TRAIN_DATA = [[(Sentence, {entities: [(start, end, label)]], ...]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "CN8tTZzy7Ssn" | |
}, | |
"outputs": [], | |
"source": [ | |
"def load_data_spacy(file_path):\n", | |
" ''' Converts data from:\n", | |
" word \\t label \\n word \\t label \\n \\n word \\t label\n", | |
" to: sentence, {entities : [(start, end, label), (stard, end, label)]}\n", | |
" '''\n", | |
" file = open(file_path, 'r')\n", | |
" training_data, entities, sentence, unique_labels = [], [], [], []\n", | |
" current_annotation = None\n", | |
" start =0\n", | |
" end = 0 # initialize counter to keep track of start and end characters\n", | |
" for line in file:\n", | |
" line = line.strip(\"\\n\").split(\"\\t\")\n", | |
" # lines with len > 1 are words\n", | |
" if len(line) > 1:\n", | |
" label = line[1]\n", | |
" if(label != 'O'):\n", | |
" label = line[1]+\"_Disease\" # the .txt is formatted: label \\t word, label[0:2] = label_type\n", | |
" #label_type = line[0][0] # beginning of annotations - \"B\", intermediate - \"I\"\n", | |
" word = line[0]\n", | |
" sentence.append(word)\n", | |
" start = end\n", | |
" end += (len(word) + 1) # length of the word + trailing space\n", | |
"\n", | |
" if label == 'I_Disease' : # if at the end of an annotation\n", | |
" entities.append(( start,end-1, label)) # append the annotation\n", | |
"\n", | |
" if label == 'B_Disease': # if beginning new annotation\n", | |
" entities.append(( start,end-1, label))# start annotation at beginning of word\n", | |
"\n", | |
"\n", | |
"\n", | |
" if label != 'O' and label not in unique_labels:\n", | |
" unique_labels.append(label)\n", | |
"\n", | |
" # lines with len == 1 are breaks between sentences\n", | |
" if len(line) == 1:\n", | |
" if(len(entities) > 0):\n", | |
" sentence = \" \".join(sentence)\n", | |
" training_data.append([sentence, {'entities' : entities}])\n", | |
" # reset the counters and temporary lists\n", | |
" end = 0\n", | |
" start = 0\n", | |
" entities, sentence = [], []\n", | |
"\n", | |
" file.close()\n", | |
" return training_data, unique_labels" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"metadata": { | |
"id": "bSKdrhkS75o2" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "fPtVaRkM7Sso" | |
}, | |
"source": [ | |
"Let us convert our train data,test data and validation data to spaCy format" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "zCzMxVhO7Ssp", | |
"outputId": "cd827412-9206-4a20-b603-ec138bd8e870" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"2658 ['B_Disease', 'I_Disease']\n", | |
"2842\n", | |
"5385\n" | |
] | |
} | |
], | |
"source": [ | |
"TRAIN_DATA, LABELS = load_data_spacy(\"/content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/train.tsv\")\n", | |
"# print(TRAIN_DATA)\n", | |
"print(len(TRAIN_DATA),LABELS)\n", | |
"TEST_DATA, _ = load_data_spacy(\"/content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/test.tsv\")\n", | |
"print(len(TEST_DATA))\n", | |
"VALID_DATA, _ = load_data_spacy(\"/content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/train_dev.tsv\")\n", | |
"print(len(VALID_DATA))\n" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "Ae8fBl7p7Ssq" | |
}, | |
"source": [ | |
"https://stackoverflow.com/questions/67407433/using-spacy-3-0-to-convert-data-from-old-spacy-v2-format-to-the-brand-new-spacy The below method is used to convert the train and validation data from old format to new format" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "5g3i3W8k7Ssr", | |
"outputId": "ce039109-b8aa-49ae-b15f-74fccc1533c9" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stderr", | |
"text": [ | |
"100%|██████████| 2658/2658 [00:00<00:00, 2954.00it/s]\n", | |
"100%|██████████| 5385/5385 [00:01<00:00, 4271.46it/s]\n" | |
] | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"from tqdm import tqdm\n", | |
"import spacy\n", | |
"from spacy.tokens import DocBin\n", | |
"nlp = spacy.blank(\"en\") # load a new spacy model\n", | |
"db = DocBin() # create a DocBin object\n", | |
"\n", | |
"for text, annot in tqdm(TRAIN_DATA): # data in previous format\n", | |
" doc = nlp.make_doc(text) # create doc object from text\n", | |
" ents = []\n", | |
" for start, end, label in annot[\"entities\"]: # add character indexes\n", | |
" span = doc.char_span(start, end, label=label, alignment_mode=\"contract\")\n", | |
" if span is None:\n", | |
" print(\"Skipping entity\")\n", | |
" else:\n", | |
" ents.append(span)\n", | |
" doc.ents = ents # label the text with the ents\n", | |
" db.add(doc)\n", | |
"\n", | |
"db.to_disk(\"./train.spacy\") # save the docbin object\n", | |
"\n", | |
"db = DocBin()\n", | |
"for text, annot in tqdm(VALID_DATA): # data in previous format\n", | |
" doc = nlp.make_doc(text) # create doc object from text\n", | |
" ents = []\n", | |
" for start, end, label in annot[\"entities\"]: # add character indexes\n", | |
" span = doc.char_span(start, end, label=label, alignment_mode=\"contract\")\n", | |
" if span is None:\n", | |
" print(\"Skipping entity\")\n", | |
" else:\n", | |
" ents.append(span)\n", | |
" doc.ents = ents # label the text with the ents\n", | |
" db.add(doc)\n", | |
"\n", | |
"db.to_disk(\"./valid.spacy\") # save the docbin object" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import spacy\n", | |
"gpu = spacy.prefer_gpu()\n", | |
"print('GPU:', gpu)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "lGYQxadmBPJ7", | |
"outputId": "0622db98-41d5-4b76-8297-9a04fd658109" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"GPU: True\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "GGSet1NA7Sst", | |
"outputId": "110df5ec-a893-44ab-a241-17da7f67e9f1" | |
}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"2023-10-10 08:33:32.967273: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n", | |
"[2023-10-10 08:33:36,181] [DEBUG] Config overrides from CLI: ['paths.train', 'paths.dev']\n", | |
"\u001b[38;5;2m✔ Created output directory: content/drive/MyDrive/Colab\n", | |
"Notebooks/CustomNERwithSpacy/spacy344/output/training\u001b[0m\n", | |
"\u001b[38;5;4mℹ Saving to output directory: content/drive/MyDrive/Colab\n", | |
"Notebooks/CustomNERwithSpacy/spacy344/output/training\u001b[0m\n", | |
"\u001b[38;5;4mℹ Using GPU: 0\u001b[0m\n", | |
"\u001b[1m\n", | |
"=========================== Initializing pipeline ===========================\u001b[0m\n", | |
"[2023-10-10 08:33:37,592] [INFO] Set up nlp object from config\n", | |
"[2023-10-10 08:33:37,626] [DEBUG] Loading corpus from path: /content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/spacy344/valid.spacy\n", | |
"[2023-10-10 08:33:37,628] [DEBUG] Loading corpus from path: /content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/spacy344/train.spacy\n", | |
"[2023-10-10 08:33:37,629] [INFO] Pipeline: ['transformer', 'ner']\n", | |
"[2023-10-10 08:33:37,634] [INFO] Created vocabulary\n", | |
"[2023-10-10 08:33:37,638] [INFO] Finished initializing nlp object\n", | |
"Downloading (…)lve/main/config.json: 100% 481/481 [00:00<00:00, 2.55MB/s]\n", | |
"Downloading (…)olve/main/vocab.json: 100% 899k/899k [00:00<00:00, 15.1MB/s]\n", | |
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 3.58MB/s]\n", | |
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 20.0MB/s]\n", | |
"Downloading model.safetensors: 100% 499M/499M [00:04<00:00, 120MB/s]\n", | |
"Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n", | |
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", | |
"[2023-10-10 08:33:46,673] [DEBUG] [W033] Training a new parser or NER using a model with no lexeme normalization table. This may degrade the performance of the model to some degree. If this is intentional or the language you're using doesn't have a normalization table, please ignore this warning. If this is surprising, make sure you have the spacy-lookups-data package installed and load the table in your config. The languages with lexeme normalization tables are currently: cs, da, de, el, en, id, lb, mk, pt, ru, sr, ta, th\n", | |
"\n", | |
"Load the table in your config with:\n", | |
"\n", | |
"[initialize.lookups]\n", | |
"@misc = \"spacy.LookupsDataLoader.v1\"\n", | |
"lang = ${nlp.lang}\n", | |
"tables = [\"lexeme_norm\"]\n", | |
"\n", | |
"[2023-10-10 08:34:09,878] [INFO] Initialized pipeline components: ['transformer', 'ner']\n", | |
"\u001b[38;5;2m✔ Initialized pipeline\u001b[0m\n", | |
"\u001b[1m\n", | |
"============================= Training pipeline =============================\u001b[0m\n", | |
"[2023-10-10 08:34:09,907] [DEBUG] Loading corpus from path: /content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/spacy344/valid.spacy\n", | |
"[2023-10-10 08:34:09,910] [DEBUG] Loading corpus from path: /content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/spacy344/train.spacy\n", | |
"\u001b[38;5;4mℹ Pipeline: ['transformer', 'ner']\u001b[0m\n", | |
"\u001b[38;5;4mℹ Initial learn rate: 0.0\u001b[0m\n", | |
"E # LOSS TRANS... LOSS NER ENTS_F ENTS_P ENTS_R SCORE \n", | |
"--- ------ ------------- -------- ------ ------ ------ ------\n", | |
" 0 0 5594.18 378.02 0.22 0.26 0.19 0.00\n", | |
" 4 200 125606.89 42874.09 87.38 85.14 89.73 0.87\n", | |
" 8 400 2576.09 4571.11 89.26 85.90 92.88 0.89\n", | |
" 12 600 1046.74 1799.70 92.00 92.12 91.88 0.92\n", | |
" 16 800 552.53 966.78 92.39 92.50 92.27 0.92\n", | |
" 21 1000 404.45 710.16 92.46 92.61 92.32 0.92\n", | |
" 25 1200 282.92 470.83 92.27 91.99 92.54 0.92\n", | |
" 29 1400 238.77 382.30 92.49 93.27 91.72 0.92\n", | |
" 33 1600 288.66 457.23 92.70 93.77 91.65 0.93\n", | |
" 38 1800 147.23 238.45 92.13 90.74 93.56 0.92\n", | |
" 42 2000 134.77 215.83 92.32 94.01 90.68 0.92\n", | |
" 46 2200 139.64 202.70 92.21 91.75 92.68 0.92\n", | |
" 50 2400 160.78 224.21 91.95 90.67 93.28 0.92\n", | |
" 55 2600 74.02 109.29 92.24 91.74 92.74 0.92\n", | |
" 59 2800 138.42 177.19 92.37 93.15 91.60 0.92\n", | |
" 63 3000 125.07 155.56 92.16 92.12 92.20 0.92\n", | |
" 67 3200 84.20 105.86 92.61 93.24 91.99 0.93\n", | |
"\u001b[38;5;2m✔ Saved pipeline to output directory\u001b[0m\n", | |
"content/drive/MyDrive/Colab\n", | |
"Notebooks/CustomNERwithSpacy/spacy344/output/training/model-last\n" | |
] | |
} | |
], | |
"source": [ | |
" !python -m spacy train \"/content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/spacy344/config.cfg\" --verbose --output \"./content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/spacy344/output/training/\" --paths.train \"/content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/spacy344/train.spacy\" --paths.dev \"/content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/spacy344/valid.spacy\" --gpu-id 0" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"ner = spacy.load(R\"./content/drive/MyDrive/Colab Notebooks/CustomNERwithSpacy/spacy344/output/training/model-best\") #load the best model\n" | |
], | |
"metadata": { | |
"id": "gZjdxuoQJM4E" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "a1lDVIAa7Ssu" | |
}, | |
"outputs": [], | |
"source": [ | |
"\n", | |
"\n", | |
"\n", | |
"\n", | |
"test_sentences = [x[0] for x in TEST_DATA[0:4000]] # extract the sentences from [sentence, entity]\n", | |
"for x in test_sentences:\n", | |
" doc = ner(x)\n", | |
" for ent in doc.ents:\n", | |
" print(ent.text, ent.start_char, ent.end_char, ent.label_)\n", | |
" displacy.render(doc,jupyter=True, style = \"ent\")\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "Czd2DtV-7Ssv", | |
"outputId": "d602a0ac-cf98-462b-de2f-18bb24b389de", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 122 | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
], | |
"text/html": [ | |
"<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">Selegiline - induced \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" postural\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">B_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" hypotension\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
" in \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" Parkinson\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">B_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" '\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" s\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" disease\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
" : a longitudinal study on the effects of drug withdrawal.The aims of this study were to confirm our previous findings in a separate cohort of patients and to determine the time course of the cardiovascular consequences of stopping selegiline in the expectation that this might shed light on the mechanisms by which the drug causes \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" orthostatic\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">B_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" hypotension\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
"</div></span>" | |
] | |
}, | |
"metadata": {} | |
} | |
], | |
"source": [ | |
"# ner = spacy.load(R\"ner_demo/training/model-best\") #load the best model\n", | |
"doc = ner(\"Selegiline - induced postural hypotension in Parkinson ' s disease : a longitudinal study on the effects of drug withdrawal.The aims of this study were to confirm our previous findings in a separate cohort of patients and to determine the time course of the cardiovascular consequences of stopping selegiline in the expectation that this might shed light on the mechanisms by which the drug causes orthostatic hypotension\")\n", | |
"displacy.render(doc,jupyter=True, style = \"ent\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "llmnVAjy7Ssv", | |
"outputId": "b25cb955-343e-4222-c930-b5cde36660d0", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 122 | |
} | |
}, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"text/plain": [ | |
"<IPython.core.display.HTML object>" | |
], | |
"text/html": [ | |
"<span class=\"tex2jax_ignore\"><div class=\"entities\" style=\"line-height: 2.5; direction: ltr\">Selegiline - induced \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" postural\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">B_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" hypotension\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
" in \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" Parkinson\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">B_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" '\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" s\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" disease\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
" : a longitudinal study on the effects of drug withdrawal.The aims of this study were to confirm our previous findings in a separate cohort of patients and to determine the time course of the cardiovascular consequences of stopping selegiline in the expectation that this might shed light on the mechanisms by which the drug causes \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" orthostatic\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">B_Disease</span>\n", | |
"</mark>\n", | |
" \n", | |
"<mark class=\"entity\" style=\"background: #ddd; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;\">\n", | |
" hypotension\n", | |
" <span style=\"font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem\">I_Disease</span>\n", | |
"</mark>\n", | |
"</div></span>" | |
] | |
}, | |
"metadata": {} | |
} | |
], | |
"source": [ | |
"# ner = spacy.load(R\"ner_demo/training/model-best\") #load the best model\n", | |
"doc = ner(\"Selegiline - induced postural hypotension in Parkinson ' s disease : a longitudinal study on the effects of drug withdrawal.The aims of this study were to confirm our previous findings in a separate cohort of patients and to determine the time course of the cardiovascular consequences of stopping selegiline in the expectation that this might shed light on the mechanisms by which the drug causes orthostatic hypotension\")\n", | |
"displacy.render(doc,jupyter=True, style = \"ent\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "-Hxy0lT07Ssw" | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment