Skip to content

Instantly share code, notes, and snippets.

View ezbc's full-sized avatar

Elijah Bernstein-Cooper ezbc

View GitHub Profile
# POST a JSON document with a single "text" field to the drug NER endpoint.
curl -X POST \
"https://drug-portal.appspot.com/ner/drug.json" \
--header "Content-Type: application/json" \
--data '{"text": "Assess the patients pulse and blood pressure before and intermittently for 30 minutes after AdreView administration. AdreView may increase release of norepinephrine from chromaffin granules and produce a transient episode of hypertension, although this was not observed in the clinical studies. Prior to AdreView administration, ensure emergency cardiac and anti-hypertensive treatments are readily available."}'
{"entities": [
{"text": "death", "start_char": 113, "end_char": 118, "label": "AdverseReaction"},
{"text": "corticosteroids", "start_char": 207, "end_char": 222, "label": "DrugClass"},
{"text": "death", "start_char": 310, "end_char": 315, "label": "AdverseReaction"},
{"text": "LABA", "start_char": 321, "end_char": 325, "label": "DrugClass"},
{"text": "LABA", "start_char": 564, "end_char": 568, "label": "DrugClass"},
{"text": "deaths", "start_char": 660, "end_char": 666, "label": "AdverseReaction"}]}
import random
def split_test_train(data, test_frac=0.1, seed=None):
    """Randomly split ``data`` into a training part and a test part.

    The input is copied before shuffling, so the caller's sequence keeps
    its original order.

    Args:
        data: Sequence of examples to split.
        test_frac: Fraction of the examples, between 0 and 1, to hold out
            for testing. The training part receives
            ``int((1 - test_frac) * len(data))`` examples and the test part
            receives the rest.
        seed: Optional seed for a reproducible split. When ``None`` the
            module-level ``random`` generator is used, so a prior
            ``random.seed(...)`` call is still honoured.

    Returns:
        A two-element list ``[train, test]`` of lists.

    Raises:
        ValueError: If ``test_frac`` is outside the range [0, 1].
    """
    if not 0 <= test_frac <= 1:
        raise ValueError(
            'test_frac must be between 0 and 1, got {!r}'.format(test_frac)
        )

    # Shuffle a copy: random.shuffle works in place and would otherwise
    # silently reorder the caller's data.
    shuffled = list(data)
    if seed is None:
        random.shuffle(shuffled)
    else:
        random.Random(seed).shuffle(shuffled)

    split_index = int((1 - test_frac) * len(shuffled))
    return [shuffled[:split_index], shuffled[split_index:]]
# Hold out 10% of DATA as the test set; the remaining examples become the training set.
# DATA is not defined in this snippet and must be provided elsewhere.
TRAIN_DATA, TEST_DATA = split_test_train(DATA, 0.1)
def build_entity_ref(attrib, entities, labels):
''' Builds an entity annotation.
A sequence of (start, end, label) triples.
start and end should be character-offset integers
denoting the slice into the original string.
'''
if (',' in attrib['start']):
# XML looks like
# <Mention id="M16" section="S1" type="AdverseReaction" start="1577,1600" len="14,8" str="injection site problems" />
if (attrib['type'] not in labels):
# Imports the Google Cloud client library
from google.cloud import storage
import xml.etree.ElementTree as ET
# Instantiate a Google Cloud Storage client.
# NOTE(review): presumably this picks up credentials from the environment; confirm.
storage_client = storage.Client()
# Name of the bucket this script works with.
# NOTE(review): the earlier comment called it "the new bucket", but no bucket creation is visible here.
bucket_name = 'drug_portal'
import spacy
from spacy.gold import GoldParse
from spacy.scorer import Scorer
#ner_model = spacy.load('en_core_web_md') # for spaCy's pretrained use 'en_core_web_sm'
def evaluate(ner_model, examples):
''' Score the NER model with a test set.'''
scorer = Scorer()
import spacy
class DrugNER:
    """Extract entities from text using spaCy with a particular model.

    Only the constructor is visible in this snippet; it records which model
    to use and where it lives.
    """

    def __init__(self, model_name, model_path):
        """Remember the model's name and location.

        Args:
            model_name: Identifier of the model to use.
            model_path: Location of the model.
                NOTE(review): presumably a filesystem path or storage key;
                confirm against the code that loads the model.
        """
        self.model_name = model_name
        self.model_path = model_path
# A __future__ import must come before any other import in the module,
# otherwise Python raises a SyntaxError.
from __future__ import unicode_literals, print_function

import random
from pathlib import Path

import plac
import spacy
@ezbc
ezbc / entity-extraction-training-data.py
Last active August 12, 2018 21:03
Training data example
# One training example as a (text, annotations) pair. Each entry under
# 'entities' is a (start_char, end_char, label) triple, where the label is
# the mention type, not the mention text.
# NOTE(review): the offsets appear to index into the original drug-label text;
# whitespace in this excerpt looks collapsed, so verify that
# text[38:45] == 'Serious' against the real source before training.
train_data = (
    '''6 adverse reactions
EXCERPT: Serious hypersensitivity reactions have been reported following AdreView administration. The most common adverse reactions, dizziness, rash, pruritis, flushing, headache, and injection site hemorrhage occurred in &lt; 1.3% of patients. ( 6.1 , 6.2 ))''',
    {
        'entities': [
            (38, 45, 'Severity'),
            (46, 72, 'AdverseReaction'),
        ],
    },
)
@ezbc
ezbc / entity-extraction-annotated-doc.xml
Created August 12, 2018 20:58
Annotated Drug Label XML
<Label drug="adreview" track="TAC2017_ADR">
<Text>
<section name="adverse reactions" id="s1"> 6 adverse reactions
EXCERPT: Serious hypersensitivity reactions have been reported following AdreView administration. The most common adverse reactions, dizziness, rash, pruritis, flushing, headache, and injection site hemorrhage occurred in &lt; 1.3% of patients. ( 6.1 , 6.2 )
</section>
</Text>
<Mentions>
<Mention id="M1" section="S1" type="Severity" start="38" len="7" str="Serious" />