Elijah Bernstein-Cooper ezbc

## entity-extraction-post-request.sh
 curl -X POST \
 "https://drug-portal.appspot.com/ner/drug.json" \
 --header "Content-Type: application/json" \
 --data '{"text": "Assess the patients pulse and blood pressure before and intermittently for 30 minutes after AdreView administration. AdreView may increase release of norepinephrine from chromaffin granules and produce a transient episode of hypertension, although this was not observed in the clinical studies. Prior to AdreView administration, ensure emergency cardiac and anti-hypertensive treatments are readily available."}'

## entity-extraction-entities.json
{"entities": [
{"text": "death", "start_char": 113, "end_char": 118, "label": "AdverseReaction"},
{"text": "corticosteroids", "start_char": 207, "end_char": 222, "label": "DrugClass"},
{"text": "death", "start_char": 310, "end_char": 315, "label": "AdverseReaction"},
{"text": "LABA", "start_char": 321, "end_char": 325, "label": "DrugClass"},
{"text": "LABA", "start_char": 564, "end_char": 568, "label": "DrugClass"},
{"text": "deaths", "start_char": 660, "end_char": 666, "label": "AdverseReaction"}]}

## entity-extraction-split-test-train.py
import random

def split_test_train(data, test_frac=0.1):
  '''Randomly split `data` into a training part and a test part.

  A shuffled copy is split, so the caller's sequence keeps its order
  (shuffling `data` itself would silently reorder the caller's list).

  Args:
    data: sequence of examples (any iterable that `list()` accepts).
    test_frac: fraction of the examples placed in the test part.

  Returns:
    [train, test]: two lists. `train` holds the first
    int((1 - test_frac) * len(data)) shuffled examples, `test` the rest.
  '''
  shuffled = list(data)
  random.shuffle(shuffled)
  split_index = int((1 - test_frac) * len(shuffled))
  return [shuffled[:split_index], shuffled[split_index:]]

# Hold out 10% of the examples for testing (test_frac=0.1).
# NOTE(review): DATA is not defined in this excerpt -- presumably the full
# list of annotated examples; confirm.
TRAIN_DATA, TEST_DATA = split_test_train(DATA, 0.1)

## entity-extraction-build-training-set.py
def build_entity_ref(attrib, entities, labels):
  ''' Builds an entity annotation.
  A sequence of (start, end, label) triples.
  start and end should be character-offset integers
  denoting the slice into the original string.
  '''
  # NOTE(review): this excerpt is truncated -- the body of the inner `if`
  # and any return value are not shown, and `entities` is not used in the
  # visible lines.
  # A comma in `start` means the mention lists more than one offset (in the
  # example below both `start` and `len` are comma-separated).
  if (',' in attrib['start']):
    # XML looks like
    #     <Mention id="M16" section="S1" type="AdverseReaction" start="1577,1600" len="14,8" str="injection site problems" />
    # Only the mention's `type` is checked against `labels` here.
    if (attrib['type'] not in labels):

## entity-extraction-get-xml-docs.py
# Imports the Google Cloud client library
from google.cloud import storage
import xml.etree.ElementTree as ET

# Instantiates a client (google.cloud.storage.Client, called with no arguments)
storage_client = storage.Client()

# The name for the new bucket
# NOTE(review): no bucket is created in this excerpt; this looks like the
# name of the bucket the XML documents are read from -- confirm.
bucket_name = 'drug_portal'

## entity-extraction-model-evaluation.py
import spacy
from spacy.gold import GoldParse
from spacy.scorer import Scorer

#ner_model = spacy.load('en_core_web_md') # for spaCy's pretrained use 'en_core_web_sm'

def evaluate(ner_model, examples):

    ''' Score the NER model with a test set.'''
    # NOTE(review): the excerpt ends here -- only the Scorer is created; the
    # loop over `examples` and the returned scores are not shown.
    scorer = Scorer()

## entity-extraction-drug-ner.py
import spacy

class DrugNER:
    '''Extracts entities from text using spacy with a particular model

    NOTE(review): only the constructor is visible in this excerpt; the
    extraction logic itself is not shown.
    '''

    def __init__(self, model_name, model_path):
        '''Remember which model this extractor should use.

        Args:
            model_name: stored unchanged as `self.model_name`.
            model_path: stored unchanged as `self.model_path`
                (presumably where the spaCy model lives -- confirm).
        '''
        # Indentation is spaces only; the body used to mix spaces and tabs.
        self.model_name = model_name
        self.model_path = model_path


## entity-extraction-train-ner.py

# `from __future__` imports must come before any other statement in a
# module (placing them after `import plac` is a SyntaxError), so they lead.
from __future__ import unicode_literals, print_function

import random
from pathlib import Path

import plac
import spacy


## entity-extraction-training-data.py
# A single training example in (text, annotations) form. Each entity is a
# (start, end, label) triple: character offsets into the text plus the
# mention type.
# The opening parenthesis sits on the assignment line because a bare
# `train_data =` followed by a newline is a SyntaxError.
# The first label is the mention *type* ('Severity'), not its text
# ('Serious'); see the annotated XML: type="Severity" start="38" len="7".
# NOTE(review): the offsets come from the annotated XML; whitespace in this
# literal appears to differ from the XML section text, so verify that
# text[38:45] is 'Serious' before training on it.
train_data = (
  '''6 adverse reactions
	  EXCERPT:   Serious hypersensitivity reactions have been reported following AdreView administration. The most common adverse reactions, dizziness, rash, pruritis, flushing, headache, and injection site hemorrhage occurred in &lt; 1.3% of patients. (  6.1  ,  6.2  ))''',
  {'entities': [
    (38, 45, 'Severity'),
    (46, 72, 'AdverseReaction'),
  ]},
)

## entity-extraction-annotated-doc.xml
<Label drug="adreview" track="TAC2017_ADR">
  <Text>
    <section name="adverse reactions" id="s1">    6 adverse reactions
		  EXCERPT:   Serious hypersensitivity reactions have been reported following AdreView administration. The most common adverse reactions, dizziness, rash, pruritis, flushing, headache, and injection site hemorrhage occurred in &lt; 1.3% of patients. (  6.1  ,  6.2  )
    </section>
  </Text>
  <Mentions>
    <Mention id="M1" section="S1" type="Severity" start="38" len="7" str="Serious" />
	curl -X POST \
	"https://drug-portal.appspot.com/ner/drug.json" \
	--header "Content-Type: application/json" \
	--data '{"text": "Assess the patients pulse and blood pressure before and intermittently for 30 minutes after AdreView administration. AdreView may increase release of norepinephrine from chromaffin granules and produce a transient episode of hypertension, although this was not observed in the clinical studies. Prior to AdreView administration, ensure emergency cardiac and anti-hypertensive treatments are readily available."}'
	{"entities": [
	{"text": "death", "start_char": 113, "end_char": 118, "label": "AdverseReaction"},
	{"text": "corticosteroids", "start_char": 207, "end_char": 222, "label": "DrugClass"},
	{"text": "death", "start_char": 310, "end_char": 315, "label": "AdverseReaction"},
	{"text": "LABA", "start_char": 321, "end_char": 325, "label": "DrugClass"},
	{"text": "LABA", "start_char": 564, "end_char": 568, "label": "DrugClass"},
	{"text": "deaths", "start_char": 660, "end_char": 666, "label": "AdverseReaction"}]}
	import random

	def split_test_train(data, test_frac=0.1):
	  '''Randomly split `data` into [train, test] lists.
	  A shuffled copy is split, so the caller's sequence keeps its order.
	  `train` holds the first int((1 - test_frac) * len(data)) shuffled
	  examples and `test` holds the rest.
	  '''
	  # The body is indented again; flattened to the `def` column it was an
	  # IndentationError.
	  shuffled = list(data)
	  random.shuffle(shuffled)
	  split_index = int((1 - test_frac) * len(shuffled))
	  return [shuffled[:split_index], shuffled[split_index:]]

	# Hold out 10% of the examples for testing (test_frac=0.1).
	# NOTE(review): DATA is not defined in this excerpt -- presumably the full
	# list of annotated examples; confirm.
	TRAIN_DATA, TEST_DATA = split_test_train(DATA, 0.1)
	def build_entity_ref(attrib, entities, labels):
	''' Builds an entity annotation.
	A sequence of (start, end, label) triples.
	start and end should be character-offset integers
	denoting the slice into the original string.
	'''
	# NOTE(review): indentation was lost in this copy and the excerpt is
	# truncated -- the body of the second `if` is not shown.
	# A comma in `start` means the mention lists more than one offset.
	if (',' in attrib['start']):
	# XML looks like
	# <Mention id="M16" section="S1" type="AdverseReaction" start="1577,1600" len="14,8" str="injection site problems" />
	# Only the mention's `type` is checked against `labels` here.
	if (attrib['type'] not in labels):
	# Imports the Google Cloud client library
	from google.cloud import storage
	import xml.etree.ElementTree as ET

	# Instantiates a client (google.cloud.storage.Client, called with no arguments)
	storage_client = storage.Client()

	# The name for the new bucket
	# NOTE(review): no bucket is created in this excerpt; this looks like the
	# name of the bucket the XML documents are read from -- confirm.
	bucket_name = 'drug_portal'
	import spacy
	from spacy.gold import GoldParse
	from spacy.scorer import Scorer

	#ner_model = spacy.load('en_core_web_md') # for spaCy's pretrained use 'en_core_web_sm'

	def evaluate(ner_model, examples):
	    ''' Score the NER model with a test set.'''
	    # The body is indented again; flattened to the `def` column it was an
	    # IndentationError.
	    # NOTE(review): the excerpt ends here -- only the Scorer is created; the
	    # loop over `examples` and the returned scores are not shown.
	    scorer = Scorer()
	import spacy

	class DrugNER:
	    '''Extracts entities from text using spacy with a particular model'''
	    # NOTE(review): only the constructor is visible in this excerpt.
	    # Class and method bodies are indented again; flattened to a single
	    # column they were an IndentationError.

	    def __init__(self, model_name, model_path):
	        '''Store `model_name` and `model_path` unchanged on the instance.'''
	        self.model_name = model_name
	        self.model_path = model_path

	# `from __future__` imports must come before any other statement in a
	# module (placing them after `import plac` is a SyntaxError), so they lead.
	from __future__ import unicode_literals, print_function

	import random
	from pathlib import Path

	import plac
	import spacy
	# A single training example in (text, annotations) form. Each entity is a
	# (start, end, label) triple: character offsets plus the mention type.
	# The opening parenthesis sits on the assignment line because a bare
	# `train_data =` followed by a newline is a SyntaxError.
	# The first label is the mention *type* ('Severity'), not its text
	# ('Serious'); see the annotated XML: type="Severity" start="38" len="7".
	# NOTE(review): whitespace in this copy of the text was collapsed, so the
	# offsets no longer line up with the literal -- verify before use.
	train_data = (
	  '''6 adverse reactions
	EXCERPT: Serious hypersensitivity reactions have been reported following AdreView administration. The most common adverse reactions, dizziness, rash, pruritis, flushing, headache, and injection site hemorrhage occurred in < 1.3% of patients. ( 6.1 , 6.2 ))''',
	  {'entities': [
	    (38, 45, 'Severity'),
	    (46, 72, 'AdverseReaction'),
	  ]},
	)
	<Label drug="adreview" track="TAC2017_ADR">
	<Text>
	<section name="adverse reactions" id="s1"> 6 adverse reactions
	EXCERPT: Serious hypersensitivity reactions have been reported following AdreView administration. The most common adverse reactions, dizziness, rash, pruritis, flushing, headache, and injection site hemorrhage occurred in < 1.3% of patients. ( 6.1 , 6.2 )
	</section>
	</Text>
	<Mentions>
	<Mention id="M1" section="S1" type="Severity" start="38" len="7" str="Serious" />