Skip to content

Instantly share code, notes, and snippets.

@Aditya1001001
Created February 8, 2022 13:40
Show Gist options
  • Save Aditya1001001/75fc522ef87ae72aa624388586216ada to your computer and use it in GitHub Desktop.
Save Aditya1001001/75fc522ef87ae72aa624388586216ada to your computer and use it in GitHub Desktop.
# Container for the converted dataset: the entity label set plus one
# {'text', 'entities'} record per annotated example.
training_data = {
    'classes': ['MEDICINE', "MEDICALCONDITION", "PATHOGEN"],
    'annotations': [],
}

for example in data['examples']:
    # Each entity becomes a (start, end, LABEL) tuple of character offsets;
    # tag names are upper-cased before being stored.
    entities = [
        (annotation['start'], annotation['end'], annotation['tag_name'].upper())
        for annotation in example['annotations']
    ]
    training_data['annotations'].append(
        {'text': example['content'], 'entities': entities}
    )

# Show the first converted record as a sanity check.
print(training_data['annotations'][0])
from spacy.util import filter_spans

# Turn every annotated example into a spaCy Doc with entity spans and
# collect the docs in `doc_bin` for serialization.
for training_example in tqdm(training_data['annotations']):
    text = training_example['text']
    labels = training_example['entities']
    doc = nlp.make_doc(text)  # tokenize only; no pipeline components run
    ents = []
    for start, end, label in labels:
        # char_span returns None when the character offsets cannot be
        # mapped onto tokens, so those annotations are skipped.
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            print("Skipping entity")
        else:
            ents.append(span)
    # filter_spans removes duplicate/overlapping spans before they are
    # assigned to doc.ents.
    doc.ents = filter_spans(ents)
    doc_bin.add(doc)

doc_bin.to_disk("training_data.spacy")  # save the docbin object
import json

# Read the annotated dataset. The encoding is given explicitly so the file
# is decoded the same way regardless of the platform's default locale.
with open('data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Show the first raw example to inspect its structure.
print(data['examples'][0])
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm
# Set up the two objects the conversion step needs: a pipeline to tokenize
# text with and a DocBin to collect the resulting docs in.
nlp = spacy.blank("en") # create a blank English pipeline
doc_bin = DocBin() # empty DocBin container; docs are added to it later
# Shell command: read base_config.cfg and write a completed config.cfg
# (fill-config adds the settings missing from the base file).
python -m spacy init fill-config base_config.cfg config.cfg
# Shell command: install spaCy together with its "transformers" extra.
# The requirement is quoted because unquoted square brackets are treated as
# a glob pattern by some shells (e.g. zsh) and the command then fails.
pip install "spacy[transformers]"
# Demo cells: run text through a spaCy pipeline and visualize the entities.
# NOTE(review): these statements look like notebook cells listed out of
# execution order — confirm the intended order before running as a script.

# Sample question used as input for the en_core_web_sm pipeline below.
text = "What video sharing service did Steve Chen, Chad Hurley, and Jawed Karim create in 2005?"
from spacy import displacy
# Render the entities of `doc` inline (jupyter=True targets a notebook).
# NOTE(review): `doc` is not assigned before this line in this snippet;
# presumably it is the `nlp(text)` result created further down — confirm.
displacy.render(doc, style="ent", jupyter=True)
# Biomedical sample sentence run through whichever `nlp` is currently bound.
# NOTE(review): presumably the general-purpose English pipeline — confirm.
string = "Antiretroviral therapy ( ART ) is recommended for all HIV-infected individuals"
doc = nlp(string)
displacy.render(doc, style="ent", jupyter=True)
import spacy
# Load the en_core_web_sm pipeline package and process the sample question.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)
# Print the names of the components in the loaded pipeline.
print(nlp.pipe_names)
# Load a pipeline from the local "model-best" directory.
# NOTE(review): presumably the best checkpoint written by `spacy train`
# with `--output ./` — confirm.
nlp_ner = spacy.load("model-best")

# The sample passage is written as adjacent string literals, each ending in
# an explicit space or "\n". Backslash continuations inside a single literal
# glued the last word of one source line to the first word of the next
# (e.g. "HIV-infectedindividuals"), corrupting the input given to the model.
doc = nlp_ner(
    "Antiretroviral therapy (ART) is recommended for all HIV-infected "
    "individuals to reduce the risk of disease progression.\n"
    "ART also is recommended for HIV-infected individuals for the "
    "prevention of transmission of HIV.\n"
    "Patients starting ART should be willing and able to commit to "
    "treatment and understand the benefits and risks of therapy and the "
    "importance of adherence. Patients may choose to postpone therapy, "
    "and providers, on a case-by-case basis, may elect to defer therapy "
    "on the basis of clinical and/or psychosocial factors."
)

# Per-label highlight colors for the entity visualizer.
colors = {"PATHOGEN": "#F67DE3", "MEDICINE": "#7DF6D9", "MEDICALCONDITION": "#FFFFFF"}
options = {"colors": colors}
spacy.displacy.render(doc, style="ent", options=options, jupyter=True)
# Shell command: train with config.cfg, writing output to the current
# directory and using GPU 0.
# NOTE(review): --paths.dev points at the same file as --paths.train, so
# evaluation is done on the training data — confirm whether a held-out dev
# set should be used instead.
python -m spacy train config.cfg --output ./ --paths.train ./training_data.spacy --paths.dev ./training_data.spacy --gpu-id 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment