Created
February 8, 2022 13:40
-
-
Save Aditya1001001/75fc522ef87ae72aa624388586216ada to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reshape the loaded annotation export into one record per example:
# the example text plus a list of (start, end, LABEL) tuples.
training_data = {
    'classes': ['MEDICINE', "MEDICALCONDITION", "PATHOGEN"],
    'annotations': [],
}

for example in data['examples']:
    record = {
        'text': example['content'],
        # Tag names are upper-cased so they match the entries in 'classes'.
        'entities': [
            (annotation['start'], annotation['end'], annotation['tag_name'].upper())
            for annotation in example['annotations']
        ],
    }
    training_data['annotations'].append(record)

# Show the first converted record as a quick sanity check.
print(training_data['annotations'][0])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from spacy.util import filter_spans

# Build one Doc per annotated example, attach its entity spans, and collect
# the result in doc_bin.
for training_example in tqdm(training_data['annotations']):
    text = training_example['text']
    annotated = training_example['entities']
    doc = nlp.make_doc(text)

    spans = []
    for start, end, label in annotated:
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is not None:
            spans.append(span)
            continue
        # char_span returned no span for these character offsets.
        print("Skipping entity")

    # filter_spans is applied before assigning to doc.ents
    # (presumably to drop overlapping spans - confirm against the spaCy docs).
    doc.ents = filter_spans(spans)
    doc_bin.add(doc)

doc_bin.to_disk("training_data.spacy")  # save the docbin object
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json

# Load the annotated dataset. The encoding is given explicitly so the file is
# decoded as UTF-8 on every platform instead of the locale default, which
# can fail or mis-decode non-ASCII characters in the text.
with open('data.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Peek at the first entry of the top-level 'examples' sequence.
print(data['examples'][0])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm

# Container that the converted Doc objects are added to and later saved from.
doc_bin = DocBin()
# Blank "en" pipeline; it is used here only to create Docs from raw text.
nlp = spacy.blank("en")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fill base_config.cfg out into the complete training config, config.cfg.
python -m spacy init fill-config base_config.cfg config.cfg
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# The requirement is quoted so that shells such as zsh do not try to expand
# [transformers] as a glob pattern and abort with "no matches found".
pip install "spacy[transformers]"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from spacy import displacy

# Sample sentence used to demonstrate entity recognition.
text = "What video sharing service did Steve Chen, Chad Hurley, and Jawed Karim create in 2005?"

# NOTE(review): `doc` is not created in this snippet; presumably it is the
# result of running a pipeline over `text` in another cell - confirm.
displacy.render(doc, style="ent", jupyter=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run the currently loaded pipeline over a medical sentence and display
# whatever entities it finds.
string = (
    "Antiretroviral therapy ( ART ) is recommended "
    "for all HIV-infected individuals"
)
doc = nlp(string)
displacy.render(doc, style="ent", jupyter=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import spacy

# Load the en_core_web_sm pipeline and process `text`, which must already
# be defined before this snippet runs.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# List the names of the components in the loaded pipeline.
print(nlp.pipe_names)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the pipeline stored in the "model-best" directory and try it on a
# longer passage.
nlp_ner = spacy.load("model-best")

# The passage is assembled with implicit string concatenation and an explicit
# space at every line break. Backslash continuations join lines with no
# separator, which fused words such as "HIV-infectedindividuals" and
# "thebenefits" in the text handed to the model.
doc = nlp_ner(
    "Antiretroviral therapy (ART) is recommended for all HIV-infected "
    "individuals to reduce the risk of disease progression.\n"
    "ART also is recommended for HIV-infected individuals for the "
    "prevention of transmission of HIV.\n"
    "Patients starting ART should be willing and able to commit to "
    "treatment and understand the benefits and risks of therapy and the "
    "importance of adherence. Patients may choose to postpone therapy, "
    "and providers, on a case-by-case basis, may elect to defer therapy "
    "on the basis of clinical and/or psychosocial factors."
)

# One highlight colour per entity label used by this project.
colors = {"PATHOGEN": "#F67DE3", "MEDICINE": "#7DF6D9", "MEDICALCONDITION": "#FFFFFF"}
options = {"colors": colors}
spacy.displacy.render(doc, style="ent", options=options, jupyter=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train using config.cfg, writing output to the current directory, on GPU 0.
# NOTE(review): --paths.dev points at the same file as --paths.train, so the
# scores reported during training are measured on the training data itself;
# supply a held-out .spacy file for a meaningful evaluation.
python -m spacy train config.cfg --output ./ --paths.train ./training_data.spacy --paths.dev ./training_data.spacy --gpu-id 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment