Last active
July 31, 2018 15:27
-
-
Save callahantiff/8f08f28f6b51f1dff1e677c977f1db35 to your computer and use it in GitHub Desktop.
Script annotates clinical concepts to ontology terms using the NCBO API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##########################################################################################
# Concept_Annotator.py
# Purpose: annotate clinical concepts with ontology terms using the NCBO API
# Version: 1.0.0
##########################################################################################
import json
import os
import urllib2

import pandas as pd
# Base URL of the NCBO REST service that every request in this script targets.
REST_URL = "http://data.bioontology.org"
# API key sent in the Authorization header. Taken from the NCBO_API_KEY
# environment variable when it is set; otherwise falls back to the key that
# was previously hard-coded here, so existing runs keep working.
# NOTE(review): a key committed to source control should be treated as public;
# consider rotating it and dropping the fallback.
API_KEY = os.environ.get("NCBO_API_KEY", "3da0dc97-0afd-45a0-9ae8-ddc16f7df642")
def get_json(url):
    """Request *url* with the NCBO API key and return the decoded JSON body.

    The request carries an ``Authorization: apikey token=<API_KEY>`` header
    built from the module-level ``API_KEY``.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The object produced by decoding the response body as JSON.

    Raises:
        urllib2.URLError: If the request fails (``urllib2.HTTPError`` for
            HTTP error statuses).
        ValueError: If the response body is not valid JSON.
    """
    opener = urllib2.build_opener()
    opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
    response = opener.open(url)
    try:
        return json.load(response)
    finally:
        # Release the connection even when JSON decoding fails.
        response.close()
def print_annotations(annotations, get_class=True):
    """Collect the class id and preferred label for each annotation.

    Despite its name, this function prints nothing; it returns the matches
    so the caller can print or store them.

    Args:
        annotations: Iterable of annotation dicts, each holding an
            ``"annotatedClass"`` entry.
        get_class: When true, the full class record is fetched with
            ``get_json`` from the annotated class's ``links.self`` URL (one
            request per annotation). When false, the ``"annotatedClass"``
            dict embedded in the annotation is used as-is.

    Returns:
        A list with one ``[id, prefLabel]`` pair per annotation, in input
        order. ``prefLabel`` is ``None`` when the class record has none.

    Raises:
        KeyError: If an annotation lacks ``"annotatedClass"``, the class
            record lacks ``"@id"``, or (with ``get_class``) the ``links.self``
            entry is missing.
    """
    matches = []
    for result in annotations:
        annotated = result["annotatedClass"]
        class_details = get_json(annotated["links"]["self"]) if get_class else annotated
        # The embedded class is not guaranteed to carry a prefLabel (the
        # Annotator appears to include it only when requested via `include=`),
        # so a missing label yields None instead of a KeyError.
        matches.append([class_details["@id"], class_details.get("prefLabel")])
    return matches
# Annotate every concept listed in the input CSV against RXNORM and MESH.
df = pd.read_csv('RareDisease_Medications.csv')

# Maps each concept_code to its list of [class id, prefLabel] matches.
# A concept_code that appears on several rows keeps only the last row's matches.
results = {}
for _, row in df.iterrows():
    key = row['concept_code']
    concept = row['ac_name']
    annotations = get_json(REST_URL + "/annotator?ontologies=RXNORM,MESH&longest_only=true&text=" +
                           urllib2.quote(concept))
    # Single-argument print(...) produces the same output as the Python 2
    # print statement, including the space between key and concept.
    print("\n" + str(key) + " " + str(concept))
    # Resolve the matches once and reuse them: each print_annotations call
    # issues one class-detail request per annotation.
    matches = print_annotations(annotations)
    print(matches)
    results[key] = matches

# Write one "<concept_code>\t<class id suffix>" line per match whose id
# contains "MESH"; the suffix is the last "/"-separated segment of the id.
# NOTE(review): the output file is named RxNORM_Mapping.txt but only ids
# containing "MESH" are written -- confirm this is the intended filter.
with open('RxNORM_Mapping.txt', 'w') as myfile:
    for keys, value in results.items():
        print(keys)
        for item in value:
            if "MESH" in item[0]:
                myfile.write(str(keys) + "\t" + str(item[0].split("/")[-1]) + "\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment