Skip to content

Instantly share code, notes, and snippets.

@dkoslicki
Created December 7, 2023 22:22
Show Gist options
  • Save dkoslicki/034832c5e6a8f8fa226ca780bd002b8a to your computer and use it in GitHub Desktop.
Save dkoslicki/034832c5e6a8f8fa226ca780bd002b8a to your computer and use it in GitHub Desktop.
score_ars_by_g_score.py
import requests
import os
import json
import glob
import re
# Names of the query categories; only used to build ``query_pattern`` below.
query_classes = [
    'ameliorates',
    'CPIC',
    'drug_treats_rare_disease',
    'DrugCentral_creative',
    'GTRx',
    'OOPD',
    'RareDisease',
    'three_hop',
    'treats',
]
# Names of the ARAs; only used to build ``ara_pattern`` below.
aras = ['aragorn', 'arax', 'bte', 'improving']

# Each pattern is a single capture group alternating over the names above,
# e.g. "(aragorn|arax|bte|improving)".
ara_pattern = re.compile("(" + "|".join(aras) + ")")
query_pattern = re.compile("(" + "|".join(query_classes) + ")")

# url = "https://nodenormalization-sri.renci.org/1.4/get_normalized_nodes"

# Every JSON file found anywhere below ``normalized_results/``.
response_files = glob.glob('normalized_results/**/*.json', recursive=True)

# Progress counter (starts at 1) and the list of files whose results could
# not be extracted; both are updated by the processing loop.
counter = 1
failed_to_get_results = []
# Old g-score
def old_get_confidence(result, eps=0.001):
    """Return the old g-score: the capped sum of a result's analysis scores.

    Every entry of ``result["analyses"]`` whose ``"score"`` is present and not
    ``None`` contributes to the sum.  A missing, ``None`` or empty
    ``"analyses"`` value is treated as "no analyses" and yields ``0.0``.

    The sum is then capped from above:

    * exactly one score is strictly positive -> capped at ``1 - eps``
    * more than one score is strictly positive -> capped at ``1``

    No lower clamp is applied, so negative scores can produce a negative sum.

    Args:
        result: Mapping with an optional ``"analyses"`` list of mappings,
            each with an optional numeric ``"score"``.
        eps: Margin kept below 1 when only a single positive score
            contributes.  Defaults to ``0.001``.

    Returns:
        The (possibly capped) sum of the scores.
    """
    score_sum = 0.0
    positive_count = 0
    for analysis in result.get("analyses") or []:
        score = analysis.get("score")
        if score is None:
            continue
        score_sum += score
        if score > 0:
            positive_count += 1

    if positive_count == 1:
        # A single contributing ARA may never reach a full confidence of 1.
        return min(score_sum, 1 - eps)
    if positive_count > 1:
        return min(score_sum, 1)
    return score_sum
def new_get_confidence(result):
    """Return the new g-score: ``1 - prod(1 - score)`` over a result's analyses.

    via: https://ncatstranslator.slack.com/archives/C0442D7N7J9/p1694416813304959

    Every entry of ``result["analyses"]`` whose ``"score"`` is present and not
    ``None`` contributes a factor ``(1 - score)`` to the product.  A missing,
    ``None`` or empty ``"analyses"`` value is treated as "no analyses".

    Args:
        result: Mapping with an optional ``"analyses"`` list of mappings,
            each with an optional numeric ``"score"``.

    Returns:
        ``0`` when no analysis has a strictly positive score, otherwise
        ``1 - prod(1 - score)``.
    """
    complement_product = 1
    has_positive_score = False
    for analysis in result.get("analyses") or []:
        score = analysis.get("score")
        if score is None:
            continue
        complement_product *= 1 - score
        if score > 0:
            has_positive_score = True

    # Without any positive score the result is pinned to 0, regardless of
    # what the product of the remaining (zero or negative) scores would give.
    if not has_positive_score:
        return 0
    return 1 - complement_product
def _write_ranked_copy(json_response, json_file, score_key, prefix):
    """Sort the response's results by ``score_key`` (descending) and dump it.

    The results list inside ``json_response`` is replaced by the sorted list,
    and the whole response is written to ``prefix + json_file``; missing
    parent directories are created first.
    """
    message = json_response['fields']['data']['message']
    message['results'] = sorted(
        message['results'], key=lambda entry: entry[score_key], reverse=True
    )
    # NOTE(review): the prefix is concatenated without a path separator, so the
    # output lands under e.g. "old_gscorenormalized_results/..." - confirm
    # that this is the intended directory name.
    new_file_path = prefix + json_file
    os.makedirs(os.path.dirname(new_file_path), exist_ok=True)
    with open(new_file_path, 'w') as fp:
        json.dump(json_response, fp, indent=4)


# Score every result of every selected response file with both g-score
# variants, then write one copy ranked by the old score and one ranked by the
# new score.
for json_file in response_files:
    # Only paths containing the substring 'ars' at least twice are processed.
    if json_file.count('ars') < 2:
        continue
    print(counter)
    counter += 1
    # Currently disabled lookups of the query class / ARA from the path:
    # query_class = query_pattern.search(json_file).group(1)
    # ara = ara_pattern.search(json_file).group(1)
    with open(json_file, encoding='utf-8') as fp:
        json_response = json.load(fp)
    try:
        results = json_response['fields']['data']['message']['results']
    except (KeyError, TypeError):
        # The response does not have the expected nesting; remember the file
        # and move on instead of aborting the whole run.
        failed_to_get_results.append(json_file)
        continue
    if not results:
        continue
    for result in results:
        result["old_score"] = old_get_confidence(result)
        result["new_score"] = new_get_confidence(result)
    # Order matters: the second call re-sorts the list left by the first one,
    # so ties in the new score keep their old-score ordering (stable sort).
    _write_ranked_copy(json_response, json_file, 'old_score', 'old_gscore')
    _write_ranked_copy(json_response, json_file, 'new_score', 'new_gscore')

# Debugging aid: drop into the debugger once all files are processed, e.g. to
# inspect ``failed_to_get_results``.
# NOTE(review): the original indentation of this line was lost; it is assumed
# to run once after the loop rather than once per file.
import pdb
pdb.set_trace()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment