Skip to content

Instantly share code, notes, and snippets.

@romanegloo
Created September 6, 2019 15:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save romanegloo/20d7bf84908fcfb4ebb880952849814d to your computer and use it in GitHub Desktop.
Save romanegloo/20d7bf84908fcfb4ebb880952849814d to your computer and use it in GitHub Desktop.
Find corresponding MeSH terms from CUIs in evaluation datasets
#!/usr/bin/env python3
# pylint: disable=invalid-name
"""Reads UMNSRS datasets where CUIs are used, add corresponding MeSHes to the
CUIs"""
from pathlib import Path
import csv
from tqdm import tqdm
from BMET.uts_api_client import UtsClient
# Keep prompting until the user supplies a path to an existing file.
# Path('') resolves to the current directory, which is not a file, so the
# loop body always runs at least once.
file_in = Path('')
while not file_in.is_file():
    user_input = input("Enter path to a csv reference file containing CUIs: ")
    file_in = Path(user_input)

# The output is written next to the input as "<stem>_mesh<suffix>".
file_out = file_in.with_name(file_in.stem + '_mesh' + file_in.suffix)

# Client used to look up the MeSH atoms of each CUI.
uts_client = UtsClient()

# Count the data rows (all lines minus the header) for the progress bar
# total. This counts physical lines, so it is only an estimate when quoted
# fields contain embedded newlines. The file is closed as soon as the count
# is done, and the total is clamped at 0 for an empty file.
with open(file_in) as fin:
    rows = max(sum(1 for _ in fin) - 1, 0)

# newline='' is what the csv module expects for both reading and writing;
# without it, written rows can gain an extra '\r' on Windows.
with open(file_in, newline='') as fin, open(file_out, 'w', newline='') as fout:
    csv_reader = csv.DictReader(fin)
    csv_writer = csv.writer(fout, quotechar='"')
    # Output header: the original columns followed by the two MeSH columns.
    csv_writer.writerow(csv_reader.fieldnames + ['MESH1', 'MESH2'])
    # The context manager closes the progress bar even if a lookup raises.
    with tqdm(total=rows) as pbar:
        for flds in csv_reader:
            pbar.update()
            # Each record holds a pair of concepts: CUI1/TERM1 and CUI2/TERM2.
            for i in '12':
                cui = flds['CUI' + i]
                tok = flds['TERM' + i]
                try:
                    rst = uts_client.get_concept_mesh_atoms(cui, tok)
                    if rst is None:
                        # No lookup result: record the literal text 'None'.
                        flds['MESH' + i] = 'None'
                    else:
                        # Keep the last path segment of the first result's
                        # 'code' value.
                        mesh = rst['result'][0]['code'].split('/')[-1]
                        flds['MESH' + i] = mesh
                except Exception as e:
                    # Report which record failed, then abort with the
                    # original exception.
                    print(f"Error: {e} Cui: {cui} Rec: {flds}")
                    raise
            # The MESH keys were added last, so the value order matches the
            # header written above.
            csv_writer.writerow(flds.values())
print(f"Finished writing records on {file_out}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment