Skip to content

Instantly share code, notes, and snippets.

@th3o6a1d
Created October 12, 2016 02:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save th3o6a1d/c09aedc647b1481a1e865e57b7265f87 to your computer and use it in GitHub Desktop.
Save th3o6a1d/c09aedc647b1481a1e865e57b7265f87 to your computer and use it in GitHub Desktop.
Clean up radiology reports from EPIC data dump. Usage: python report_cleaner.py input_filename.csv
#!/usr/bin/python
import csv
import os
import re
import sys
# Usage: python report_cleaner.py input_file_name.csv
# The cleaned reports are written to input_file_name_cleaned.csv
# Define how we clean up the reports after the lines
# have been collected.
def clean_report(report):
    """Return *report* with every run of whitespace collapsed to one space.

    Newlines, tabs and repeated spaces are all replaced by a single space
    character. Leading and trailing whitespace is collapsed the same way,
    not stripped, so a padded report keeps one space at each end.
    """
    report = re.sub(r'\s+', ' ', report)
    return report
# Read the data from the input file, merging all rows that share an
# ORDER_PROC_ID into a single record.
if len(sys.argv) < 2:
    sys.exit('Usage: python report_cleaner.py input_file_name.csv')

# On Python 3 the csv module expects a text-mode file opened with
# newline=''. Python 2's open() has no such argument and relies on
# universal-newline mode ('rU'), which Python 3.11 removed.
if sys.version_info[0] >= 3:
    f = open(sys.argv[1], 'r', newline='')
else:
    f = open(sys.argv[1], 'rU')

reports = {}
# The with-block guarantees the input file is closed, even if a row is
# missing one of the expected columns.
with f:
    data = csv.DictReader(f)
    for row in data:
        order_proc_id = row['ORDER_PROC_ID']
        if order_proc_id not in reports:
            # First row seen for this order: keep its columns as the record.
            reports[order_proc_id] = {
                'ORDER_PROC_ID': row['ORDER_PROC_ID'],
                'PAT_ENC_CSN_ID': row['PAT_ENC_CSN_ID'],
                'PROC_CODE': row['PROC_CODE'],
                'RADIOLOGIST': row['RADIOLOGIST'],
                'NARRATIVE': row['NARRATIVE'],
                'DESCRIPTION': row['DESCRIPTION'],
                'ORDER_TIME': row['ORDER_TIME'],
            }
        else:
            # Later rows for the same order only extend the narrative.
            # NOTE(review): pieces are joined with no separator; confirm the
            # dump keeps the whitespace between consecutive narrative lines.
            reports[order_proc_id]['NARRATIVE'] += row['NARRATIVE']
# Set up the output file: the input path with its extension replaced by
# '_cleaned.csv'. splitext avoids assuming the extension is 4 characters.
output_path = os.path.splitext(sys.argv[1])[0] + '_cleaned.csv'

# csv writers need a text-mode file opened with newline='' on Python 3
# and a binary-mode file on Python 2.
if sys.version_info[0] >= 3:
    o = open(output_path, 'w', newline='')
else:
    o = open(output_path, 'wb')

fieldnames = ['ORDER_PROC_ID', 'PAT_ENC_CSN_ID', 'PROC_CODE', 'RADIOLOGIST',
              'NARRATIVE', 'DESCRIPTION', 'ORDER_TIME']

# The with-block closes the output file even if a write fails part-way.
with o:
    output = csv.DictWriter(o, fieldnames=fieldnames, delimiter=',',
                            lineterminator='\n')
    output.writeheader()
    # Clean the report and write to file.
    for report in reports:
        # Records whose ORDER_PROC_ID is empty are skipped.
        if report:
            reports[report]['NARRATIVE'] = clean_report(reports[report]['NARRATIVE'])
            output.writerow(reports[report])
            # Echo the cleaned narrative to stdout as each row is written.
            print(reports[report]['NARRATIVE'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment