Last active
August 29, 2015 14:07
-
-
Save th3o6a1d/28761b93bbf80265229b to your computer and use it in GitHub Desktop.
Python Script for Extracting Records from SPARCS Limited IP Data Files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys, csv, os
###
### Author Jason Theobald, MD/MBA Student 2014
### This script reads every file in the current working directory whose name ends in "dat"
### (case-insensitive), i.e. the SPARCS .DAT files.
### Use this to retrieve patients by ICD-9 Code and generate a .csv file
### To run: python extractor.py on Mac OSX or Windows PC (with Python 2.7 installed)
###
### NOTE: print is called with a single parenthesized argument throughout; under Python 2.7 this
### prints exactly the same text as the print statement.
###

###
### Header of .csv output file
### Here you enter the field names that will be the first row of your CSV file.
### Type them in order, as shown below.
###
HEADER = ["Type","Age","Zip","County","FacilityID","FacilityName","Principal ICD", "ICD2", "ICD3", "ICD4", "ICD5", "ICD6", "ICD7", "ICD8", "ICD9", "ICD10", "ICD11", "ICD12", "ICD13", "ICD14", "ICD15"]

### Refer to the SPARCS code list to tell the script where to look for each item of interest.
### Remember that python uses 0-based indexing, so you need to subtract 1 from the start number
### of all of the SPARCS codes. End number is the same.
### (start, end) column pairs, in the same order as the header: age, zip, county, facility ID,
### facility name.
DEMOGRAPHIC_SLICES = (
    (101, 104),
    (164, 169),
    (173, 175),
    (199, 205),
    (206, 276),
)

### The 15 ICD fields (principal code first) are each 7 columns wide and start 24 columns apart,
### so the start columns are 1642, 1666, ..., 1978. Generating them from the stride keeps every
### slice the same width; the hand-written ICD15 slice used to be line[1978:1961], which is
### always empty.
ICD_FIRST_START = 1642
ICD_STRIDE = 24
ICD_WIDTH = 7
ICD_COUNT = 15
ICD_STARTS = tuple(range(ICD_FIRST_START, ICD_FIRST_START + ICD_STRIDE * ICD_COUNT, ICD_STRIDE))

### Enter your collection of ICD-9 codes of interest. In this case, we are looking for hips and knees.
### Built once here instead of once per record.
HIP_CODES = frozenset(["8151", "8152", "0070", "0071", "0072", "0073", "8153", "0074", "0075", "0076", "0077"])
KNEE_CODES = frozenset(["8154", "8155", "0080", "0081", "0082", "0083", "0084"])


def parse_record(line):
    """Slice one fixed-width record into its fields of interest.

    Returns a pair (demographics, codes): demographics is the list
    [age, zip, county, facilityID, facilityName] and codes is the list of the
    15 ICD fields with the principal code first. Every value is stripped of
    surrounding whitespace. A line too short to contain a field yields an
    empty string for that field.
    """
    demographics = [line[start:end].strip() for start, end in DEMOGRAPHIC_SLICES]
    codes = [line[start:start + ICD_WIDTH].strip() for start in ICD_STARTS]
    return demographics, codes


def joint_types(codes):
    """Return the row labels that apply to a record's ICD codes.

    The result holds "Hip" if any code is in HIP_CODES and "Knee" if any code
    is in KNEE_CODES, in that order; it is empty when neither matches and has
    both labels when a record matches both lists.
    """
    labels = []
    if any(code in HIP_CODES for code in codes):
        labels.append("Hip")
    if any(code in KNEE_CODES for code in codes):
        labels.append("Knee")
    return labels


def main():
    """List the input files, wait for the user, then write one CSV per input file."""
    ### Shows what files are being read. Will open all .dat files in current directory.
    files = [i for i in os.listdir(os.curdir) if i[-3:].lower() == "dat"]
    print("")
    print("Files to extract from...")
    for name in files:
        print(name)
    ### Wait for keyboard prompt to show user the files that are being read.
    raw_input("Press enter to continue")
    ### Opens each .dat file. The output name reuses the two characters just before the
    ### four-character ".dat" suffix of the input name.
    for name in files:
        ### "with" closes both files even if a record fails, so buffered rows are flushed.
        with open(name) as source, open('JOINTS' + str(name[-6:-4]) + '.csv', 'wb') as target:
            output = csv.writer(target, delimiter=',')
            output.writerow(HEADER)
            ### For each line in the file, grab the data of interest.
            for line in source:
                demographics, codes = parse_record(line)
                ### A record that is both a hip and a knee is written twice, once per label.
                for label in joint_types(codes):
                    print(label)
                    output.writerow([label] + demographics + codes)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment