Skip to content

Instantly share code, notes, and snippets.

@atrisovic
Last active April 17, 2022 04:32
Show Gist options
  • Save atrisovic/d9ae4600607739b095bf46567a5a677a to your computer and use it in GitHub Desktop.
Save atrisovic/d9ae4600607739b095bf46567a5a677a to your computer and use it in GitHub Desktop.
whanhee
import pandas as pd
import numpy as np
import json
from simplejson import loads
def get_outcomes():
    """Load and return the ICD code definitions from ``icd_codes.json``.

    The file is read from the current working directory. It must contain a
    JSON array whose first element is itself a JSON-encoded string; that
    string is decoded and returned.

    Returns:
        The decoded first element of the array. Other functions in this
        file index the result as ``outcomes[name]["icd9"]`` and
        ``outcomes[name]["icd10"]``.
    """
    # The context manager closes the file even when parsing raises.
    with open('icd_codes.json') as f:
        outcomes_ = json.load(f)
    return loads(outcomes_[0])
def read_admissions(path="admissions_2000.csv"):
    """Read an admissions CSV and parse its admission/discharge dates.

    Args:
        path: CSV file to read. Defaults to ``"admissions_2000.csv"``, the
            file name this function previously hard-coded, so existing
            zero-argument calls behave as before.

    Returns:
        The DataFrame read from ``path`` with the ``ADATE`` and ``DDATE``
        columns converted to datetimes using the ``%d%b%Y`` format
        (day, abbreviated month name, four-digit year).
    """
    admissions_ = pd.read_csv(path)
    # Both date columns share the same textual format.
    for date_col in ("ADATE", "DDATE"):
        admissions_[date_col] = pd.to_datetime(
            admissions_[date_col], format='%d%b%Y')
    return admissions_
def primary(row, outcome=None):
    """Return 1 if the primary diagnosis (DIAG1) matches ``outcome``, else 0.

    ``DIAG1`` is checked against the outcome's ``"icd9"`` codes when the
    discharge date is before ``icd_date`` and against its ``"icd10"`` codes
    when the discharge date is on or after ``icd_date``.

    Args:
        row: Mapping-like admission record providing ``"DIAG1"`` and
            ``"DDATE"`` (a DataFrame row when used with
            ``DataFrame.apply(..., axis=1)``).
        outcome: Key into the module-level ``outcomes`` mapping.

    Returns:
        1 on a match, otherwise 0.
    """
    # Use the module-level ``outcomes`` that is loaded once at import time.
    # This function runs once per row under DataFrame.apply, so calling
    # get_outcomes() here re-read and re-parsed icd_codes.json per admission.
    codes = outcomes[outcome]
    ddate = row["DDATE"]
    if ddate < icd_date:
        return 1 if row["DIAG1"] in codes["icd9"] else 0
    if ddate >= icd_date:
        return 1 if row["DIAG1"] in codes["icd10"] else 0
    # Neither comparison held (presumably a missing discharge date): no match.
    return 0
def primary_secondary(row, outcome=None, secondary=False):
    """Return 1 if any inspected diagnosis column matches ``outcome``, else 0.

    Columns ``DIAG1`` through ``DIAG10`` are inspected by default; with
    ``secondary=True`` the scan starts at ``DIAG2`` so the primary diagnosis
    is excluded. Codes are compared against the outcome's ``"icd9"`` list
    when the discharge date is before ``icd_date`` and against its
    ``"icd10"`` list when it is on or after ``icd_date``.

    Args:
        row: Mapping-like admission record providing ``"DDATE"`` and the
            ``"DIAG<n>"`` columns (a DataFrame row when used with
            ``DataFrame.apply(..., axis=1)``).
        outcome: Key into the module-level ``outcomes`` mapping.
        secondary: When true, skip ``DIAG1``.

    Returns:
        1 on the first matching diagnosis column, otherwise 0.
    """
    # Use the module-level ``outcomes`` loaded once at import time rather
    # than re-reading icd_codes.json for every row passed by DataFrame.apply.
    ddate = row["DDATE"]
    # The discharge date does not change between columns, so pick the code
    # list once instead of re-comparing dates for each DIAG column.
    if ddate < icd_date:
        codes = outcomes[outcome]["icd9"]
    elif ddate >= icd_date:
        codes = outcomes[outcome]["icd10"]
    else:
        # Neither comparison held (presumably a missing discharge date).
        return 0

    # start from DIAG2 if secondary
    start_number = 2 if secondary else 1
    for num in range(start_number, 11):
        if row["DIAG" + str(num)] in codes:
            return 1
    return 0
def simple(aki_primarysecondary):
    """Collapse a series of flags to a single 0/1 based on its first entry.

    Returns 0 when the first element of ``aki_primarysecondary`` equals 1,
    and 1 otherwise. In this file it is used as a group-wise transform on
    date-sorted ``aki_primarysecondary`` flags, so a result of 1 means the
    group's first row was not flagged for AKI.
    """
    first_flag = aki_primarysecondary.iloc[0]
    return 0 if first_flag == 1 else 1
def get_first_hosp(df_):
    """Mark the first entry of ``df_`` with 1 and every later entry with 0.

    A single-element input yields the scalar ``1``; any other length yields
    a list that starts with 1 followed by ``len(df_) - 1`` zeros.
    """
    size = len(df_)
    if size == 1:
        return 1
    trailing_zeros = [0] * (size - 1)
    return [1] + trailing_zeros
# Cut-over date used by the matching functions: discharges before it are
# compared against "icd9" codes, discharges on or after it against "icd10".
icd_date = pd.Timestamp(2015, 10, 1)
# Outcome definitions, loaded once when the module is executed/imported.
outcomes = get_outcomes()
if __name__ == '__main__':
    # Debug aid: json.dumps(outcomes, sort_keys=True, indent=4) pretty-prints
    # the loaded outcome definitions.
    admissions = read_admissions()

    # For every outcome add three 0/1 columns: primary diagnosis only,
    # any of DIAG1-10, and secondary diagnoses only (DIAG2-10).
    flag_specs = (
        ("_primary", primary, {}),
        ("_primarysecondary", primary_secondary, {}),
        ("_secondary", primary_secondary, {"secondary": True}),
    )
    for outcome in outcomes:
        for suffix, flag_func, extra_kwargs in flag_specs:
            admissions[outcome + suffix] = admissions.apply(
                flag_func, axis=1, outcome=outcome, **extra_kwargs)
    # ? drop all where outcome_primarysecondary == 0
    # ? drop diag cols

    # AKI as a secondary diagnosis together with a co-morbidity as primary:
    # the row-wise minimum of two 0/1 flags is 1 only when both are 1.
    co_morbidity = ["diabetes", "csd", "ihd", "pneumonia", "hf", "ami", "cerd", "uti"]
    for d in co_morbidity:
        flag_pair = admissions[[d + "_primary", "aki_secondary"]]
        admissions[d + '_primary_aki_secondary'] = flag_pair.min(axis=1)

    # Diabetes or CKD as a prior hospital diagnosis: among admissions
    # flagged for AKI or the condition, ordered by admission date within
    # each patient (QID), `simple` yields 1 when the first one is not AKI.
    aki_flagged = admissions['aki_primarysecondary'] == 1
    for condition in ("diabetes", "ckd"):
        either_flagged = aki_flagged | (
            admissions[condition + '_primarysecondary'] == 1)
        per_patient = admissions[either_flagged].sort_values(
            by='ADATE').groupby('QID')
        admissions[condition + 'hosp_prior_aki'] = per_patient[
            'aki_primarysecondary'].transform(simple)

    # Correct primary and secondary so that only the first flagged admission
    # per patient keeps a 1; later flagged admissions become 0.
    # NOTE(review): rows that were not flagged are absent from the
    # transformed result, so they presumably end up missing (NaN) rather
    # than 0 after assignment; confirm this is intended.
    for outcome in outcomes:
        for suffix in ("_primary", "_secondary"):
            column = outcome + suffix
            flagged_rows = admissions[admissions[column] == 1]
            admissions[column] = flagged_rows.sort_values(
                by=['ADATE']).groupby('QID')['ADATE'].transform(get_first_hosp)

    ordered = admissions.sort_values(by=['QID', 'ADATE'])
    ordered.to_csv("temp.csv", index=False)
@atrisovic
Copy link
Author

New header:

QID,AGE,SEX,RACE,SSA_CNTY_CD,ADATE,DDATE,BENE_DOD,DIAG1,DIAG2,DIAG3,DIAG4,DIAG5,DIAG6,DIAG7,DIAG8,DIAG9,DIAG10,YEAR,all_kidney_primary,all_kidney_primarysecondary,all_kidney_secondary,ckd_primary,ckd_primarysecondary,ckd_secondary,aki_primary,aki_primarysecondary,aki_secondary,glomerular_primary,glomerular_primarysecondary,glomerular_secondary,diabetes_primary,diabetes_primarysecondary,diabetes_secondary,csd_primary,csd_primarysecondary,csd_secondary,ihd_primary,ihd_primarysecondary,ihd_secondary,pneumonia_primary,pneumonia_primarysecondary,pneumonia_secondary,hf_primary,hf_primarysecondary,hf_secondary,ami_primary,ami_primarysecondary,ami_secondary,cerd_primary,cerd_primarysecondary,cerd_secondary,uti_primary,uti_primarysecondary,uti_secondary,diabetes_primary_aki_secondary,csd_primary_aki_secondary,ihd_primary_aki_secondary,pneumonia_primary_aki_secondary,hf_primary_aki_secondary,ami_primary_aki_secondary,cerd_primary_aki_secondary,uti_primary_aki_secondary,diabeteshosp_prior_aki,ckdhosp_prior_aki

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment