Created
April 9, 2022 18:29
-
-
Save atrisovic/3d45cf9a37b7853597c5a258e2aa1908 to your computer and use it in GitHub Desktop.
id_hosp.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Before running, activate env: | |
# export CONDA_ENVS_PATH=/nfs/projects/n/nsaph_common/conda/envs/ | |
# export CONDA_PKGS_PATH=/nfs/projects/n/nsaph_common/conda/pkgs/ | |
# source activate nsaph | |
## Code to ID hospitalizations | |
library(data.table) | |
library(fst) | |
library(devtools) | |
library(NSAPHutils) | |
library(lubridate) | |
#set_threads() # why this doesn't work? | |
library(icd) | |
# R version on RCE: 3.5.2 | |
# ICD code sets for every outcome of interest.
# `outcomes[[name]]` holds two character vectors:
#   icd9  - ICD-9 codes, applied to discharges before 2015-10-01
#   icd10 - ICD-10 codes, applied to discharges on or after 2015-10-01
# All codes are written in short form (no decimal point) so that every
# definition produces codes in the same format.
# NOTE(review): this assumes the DIAG columns hold short-form codes - confirm
# against the hospitalization data.
outcomes <- list(
  all_kidney = list(
    icd9 = expand_range("580", "599"),
    icd10 = expand_range("N00", "N39")
  ),
  ckd = list(
    icd9 = children("585"),
    icd10 = children("N18")
  ),
  aki = list(
    icd9 = children("584"),
    icd10 = children("N17")
  ),
  glomerular = list(
    icd9 = expand_range("580", "583"),
    icd10 = expand_range("N00", "N08")
  ),
  # Diabetes. expand_range("E08", "E13") did not work for the ICD-10 side
  # (need to check the icd package), so the children of each three-character
  # code are concatenated instead.
  diabetes = list(
    icd9 = children("250"),
    icd10 = c(
      children("E08"), children("E09"), children("E10"),
      children("E11"), children("E12"), children("E13")
    )
  ),
  # Co-morbidities
  # Circulatory system disease: ICD-9 390-459 / ICD-10 I00-I99
  csd = list(
    icd9 = expand_range("390", "459"),
    icd10 = expand_range("I00", "I99")
  ),
  # Ischemic heart disease: ICD-9 410-414 / ICD-10 I20-I25
  ihd = list(
    icd9 = expand_range("410", "414"),
    icd10 = expand_range("I20", "I25")
  ),
  # Pneumonia: ICD-9 480-486 / ICD-10 J12-J18
  pneumonia = list(
    icd9 = expand_range("480", "486"),
    icd10 = expand_range("J12", "J18")
  ),
  # Heart failure: ICD-9 428 / ICD-10 I50
  hf = list(
    icd9 = children("428"),
    icd10 = children("I50")
  ),
  # Acute myocardial infarction: ICD-9 410 / ICD-10 I21
  ami = list(
    icd9 = children("410"),
    icd10 = children("I21")
  ),
  # Cerebrovascular diseases: ICD-9 430-438 / ICD-10 I60-I69
  cerd = list(
    icd9 = expand_range("430", "438"),
    icd10 = expand_range("I60", "I69")
  ),
  # Urinary tract infection: ICD-9 599.0 / ICD-10 N39.0.
  # Written in short form ("5990", "N390") like every other definition here;
  # a decimal-form input would make children() return decimal-form codes.
  uti = list(
    icd9 = children("5990"),
    icd10 = children("N390")
  )
)
## Clear out old data in case of re-run.
## The directory matches the one the export loop at the end of this script
## appends to; otherwise a re-run would stack duplicate rows onto the
## previous run's files.
output_root <- "../data/output_data_test2/"
for (outcome in names(outcomes)) {
  for (diag_type in c("primary", "secondary")) {
    # Anchored pattern: only files whose name ends in ".fst" are removed.
    stale_files <- list.files(
      file.path(output_root, outcome, diag_type),
      pattern = "\\.fst$",
      full.names = TRUE
    )
    file.remove(stale_files)
  }
}
# Let's test for a single year (2000).
# Each year is read separately and the pieces are stacked, so widening the
# range keeps every year instead of only the last one read.
admission_columns <- c("QID", "ADATE", "DDATE", paste0("DIAG", 1:10))
admissions <- rbindlist(lapply(2000:2000, function(year_) {
  read_data("whanhee/data/hosp_data/",
            years = year_,
            columns = admission_columns)
}))
print(admissions)

# Parse admission/discharge dates with lubridate::dmy(), derive the admission
# year, and keep admissions from 2000 through 2016.
admissions[, ADATE := dmy(ADATE)]
admissions[, DDATE := dmy(DDATE)]
admissions[, year := year(ADATE)]
admissions <- admissions[year %in% 2000:2016]
# For every outcome, add three logical flag columns to `admissions`:
#   <outcome>_primary          - DIAG1 matches the outcome's codes
#   <outcome>_primarysecondary - any of DIAG1..DIAG10 matches
#   <outcome>_secondary        - any of DIAG2..DIAG10 matches
# Discharges before 2015-10-01 are matched against the ICD-9 list, discharges
# on or after that date against the ICD-10 list.
diag_sets <- list(
  primary = "DIAG1",
  primarysecondary = paste0("DIAG", 1:10),
  secondary = paste0("DIAG", 2:10)
)
for (outcome in names(outcomes)) {
  icd9_codes <- outcomes[[outcome]][["icd9"]]
  icd10_codes <- outcomes[[outcome]][["icd10"]]
  for (suffix in names(diag_sets)) {
    flag_col <- paste0(outcome, "_", suffix)
    diag_cols <- diag_sets[[suffix]]
    # Test each diagnosis column for membership, then OR the columns together.
    admissions[DDATE < "2015-10-01",
               (flag_col) := Reduce(`|`, lapply(.SD, `%in%`, icd9_codes)),
               .SDcols = diag_cols]
    admissions[DDATE >= "2015-10-01",
               (flag_col) := Reduce(`|`, lapply(.SD, `%in%`, icd10_codes)),
               .SDcols = diag_cols]
  }
}
# Second ask: among admissions with AKI as a secondary diagnosis, flag which of
# the following 8 co-morbidities was the primary diagnosis.
# If aki_secondary is TRUE (AKI recorded in DIAG2-DIAG10), add indicators for
# what their DIAG1 is for that same hospitalization.
# E.g. if DIAG1 is cardiovascular, add an indicator that cardiovascular disease
# was the primary diagnosis during the AKI hospitalization (and likewise for
# the other conditions).
# 8 co-morbidities
# One indicator column per co-morbidity, named <comorbidity>aki. It holds the
# string "TRUE" when the admission has AKI as a secondary diagnosis and the
# co-morbidity as the primary diagnosis, and the string "FALSE" otherwise.
#   1. diabetes                     5. heart failure (hf)
#   2. circulatory system disease   6. acute myocardial infarction (ami)
#   3. ischemic heart disease       7. cerebrovascular disease (cerd)
#   4. pneumonia                    8. urinary tract infection (uti)
comorbidities <- c("diabetes", "csd", "ihd", "pneumonia",
                   "hf", "ami", "cerd", "uti")
for (comorbidity in comorbidities) {
  indicator <- paste0(comorbidity, "aki")
  primary_flag <- admissions[[paste0(comorbidity, "_primary")]]
  has_both <- admissions$aki_secondary == "TRUE" & primary_flag == "TRUE"
  admissions[, (indicator) := "FALSE"]
  # which() drops NA comparisons, so those rows keep the "FALSE" default.
  admissions[which(has_both), (indicator) := "TRUE"]
}
# First ask:
# This paper is on AKI. Among those with AKI (DIAG1-10), add an indicator of
# whether they had a diabetes (DIAG1-10) or CKD (DIAG1-10) hospitalization
# prior to the AKI hospitalization.
# I.e. if they had AKI (either before Oct 2015 or after), did they have a
# diabetes hospitalization before the AKI one?
# If aki_primarysecondary is TRUE (they were admitted with AKI as a primary or
# secondary diagnosis), take the date of the first AKI admission and compare:
#   date of first AKI admission > date of diabetes admission (primary or secondary)
#   --> diabetes_prior_aki (indicator)
# We want to add an indicator that they had diabetes prior to AKI.
# Steps:
# - check whether someone with the same QID has one row with
#   admissions$aki_primarysecondary == "TRUE" and a different row with
#   admissions$diabetes_primarysecondary == "TRUE", where the admission date of
#   the first row > the admission date of the second row
# - add the indicator admissions$diabeteshosp_prior_aki only to the
#   hospitalization corresponding to AKI
# For testing: append a synthetic earlier admission for the patient in row 100
# so that at least one QID has a diabetes stay preceding an AKI stay.
# The copy is edited before it is appended; addressing it afterwards as
# row 101 only hits the new row when `admissions` has exactly 100 rows and
# would otherwise overwrite a real record.
# NOTE(review): this injects a fabricated record - remove before a real run.
test_row <- copy(admissions[100, ])
test_row[, `:=`(
  DIAG1 = "N08",
  ADATE = ADATE - 10,                 # ten days before the original admission
  diabetes_primarysecondary = TRUE,   # logical, like the computed flag columns
  aki_primarysecondary = FALSE
)]
admissions <- rbind(admissions, test_row)
# Identify individuals with multiple admissions.
# duplicated() marks every repeat occurrence of a QID, so this yields each QID
# that appears on more than one row. Positions taken from table() refer to the
# sorted unique QIDs and must not be used to index the rows of `admissions`.
mult_admissions_qid <- unique(admissions$QID[duplicated(admissions$QID)])

# Flag rows per condition. as.logical() accepts both logical flags and
# "TRUE"/"FALSE" strings; %in% TRUE treats NA as "no".
had_aki <- as.logical(admissions$aki_primarysecondary) %in% TRUE
had_diabetes <- as.logical(admissions$diabetes_primarysecondary) %in% TRUE

# Earliest AKI and earliest diabetes admission date per person: sort by date
# and keep the first row of each QID. This replaces the per-QID subsetting
# loop, which rescanned the whole table for every patient.
aki_dates <- admissions[which(had_aki & !is.na(ADATE)),
                        .(QID, dateaki = ADATE)]
first_aki <- unique(aki_dates[order(dateaki)], by = "QID")
diabetes_dates <- admissions[which(had_diabetes & !is.na(ADATE)),
                             .(QID, datediabetes = ADATE)]
first_diabetes <- unique(diabetes_dates[order(datediabetes)], by = "QID")

# QIDs of those whose first diabetes admission precedes their first AKI
# admission (only people present in both tables survive the merge).
first_dates <- merge(first_aki, first_diabetes, by = "QID")
akidiabetesqid <- first_dates[
  QID %in% mult_admissions_qid & datediabetes < dateaki,
  QID
]

# Add the indicator only to hospitalizations with AKI to indicate prior
# diabetes; stored as "TRUE"/"FALSE" strings like the other derived indicators.
admissions[, diabeteshosp_prior_aki := "FALSE"]
admissions[which(had_aki & QID %in% akidiabetesqid),
           diabeteshosp_prior_aki := "TRUE"]
#ana please add same to define prior CKD | |
#print(admissions) | |
# declare diabetes_prior_aki TRUE if | |
# aki_DDATE > diabetes_DDATE | |
#admissions <- transform( | |
# admissions, diabetes_prior_aki= ifelse( | |
# admissions$aki_secondary == TRUE, | |
# TRUE, | |
#FALSE) | |
#) | |
#print(admissions) | |
#same for CKD, want to add inidcator that they had CKD prior to AKI | |
#total of 10 new columns | |
# Export one .fst file per year x outcome x diagnosis type holding the
# admissions whose <outcome>_<type> flag is TRUE. Rows are appended to an
# existing file for the same year/outcome/type; nothing is written when there
# are no rows at all.
for (i in 2000:2000) {
  for (outcome in names(outcomes)) {
    for (type in c("primary", "secondary")) {
      varname <- paste0(outcome, "_", type)
      out_dir <- file.path("../data/output_data_test2/", outcome, type)
      out_file <- paste0(out_dir, "/", varname, "_", i, ".fst")

      # write_fst() does not create missing directories.
      dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

      if (file.exists(out_file)) {
        year_admissions <- read_fst(out_file)
      } else {
        year_admissions <- NULL
      }
      year_admissions <- rbind(
        year_admissions,
        admissions[year == i & get(varname) == TRUE]
      )
      if (nrow(year_admissions) != 0) {
        write_fst(year_admissions, out_file)
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment