Skip to content

Instantly share code, notes, and snippets.

@atrisovic
Created April 9, 2022 18:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atrisovic/3d45cf9a37b7853597c5a258e2aa1908 to your computer and use it in GitHub Desktop.
Save atrisovic/3d45cf9a37b7853597c5a258e2aa1908 to your computer and use it in GitHub Desktop.
id_hosp.R
# Before running, activate env:
# export CONDA_ENVS_PATH=/nfs/projects/n/nsaph_common/conda/envs/
# export CONDA_PKGS_PATH=/nfs/projects/n/nsaph_common/conda/pkgs/
# source activate nsaph
## Code to ID hospitalizations
library(data.table)
library(fst)
library(devtools)
library(NSAPHutils)
library(lubridate)
#set_threads() # why this doesn't work?
library(icd)
# R version on RCE: 3.5.2
# ICD code sets for every outcome of interest.
# `outcomes` is a named list; each entry is itself a list with an "icd9" and
# an "icd10" element holding the codes produced by expand_range() / children()
# from the icd package. The order of the entries is the order in which the
# loops further down iterate over names(outcomes).
# All code sets are requested in short form (no decimal point) so that every
# outcome is matched against the DIAG columns in the same format.
outcomes <- list(
  all_kidney = list(
    icd9 = expand_range("580", "599"),
    icd10 = expand_range("N00", "N39")
  ),
  ckd = list(
    icd9 = children("585"),
    icd10 = children("N18")
  ),
  aki = list(
    icd9 = children("584"),
    icd10 = children("N17")
  ),
  # Built explicitly as a list like every other entry (the previous code
  # relied on assigning into a not-yet-existing element).
  glomerular = list(
    icd9 = expand_range("580", "583"),
    icd10 = expand_range("N00", "N08")
  ),
  # add diabetes
  diabetes = list(
    icd9 = children("250"),
    # expand_range("E08", "E13") won't work here, need to check ICD package,
    # hence the explicit concatenation of the six parent codes.
    icd10 = c(
      children("E08"), children("E09"), children("E10"),
      children("E11"), children("E12"), children("E13")
    )
  ),
  # add co-morbidities
  # Circulatory system disease: ICD-9 390-459 / ICD-10 I00-I99
  csd = list(
    icd9 = expand_range("390", "459"),
    icd10 = expand_range("I00", "I99")
  ),
  # Ischemic heart disease: ICD-9 410-414 / ICD-10 I20-I25
  ihd = list(
    icd9 = expand_range("410", "414"),
    icd10 = expand_range("I20", "I25")
  ),
  # Pneumonia: ICD-9 480-486 / ICD-10 J12-J18
  pneumonia = list(
    icd9 = expand_range("480", "486"),
    icd10 = expand_range("J12", "J18")
  ),
  # Heart failure: ICD-9 428 / ICD-10 I50
  hf = list(
    icd9 = children("428"),
    icd10 = children("I50")
  ),
  # Acute myocardial infarction: ICD-9 410 / ICD-10 I21
  ami = list(
    icd9 = children("410"),
    icd10 = children("I21")
  ),
  # Cerebrovascular diseases: ICD-9 430-438 / ICD-10 I60-I69
  cerd = list(
    icd9 = expand_range("430", "438"),
    icd10 = expand_range("I60", "I69")
  ),
  # Urinary tract infection: ICD-9 599.0 / ICD-10 N39.0
  # Written in short form ("5990", "N390"): a decimal-form input makes
  # children() return decimal-form codes, which would not match the short-form
  # codes used by every other outcome above.
  # NOTE(review): assumes the DIAG columns hold short-form codes - confirm.
  uti = list(
    icd9 = children("5990"),
    icd10 = children("N390")
  )
)
## Clear out old output in case of a re-run.
# The export loop at the end of this script appends to any existing
# <outcome>_<type>_<year>.fst file under ../data/output_data_test2/, so stale
# files have to be removed from that same directory first; otherwise every
# re-run duplicates the rows already written.
for (outcome in names(outcomes)) {
  for (diag_type in c("primary", "secondary")) {
    stale_files <- list.files(
      file.path("../data/output_data_test2/", outcome, diag_type),
      pattern = "\\.fst$", # anchored regex: only names ending in ".fst"
      full.names = TRUE
    )
    # list.files() returns character(0) for a missing/empty directory, in
    # which case file.remove() is a no-op.
    file.remove(stale_files)
  }
}
# Test run: load the hospitalization records for a single year (2000).
# Only QID, the two date columns and DIAG1-DIAG10 are read.
for (year_ in 2000:2000) {
  admissions <- read_data(
    "whanhee/data/hosp_data/",
    years = year_,
    columns = c("QID", "ADATE", "DDATE", paste0("DIAG", 1:10))
  )
}
print(admissions)

# Parse both date columns as day-month-year, derive `year` from the parsed
# ADATE, and keep only rows whose year is within 2000-2016.
admissions[, `:=`(ADATE = dmy(ADATE), DDATE = dmy(DDATE))]
admissions[, year := year(ADATE)]
admissions <- admissions[year %in% 2000:2016]
# Flag every admission for every outcome.
# Rows with DDATE before 2015-10-01 are matched against the outcome's ICD-9
# codes, rows with DDATE on/after that date against its ICD-10 codes.
# Three columns are added per outcome, in this order:
#   <outcome>_primary           DIAG1 is in the code set
#   <outcome>_primarysecondary  any of DIAG1-DIAG10 is in the code set
#   <outcome>_secondary         any of DIAG2-DIAG10 is in the code set
for (outcome in names(outcomes)) {
  icd9_codes <- outcomes[[outcome]][["icd9"]]
  icd10_codes <- outcomes[[outcome]][["icd10"]]

  # Diagnosis columns inspected for each flag type.
  flag_specs <- list(
    primary = "DIAG1",
    primarysecondary = paste0("DIAG", 1:10),
    secondary = paste0("DIAG", 2:10)
  )

  for (flag_type in names(flag_specs)) {
    flag_col <- paste0(outcome, "_", flag_type)
    diag_cols <- flag_specs[[flag_type]]

    # Reduce(`|`, ...) ORs together the per-column membership tests.
    admissions[
      DDATE < "2015-10-01",
      (flag_col) := Reduce(`|`, lapply(.SD, `%in%`, icd9_codes)),
      .SDcols = diag_cols
    ]
    admissions[
      DDATE >= "2015-10-01",
      (flag_col) := Reduce(`|`, lapply(.SD, `%in%`, icd10_codes)),
      .SDcols = diag_cols
    ]
  }
}
# Second ask: among admissions with AKI as a secondary diagnosis (DIAG2-10),
# indicate what the primary diagnosis (DIAG1) of that same hospitalization was,
# for each of 8 co-morbidities.
# One column <comorbidity>aki is added per co-morbidity. It holds the strings
# "TRUE"/"FALSE": "TRUE" when both aki_secondary and <comorbidity>_primary are
# TRUE for the row, "FALSE" otherwise.
#   1. diabetes                     -> diabetesaki
#   2. circulatory system disease   -> csdaki
#   3. ischemic heart disease       -> ihdaki
#   4. pneumonia                    -> pneumoniaaki
#   5. heart failure                -> hfaki
#   6. acute myocardial infarction  -> amiaki
#   7. cerebrovascular disease      -> cerdaki
#   8. urinary tract infection      -> utiaki
for (comorbidity in c("diabetes", "csd", "ihd", "pneumonia",
                      "hf", "ami", "cerd", "uti")) {
  primary_flag <- admissions[[paste0(comorbidity, "_primary")]]
  # which() drops rows where either flag is NA, so those rows stay "FALSE".
  hit_rows <- which(admissions$aki_secondary == "TRUE" & primary_flag == "TRUE")
  flag_col <- paste0(comorbidity, "aki")
  admissions[, (flag_col) := "FALSE"]
  admissions[hit_rows, (flag_col) := "TRUE"]
}
# First ask:
# This paper is on AKI. Among those with AKI secondary (DIAG1-10), add an
# indicator for diabetes (DIAG1-10) or CKD (DIAG1-10) prior to the AKI hospitalization.
# I.e. an indicator that, if they had AKI_secondary (either before Oct 2015 or after),
# they had DIABETES_secondary before AKI_secondary.
# If AKI_secondary==1 (they were admitted for AKI primary or secondary), take the date of the first admission for AKI:
# admissions$DDATE (for first AKI admission) > admissions$DDATE (for diabetes primary or secondary)
# --> diabetes_prior_aki (indicator)
# We want to add an indicator that they had diabetes prior to AKI.
# Steps:
# Check if someone with the same QID has one row with admissions$aki_primarysecondary=="TRUE"
# and a different row with admissions$diabetes_primarysecondary=="TRUE", and the admission date of the first row > admission date of the second row.
# Add the indicator admissions$diabeteshosp_prior_aki only for the hospitalization corresponding to AKI.
# For testing only: add a second admission for an existing QID.
# A copy of row 100 is turned into an earlier admission (ADATE 10 days before)
# flagged as diabetes but not AKI, and appended to `admissions`.
# The copy is edited before it is appended, so no existing row is touched
# regardless of how many rows `admissions` has, and the flags are written as
# logical TRUE/FALSE to match the type of the flag columns.
# NOTE(review): this synthetic row stays in `admissions` and therefore ends up
# in the exported files - remove this block before a real run.
if (nrow(admissions) >= 100L) {
  test_row <- admissions[100L] # row subset is a copy, safe to modify
  test_row[, `:=`(
    DIAG1 = "N08",
    ADATE = ADATE - 10,
    diabetes_primarysecondary = TRUE,
    aki_primarysecondary = FALSE
  )]
  admissions <- rbind(admissions, test_row)
}
# Identify individuals (QID) with multiple admissions.
# duplicated() marks every repeat occurrence of a QID, so the unique values
# among those are exactly the QIDs that appear more than once. (Indexing
# admissions$QID with positions from table() would pick unrelated rows,
# because table() is ordered by sorted unique QID, not by admission row.)
mult_admissions_qid <- unique(admissions$QID[duplicated(admissions$QID)])

# Find the QIDs whose first diabetes admission precedes their first AKI
# admission, without looping over individuals:
#   1. keep the multi-admission QIDs with a known ADATE,
#   2. sort by QID and ADATE so the first row per QID is the earliest one,
#   3. take the earliest AKI row and the earliest diabetes row per QID,
#   4. keep the QIDs present in both with diabetes date < AKI date.
# as.logical() lets the flags be either logical or the strings "TRUE"/"FALSE".
dated <- data.table(
  QID = admissions$QID,
  ADATE = admissions$ADATE,
  is_aki = as.logical(admissions$aki_primarysecondary),
  is_diabetes = as.logical(admissions$diabetes_primarysecondary)
)
dated <- dated[QID %in% mult_admissions_qid & !is.na(ADATE)]
setorder(dated, QID, ADATE)

# which() drops NA flags; unique(by = "QID") keeps the first (earliest) row.
first_aki <- unique(dated[which(is_aki)], by = "QID")
first_diabetes <- unique(dated[which(is_diabetes)], by = "QID")

# Inner join: only QIDs with both an AKI and a diabetes hospitalization.
first_dates <- merge(
  first_aki[, .(QID, aki_date = ADATE)],
  first_diabetes[, .(QID, diabetes_date = ADATE)],
  by = "QID"
)

# QIDs of those with a diabetes hospitalization prior to AKI.
akidiabetesqid <- first_dates[diabetes_date < aki_date, QID]

# Add the indicator only to the AKI hospitalizations of those QIDs; every
# other row is "FALSE". Values are the strings "TRUE"/"FALSE".
admissions$diabeteshosp_prior_aki <- "FALSE"
prior_diabetes_rows <- which(
  admissions$aki_primarysecondary == "TRUE" &
    admissions$QID %in% akidiabetesqid
)
admissions$diabeteshosp_prior_aki[prior_diabetes_rows] <- "TRUE"
# TODO (ana): please add the same to define prior CKD
#print(admissions)
# declare diabetes_prior_aki TRUE if
# aki_DDATE > diabetes_DDATE
#admissions <- transform(
# admissions, diabetes_prior_aki= ifelse(
# admissions$aki_secondary == TRUE,
# TRUE,
#FALSE)
#)
#print(admissions)
#same for CKD, want to add inidcator that they had CKD prior to AKI
#total of 10 new columns
# Export: for each year, outcome and diagnosis type, append the admissions
# flagged <outcome>_<type> to
#   ../data/output_data_test2/<outcome>/<type>/<outcome>_<type>_<year>.fst
# If the file already exists its rows are read back and the new rows are
# appended to them; nothing is written when there are no rows at all.
for (i in 2000:2000) {
  for (outcome in names(outcomes)) {
    for (type in c("primary", "secondary")) {
      varname <- paste0(outcome, "_", type)
      # Build the output location once instead of three times.
      out_dir <- file.path("../data/output_data_test2/", outcome, type)
      out_file <- paste0(out_dir, "/", varname, "_", i, ".fst")

      if (file.exists(out_file)) {
        year_admissions <- read_fst(out_file)
      } else {
        year_admissions <- NULL
      }

      year_admissions <- rbind(
        year_admissions,
        admissions[year == i & get(varname) == TRUE]
      )

      if (nrow(year_admissions) != 0) {
        # write_fst() does not create missing directories.
        dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
        write_fst(year_admissions, out_file)
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment