Skip to content

Instantly share code, notes, and snippets.

@guineverelee
Created June 22, 2015 22:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save guineverelee/0f12e12d6c1f641a86f9 to your computer and use it in GitHub Desktop.
Save guineverelee/0f12e12d6c1f641a86f9 to your computer and use it in GitHub Desktop.
Survival Analysis: Sample R script to retrieve (1) date of virologic suppression (2) date of post-suppression virologic rebound from a longitudinal database file
#Goal: To find out time-to-: VSupp401 VReb401 etc (days and event0/censor1)
#Keyboard shortcuts
#Run-all F5
#Run-line Ctrl+R
#CSV date MUST be in yyyy-mm-dd or yyyy/mm/dd (otherwise error)
#format: A character string. If not specified, it will try '"%Y-%m-%d"' then '"%Y/%m/%d"' on the first non-'NA' element, and give an error if neither works.
#csv must be sorted already by PATID and coldate
#Step1: Settings: Input filename
filename_VL_CD4_csv <- "database_file.csv"
#print(filename_VL_CD4_csv)
#Step2: Load main VL CD4 database
VLCD4_df <- read.csv(filename_VL_CD4_csv,colClasses=c(rep("character",2),rep("Date",2),"character",rep("numeric",4)))
#Step3: Subset to Post-ARV, make unique PATID vector
VLCD4_df2_PostARV <- subset(VLCD4_df,VLCD4_df$TIMING=="Post-ARV")
#Note, row names were NOT reset
unique_id_list <- unique(VLCD4_df2_PostARV$PSEUDO)
#print(unique_id_list[1:10])
#length(unique_id_list)
row_header <- c("PATID","VSupp401Date","VSupp401event","VReb401Date","VReb401event","CensoredDate")
output_table <- rbind(row_header)
#Step4: Loop and subset each patient...etc
for (n in seq_along(unique_id_list)){
#print(n)
#default event is 0=censored; 1=happened
VSupp_event <- 0
VReb_event <- 0
#print(unique_id_list[n])
#Step4b: subset to each patient
#VLCD4_df3_uniqPATID is ONE patient All Post-ARV NO "NA"
VLCD4_df2b_uniqPATID <- subset(VLCD4_df2_PostARV, VLCD4_df2_PostARV$PSEUDO==unique_id_list[n])
VLCD4_df3_uniqPATID <- subset(VLCD4_df2b_uniqPATID, VLCD4_df2b_uniqPATID$VLOAD!="NA")
#print(head(VLCD4_df3_uniqPATID))
#Step4c: Make the VL column into a vector
VL_vec1 <- VLCD4_df3_uniqPATID$VLOAD
#print(VL_vec1)
#print(length(VL_vec1)) #89, 48
###########VSUPP-START#################
#Step4d: Index the first of two consecutive VL <401 #take worst case scenario
if (length(VL_vec1)>=2){
for (i in 1:(length(VL_vec1)-1)){
#print(i)1-88 and 1-47
if (VL_vec1[i]<401 & VL_vec1[i+1]<401){
#print(VL_vec1[i])
#print(i)
#break NOTE: break only break for loops, stop() stops everything
#Step4e: index is i already, next retrieve the full row and index that for original database
df1index_VSupp <- row.names(VLCD4_df3_uniqPATID[i,]) #get 6, 97
df3vecindex_VSupp <- i
VSupp_event <- 1
#Step4f: Retrieve coldate from df1 row index
VSupp_date <- VLCD4_df[df1index_VSupp,]$COLDATE
#print(VSupp_date) spot checked; correct
#print(VSupp_event)
#print(df3vecindex_VSupp)
################VREB-START######################
#Step4g: Index the first of two consecutive >=401
if (length(VL_vec1[df3vecindex_VSupp:(length(VL_vec1))])>=2){
for (j in df3vecindex_VSupp:(length(VL_vec1)-1)){
#print(VL_vec1[j])
#break
if (VL_vec1[j]>=401 & VL_vec1[j+1]>=401){
#Step4h: index is j already, next retrieve the full row and index that for original database
df1index_VReb <- row.names(VLCD4_df3_uniqPATID[j,]) #get 12,115
#print(df1index_VReb)
df3vecindex_VReb <- j
VReb_event <- 1
#Step4i: Retrieve VReb coldate
VReb_date <- VLCD4_df[df1index_VReb,]$COLDATE
#print(VReb_date) got 1999-11-16 and 2003-06-27 correct
#print(VReb_event)
break
}else{
VReb_event <- 0
VReb_date <- "NA"
}
}
}else{
VReb_event <- 0
VReb_date <- "NA"
}
break
}else{
VSupp_event <- 0
VSupp_date <- "NA"
VReb_event <- 0
VReb_date <- "NA"
}
}
}else{
VSupp_event <- 0
VSupp_date <- "NA"
VReb_event <- 0
VReb_date <- "NA"
}
#List of output
#1. unique_id_list[n]
#2. VSupp_date
#3. VSupp_event
#4. VReb_date
#5. VReb_event
#####################CENSORDATE-START##############################
if (length(VLCD4_df3_uniqPATID$COLDATE)==0){
Censor_date <- "NA"
}else{
Censor_date <- VLCD4_df3_uniqPATID$COLDATE[length(VLCD4_df3_uniqPATID$COLDATE)]
}
#print(censor_date)
#List of output
#6. Censor_date
print(unique_id_list[n])
#print(class(VSupp_date))
#print(VSupp_event)
#print(VReb_date)
#print(VReb_event)
#print(Censor_date)
row_current <- c(unique_id_list[n],as.character(VSupp_date),VSupp_event,as.character(VReb_date),VReb_event,as.character(Censor_date))
#print(row_current)
output_table <- rbind(output_table,row_current)
}
#print(output_table) #correct
write.table(output_table,"output_VSupp_VReb401.csv",sep=",")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment