Skip to content

Instantly share code, notes, and snippets.

@joseph-rickert
Created April 25, 2016 21:42
Show Gist options
  • Save joseph-rickert/447346bbe24ccfdf730167499b90cb5f to your computer and use it in GitHub Desktop.
Save joseph-rickert/447346bbe24ccfdf730167499b90cb5f to your computer and use it in GitHub Desktop.
Code for post on Reading with R
# Joseph Rickert
# April 2016
# Some code for reading:
# LOGISTIC REGRESSION, SURVIVL ANALYSIS, AND THE KAPLAN-MEIER CURVE
# by Bradley Efron (1987)
#https://statistics.stanford.edu/sites/default/files/BIO%20115.pdf
library(survival)
library(ggplot2)
#--------------------------------------
# Data for Efron's Table 1
# Enter raw data for arm A
Adays <- c(7, 34, 42, 63, 64, 74, 83, 84, 91,
108, 112, 129, 133, 133, 139, 140, 140, 146,
149, 154, 157, 160, 160, 165, 173, 176, 185,
218, 225, 241, 248, 273, 277, 279, 297, 319,
405, 417, 420, 440, 523, 523, 583, 594,
1101, 1116, 1146, 1226, 1349, 1412, 1417)
Astatus <- rep(1,51)
Astatus[c(6,27,34,36,42,46,48,49,50)] <-0
Aobj <- Surv(time = Adays, Astatus==1)
Aobj
# [1] 7 34 42 63 64 74+ 83 84 91 108 112 129 133 133
# [15] 139 140 140 146 149 154 157 160 160 165 173 176 185+ 218
# [29] 225 241 248 273 277 279+ 297 319+ 405 417 420 440 523 523+
# [43] 583 594 1101 1116+ 1146 1226+ 1349+ 1412+ 1417
#--------------------------
# Data for Efron's Table 2
# data for arm B
Bdays <- c(37, 84, 92, 94,
110, 112, 119, 127, 130, 133, 140, 146, 155, 159,
169, 173, 179, 194, 195, 209, 249, 281, 319, 339,
432, 469, 519, 528, 547, 613, 633, 725, 759, 817,
1092, 1245, 1331, 1557,1642, 1771,
1776, 1897, 2023, 2146, 2297)
Bstatus <- rep(1,45)
Bstatus[c(15,28,29,30,33,35,36,37,39,40,42,43,44,45)] <- 0
Bobj <- Surv(Bdays, Bstatus == 1)
Bobj
# [1] 37 84 92 94 110 112 119 127 130 133 140 146 155 159 169+ 173 179
# [18] 194 195 209 249 281 319 339 432 469 519 528+ 547+ 613+ 633 725 759+ 817
# [35] 1092+ 1245+ 1331+ 1557 1642+ 1771+ 1776 1897+ 2023+ 2146+ 2297+
#----------------------
# Plot Kaplan-Meier Curves
Afit <- survfit(Aobj ~ 1, type="kaplan-meier")
Bfit <- survfit(Bobj ~ 1, type="kaplan-meier")
plot(Afit, conf.int = FALSE, col = "blue",
xlim = c(0, 2400),
xlab = "days",
ylab = "Proportion Surving",
main = " Kaplan-Meier Curves: Efron Fig 1")
lines(Bfit, conf.int = FALSE, col = "red")
legend("topright", # places a legend at the appropriate place
legend = c("Arm A","Arm B"), # puts text in the legend
lty=c(1,1), # gives the legend appropriate symbols (lines)
lwd=c(2.5,2.5),
cex=0.5,
col=c("Blue","Red")) # gives the legend lines the correct color and width
#------------------------------
# Models for Arm A
m <- 30.348 #days per month
KM_A <- summary(Afit, times=seq(m,47*m,by=m),scale=m)
KM_A
# Call: survfit(formula = Aobj ~ 1, type = "kaplan-meier")
# time n.risk n.event survival std.err lower 95% CI upper 95% CI
# 1 50 1 0.980 0.0194 0.9431 1.000
# 2 48 2 0.941 0.0329 0.8788 1.000
# 3 42 5 0.842 0.0513 0.7470 0.949
# 4 40 2 0.802 0.0562 0.6989 0.920
# .
# .
# .
# 45 2 0 0.126 0.0528 0.0552 0.286
# 46 2 0 0.126 0.0528 0.0552 0.286
# Set up variables for models
ni <- c(51,KM_A$n.risk) # Number at risk in month i
si <- c(KM_A$n.event,1) # Number of deaths in month i
failure <- ni - si # Number of "failures" at risk who didn't die
n <- length(ni)
month <- 1:n
t <- (1:n - .5) # base time
t2 <- t^2
t3 <- t^3
t11 <- pmin((t-11),0) # Cubic spline time
t11.2 <- t11^2
t11.3 <- t11^3
#-----------------------------
# Life Table Model
hLT <- si/ni # hazard estimate
#-----------------------------
# Cubic Model
Y <- matrix(c(si, failure),n,2) # Response matrix
form <- formula(Y ~ t + t2 + t3)
logitC <- glm(form,family=binomial(link="logit"))
hc <- logitC$fitted.values # hazard
summary(logitC)
#-----------------------------
# Cubic Spline Model
formSpline <- formula(Y ~ t + t11.2 + t11.3)
logitCS <- glm(formSpline,family=binomial(link="logit"))
hs <- logitCS$fitted.values # hazard
summary(logitCS)
#-----------------------------
# Calculate Survival Functions for Arm A
G <- vector(); Gc <- vector(); Gs <- vector()
G[1] <- 1; Gc[1] <- 1; Gs[1] <- 1
n <- length(hs)
for (i in 2:n){
G[i] <- G[i-1]*(1 - hLT[i - 1]) # Life Tale
Gc[i] <- Gc[i-1]*(1 - hc[i - 1]) # Cubic Model
Gs[i] <- Gs[i-1]*( 1 - hs[i - 1]) # Cubic Spline Model
}
#-------------------------
# Recreate figure 3 from Efron's Paper
armA <- data.frame(month,ni,si,failure,hLT,t,t2,t3,t11,t11.2,t11.3)
head(armA,3)
# month ni si failure hLT t t2 t3 t11 t11.2 t11.3
# 1 1 51 1 50 0.01960784 0.5 0.25 0.125 -10.5 110.25 -1157.625
# 2 2 50 2 48 0.04000000 1.5 2.25 3.375 -9.5 90.25 -857.375
# 3 3 48 5 43 0.10416667 2.5 6.25 15.625 -8.5 72.25 -614.125
p <- ggplot(armA, aes(month))
p + geom_line(aes(y = G)) +
geom_point(aes(y = Gc), colour = "blue", shape = 2) +
geom_point(aes(y = Gs), colour = "red", shape = 3) +
geom_line(aes(y = Gs), colour = "red") +
xlab("months") +
ylab("probability") +
ggtitle("Efron Figure 3")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment