Skip to content

Instantly share code, notes, and snippets.

@mattbaggott
Created November 12, 2012 15:53
Show Gist options
  • Save mattbaggott/4060133 to your computer and use it in GitHub Desktop.
Save mattbaggott/4060133 to your computer and use it in GitHub Desktop.
Turnbull's nonparametric estimator for interval-censored data
# Giolo, Suely Ruiz. "Turnbull's nonparametric estimator for interval-censored data'."
# Department of Statistics, Federal University of Paraná (2004): 1-10.
dat <-structure(list(left = c(1, 1, 1, 28, 34, 34, 35, 36, 36, 3, 3,
7, 9, 9, 9, 10, 11, 10, 13, 15, 15, 16, 17, 18, 21, 22, 23, 25,
25, 25, 25, 26, 26, 26, 26, 27, 29, 31, 32, 32, 32, 32, 32, 32,
32, 32, 1, 1, 28, 28, 34, 37, 37, 2, 2, 3, 3, 3, 3, 3, 4, 5,
5, 5, 5, 6, 6, 7, 8, 8, 8, 8, 9, 9, 9, 10, 10, 11, 12, 38, 14,
15, 15, 19, 19, 20, 21, 21, 23, 23, 24, 24, 30, 33), right = c(32,
33, 30, 2, 3, 2, 1, 7, 5, 6, 9, NA, NA, 16, 16, NA, 23, 17, NA,
NA, NA, 25, 27, 22, NA, NA, NA, 28, 29, NA, NA, 28, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 13, 30, 34, 33, 33,
3, 12, 23, 8, 4, NA, 8, NA, 11, 11, NA, 26, NA, NA, 8, 10, 13,
15, 11, 15, 31, 18, 14, 17, 16, 15, 21, NA, 21, NA, 20, 19, 22,
24, NA, NA, 27, NA, NA, NA, 26, 29, NA), ther = c(2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1), cens = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1)), .Names = c("left",
"right", "ther", "cens"), row.names = 2:95, class = "data.frame")
#===============================================
#Survival curves using intervals - Figure 1
#===============================================
require(survival)
source("Turnbull.R") # Turnbull.R
dat1 <- dat[dat$ther==1,]
dat1$right[is.na(dat1$right)] <- Inf
tau <- cria.tau(dat1)
p <- S.ini(tau=tau)
A <- cria.A(data=dat1,tau=tau)
tb1 <- Turnbull(p,A,dat1)
tb1
dat1 <- dat[dat$ther==0,]
dat1$right[is.na(dat1$right)] <- Inf
tau <- cria.tau(dat1)
p <- S.ini(tau=tau)
A <- cria.A(data=dat1,tau=tau)
tb2 <- Turnbull(p,A,dat1)
tb2
plot(tb1$time,tb1$surv,lty=1, col = 4,type="s",ylim=c(0,1),xlim=range(c(0,60)),
xlab="Tempos (meses)",ylab="S(t)")
lines(tb2$time,tb2$surv,lty=4,col=2,type="s")
legend(1,0.3,lty=c(1,4),col=c(4,2),c("Radioterapia","Radioterapia + Quimioterapia"), bty="n",cex=0.8)
#===============================================
#Survival curves using midpoints - Figure 2
#===============================================
p <-dat$left+((dat$right-dat$left)/2)
pm <-ifelse(is.finite(p),p,dat$left)
cens <- ifelse(is.finite(p),1,0)
ekm<-survfit(Surv(pm,cens)~ther,type=c("kaplan-meier"),data=dat)
plot(tb1$time,tb1$surv,lty=1,type="s",col=4,ylim=c(0,1),xlim=c(0,50),xlab="Tempos (meses)",ylab="S(t)")
lines(tb2$time,tb2$surv,lty=1,col=2,type="s")
lines(ekm[1]$time,ekm[1]$surv,type="s",col=2,lty=2)
lines(ekm[2]$time,ekm[2]$surv,type="s",col=4,lty=2)
legend(3,0.30,lty=2,col=4, "Radiotherapy using midpoints", bty="n",cex=0.8)
legend(3,0.25,lty=1,col=4, "Radiotherapy using intervals", bty="n",cex=0.8)
legend(3,0.2,lty=2,col=2,"Radio + Chemotherapy using midpoints", bty="n",cex=0.8)
legend(3,0.15,lty=1,col=2,"Radio + Chemotherapy using intervals", bty="n",cex=0.8)
cria.tau <- function(data){
# Giolo, Suely Ruiz. "Turnbull's nonparametric estimator for interval-censored data'."
# Department of Statistics, Federal University of Paraná (2004): 1-10.
l <- data$left
r <- data$right
tau <- sort(unique(c(l,r[is.finite(r)])))
return(tau)
}
S.ini <- function(tau){
m<-length(tau)
ekm<-survfit(Surv(tau[1:m-1],rep(1,m-1)))
So<-c(1,ekm$surv)
p <- -diff(So)
return(p)
}
cria.A <- function(data,tau){
tau12 <- cbind(tau[-length(tau)],tau[-1])
interv <- function(x,inf,sup) ifelse(x[1]>=inf & x[2]<=sup,1,0)
A <- apply(tau12,1,interv,inf=data$left,sup=data$right)
id.lin.zero <- which(apply(A==0, 1, all))
if(length(id.lin.zero)>0) A <- A[-id.lin.zero, ]
return(A)
}
Turnbull <- function(p, A, data, eps=1e-3, iter.max=200, verbose=FALSE){
n<-nrow(A)
m<-ncol(A)
Q<-matrix(1,m)
iter <- 0
repeat {
iter <- iter + 1
diff<- (Q-p)
maxdiff<-max(abs(as.vector(diff)))
if (verbose)
print(maxdiff)
if (maxdiff<eps | iter>=iter.max)
break
Q<-p
C<-A%*%p
p<-p*((t(A)%*%(1/C))/n)
}
cat("Iterations = ", iter,"\n")
cat("Max difference = ", maxdiff,"\n")
cat("Convergence criteria: Max difference < 1e-3","\n")
dimnames(p)<-list(NULL,c("P Estimate"))
surv<-round(c(1,1-cumsum(p)),digits=5)
right <- data$right
if(any(!(is.finite(right)))){
t <- max(right[is.finite(right)])
return(list(time=tau[tau<t],surv=surv[tau<t]))
}
else
return(list(time=tau,surv=surv))
}
@paulocerqueirajr
Copy link

In the function "S.ini" is missing "~1".
ekm<-survfit(Surv(tau[1:m-1],rep(1,m-1)))
ekm<-survfit(Surv(tau[1:m-1],rep(1,m-1))~1)

Thank you for the post.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment