sashagusev/competing_sim.R

## competing_sim.R
library("survival")
library("cmprsk")
library("reshape2")
library("ggplot2")
library("RColorBrewer")

# Standard function for simulating survival with a Weibull baseline hazard
# https://stats.stackexchange.com/questions/135124/how-to-create-a-toy-survival-time-to-event-data-with-right-censoring

# baseline hazard: Weibull
# N = sample size
# lambda = scale parameter in h0()
# rho = shape parameter in h0()
# beta = fixed effect parameter
# rateC = rate parameter of the exponential distribution of C
# x_freq = number of carrieres for the covariate

simulWeib <- function(N, lambda, rho, beta, rateC , x_freq)
{
  # covariate --> N Bernoulli trials
  x <- rbinom(N,1,x_freq)
  # x = sample(x=c(0, 1), size=N, replace=TRUE, prob=c(0.5, 0.5))

  # Weibull latent event times
  v <- runif(n=N)
  Tlat <- (- log(v) / (lambda * exp(x * beta)))^(1 / rho)

  # censoring times
  C <- rexp(n=N, rate=rateC)

  # follow-up times and event indicators
  tstop <- pmin(Tlat, C)
  event <- as.numeric(Tlat <= C)

  # data set
  df = data.frame(id=1:N,
             tstop=tstop,
             event=event,
             x=x)
  return(df)
}

set.seed(42)

# Generate multiple simulations and store results

seeds = 20
results = matrix(nrow=seeds,ncol=4)
colnames(results) = c("OS","GLM","CPH","CRR")

for ( s in 1:seeds ) {
  N_tot = 5e3

  # Generate weibull survival phenotype associated with CHIP
  dat <- simulWeib(N=N_tot, lambda=0.0001, rho=3.5, beta = log(1.4) , rateC=0.01 , x_freq = 0.1)

  # Measure effect of CHIP on survival
  cs_fit <- coxph(Surv(tstop, event) ~ x , data=dat )
  results[s,"OS"] = summary(cs_fit)$coef[1,2]

  # Generate weibull AD phenotype
  AD_v = runif(n=N_tot)
  AD_tstop = (- log(AD_v) / (0.000015))^(1 / 3.5)
  AD_case = AD_tstop < dat$tstop
  mean(AD_case)

  # Measure effect of CHIP on AD with logistic regression
  glm_fit = summary( glm( AD_case ~ dat$x  , family="binomial" ) )
  cat( "GLM:" , exp(glm_fit$coef[2,1]) , '\n' )
  results[s,"GLM"] = exp(glm_fit$coef[2,1])

  # Generate competing risk regression
  dat$cr_event = dat$event*2
  dat$cr_event[ AD_case ] = 1
  dat$cr_tstop = dat$tstop
  dat$cr_tstop[ AD_case ] = AD_tstop[ AD_case ]

  # Measure effect of CHIP on AD with Competing Risk
  crr_fit = crr(dat$cr_tstop,dat$cr_event,dat$x)
  cat( "CRR:" , summary(crr_fit)$coef[1,2] , '\n' )
  results[s,"CRR"] = summary(crr_fit)$coef[1,2]

  # Measure effect of CHIP on AD with Cause Specific CoxPH
  cs_fit <- coxph(Surv(dat$cr_tstop, dat$cr_event == 1) ~ dat$x )
  cat( "CoxCS HR:" , summary(cs_fit)$coef[1,2], '\n' )
  results[s,"CPH"] = summary(cs_fit)$coef[1,2]
}

# Compute the mean / se of results
apply(results,2,mean)
apply(results,2,sd)/sqrt(nrow(results))

# Plot the distribution of results
df = melt(results)
colnames(df) = c("id","Method","HR")
mu_se <- function(x) {
  m <- mean(x)
  ymin <- m-sd(x)/sqrt(length(x))
  ymax <- m+sd(x)/sqrt(length(x))
  return(c(y=m,ymin=ymin,ymax=ymax))
}
ggplot(df, aes(x=Method, y=HR,fill=Method)) + geom_violin() + stat_summary(fun.data=mu_se) +
  theme_bw() + scale_fill_brewer(palette="Set2") + theme(legend.position="none") +
  scale_x_discrete(labels=c("OS" = "Death ~ CHIP", "GLM" = "GLM: AD ~ CHIP","CPH" = "CPH: AD ~ CHIP","CRR" = "CRR: AD ~ CHIP"))

# Plot the survival curves
clr = brewer.pal(3,"Set1")
kmfit = survfit(Surv(tstop, event) ~ x, data=dat)
plot(kmfit,col=c(clr[2],clr[1]),las=1 , xlab="Time", ylab="Fraction Event Free",lwd=2)
kmfit2 = survfit(Surv(AD_tstop, rep(1,N_tot)) ~ 1)
lines(kmfit2,col=clr[3],conf.int=F)
legend("bottomleft",legend=c("Mortality - No CHIP","Mortality - CHIP","AD"),col=c(clr[2],clr[1],clr[3]),bty="n",pch=19)
	library("survival")
	library("cmprsk")
	library("reshape2")
	library("ggplot2")
	library("RColorBrewer")

	# Standard function for simulating survival with a Weibull baseline hazard
	# https://stats.stackexchange.com/questions/135124/how-to-create-a-toy-survival-time-to-event-data-with-right-censoring

	# baseline hazard: Weibull
	# N = sample size
	# lambda = scale parameter in h0()
	# rho = shape parameter in h0()
	# beta = fixed effect parameter
	# rateC = rate parameter of the exponential distribution of C
	# x_freq = number of carrieres for the covariate

	simulWeib <- function(N, lambda, rho, beta, rateC , x_freq)
	{
	# covariate --> N Bernoulli trials
	x <- rbinom(N,1,x_freq)
	# x = sample(x=c(0, 1), size=N, replace=TRUE, prob=c(0.5, 0.5))

	# Weibull latent event times
	v <- runif(n=N)
	Tlat <- (- log(v) / (lambda * exp(x * beta)))^(1 / rho)

	# censoring times
	C <- rexp(n=N, rate=rateC)

	# follow-up times and event indicators
	tstop <- pmin(Tlat, C)
	event <- as.numeric(Tlat <= C)

	# data set
	df = data.frame(id=1:N,
	tstop=tstop,
	event=event,
	x=x)
	return(df)
	}

	set.seed(42)

	# Generate multiple simulations and store results

	seeds = 20
	results = matrix(nrow=seeds,ncol=4)
	colnames(results) = c("OS","GLM","CPH","CRR")

	for ( s in 1:seeds ) {
	N_tot = 5e3

	# Generate weibull survival phenotype associated with CHIP
	dat <- simulWeib(N=N_tot, lambda=0.0001, rho=3.5, beta = log(1.4) , rateC=0.01 , x_freq = 0.1)

	# Measure effect of CHIP on survival
	cs_fit <- coxph(Surv(tstop, event) ~ x , data=dat )
	results[s,"OS"] = summary(cs_fit)$coef[1,2]

	# Generate weibull AD phenotype
	AD_v = runif(n=N_tot)
	AD_tstop = (- log(AD_v) / (0.000015))^(1 / 3.5)
	AD_case = AD_tstop < dat$tstop
	mean(AD_case)

	# Measure effect of CHIP on AD with logistic regression
	glm_fit = summary( glm( AD_case ~ dat$x , family="binomial" ) )
	cat( "GLM:" , exp(glm_fit$coef[2,1]) , '\n' )
	results[s,"GLM"] = exp(glm_fit$coef[2,1])

	# Generate competing risk regression
	dat$cr_event = dat$event*2
	dat$cr_event[ AD_case ] = 1
	dat$cr_tstop = dat$tstop
	dat$cr_tstop[ AD_case ] = AD_tstop[ AD_case ]

	# Measure effect of CHIP on AD with Competing Risk
	crr_fit = crr(dat$cr_tstop,dat$cr_event,dat$x)
	cat( "CRR:" , summary(crr_fit)$coef[1,2] , '\n' )
	results[s,"CRR"] = summary(crr_fit)$coef[1,2]

	# Measure effect of CHIP on AD with Cause Specific CoxPH
	cs_fit <- coxph(Surv(dat$cr_tstop, dat$cr_event == 1) ~ dat$x )
	cat( "CoxCS HR:" , summary(cs_fit)$coef[1,2], '\n' )
	results[s,"CPH"] = summary(cs_fit)$coef[1,2]
	}

	# Compute the mean / se of results
	apply(results,2,mean)
	apply(results,2,sd)/sqrt(nrow(results))

	# Plot the distribution of results
	df = melt(results)
	colnames(df) = c("id","Method","HR")
	mu_se <- function(x) {
	m <- mean(x)
	ymin <- m-sd(x)/sqrt(length(x))
	ymax <- m+sd(x)/sqrt(length(x))
	return(c(y=m,ymin=ymin,ymax=ymax))
	}
	ggplot(df, aes(x=Method, y=HR,fill=Method)) + geom_violin() + stat_summary(fun.data=mu_se) +
	theme_bw() + scale_fill_brewer(palette="Set2") + theme(legend.position="none") +
	scale_x_discrete(labels=c("OS" = "Death ~ CHIP", "GLM" = "GLM: AD ~ CHIP","CPH" = "CPH: AD ~ CHIP","CRR" = "CRR: AD ~ CHIP"))

	# Plot the survival curves
	clr = brewer.pal(3,"Set1")
	kmfit = survfit(Surv(tstop, event) ~ x, data=dat)
	plot(kmfit,col=c(clr[2],clr[1]),las=1 , xlab="Time", ylab="Fraction Event Free",lwd=2)
	kmfit2 = survfit(Surv(AD_tstop, rep(1,N_tot)) ~ 1)
	lines(kmfit2,col=clr[3],conf.int=F)
	legend("bottomleft",legend=c("Mortality - No CHIP","Mortality - CHIP","AD"),col=c(clr[2],clr[1],clr[3]),bty="n",pch=19)