Skip to content

Instantly share code, notes, and snippets.

@sashagusev
Created June 16, 2023 04:32
Show Gist options
  • Save sashagusev/400a7d695305d9c3cdd1383c71204415 to your computer and use it in GitHub Desktop.
Save sashagusev/400a7d695305d9c3cdd1383c71204415 to your computer and use it in GitHub Desktop.
library("survival")
library("cmprsk")
library("reshape2")
library("ggplot2")
library("RColorBrewer")
# Standard function for simulating survival with a Weibull baseline hazard
# https://stats.stackexchange.com/questions/135124/how-to-create-a-toy-survival-time-to-event-data-with-right-censoring
# baseline hazard: Weibull
# N = sample size
# lambda = scale parameter in h0()
# rho = shape parameter in h0()
# beta = fixed effect parameter
# rateC = rate parameter of the exponential distribution of C
# x_freq = number of carrieres for the covariate
simulWeib <- function(N, lambda, rho, beta, rateC , x_freq)
{
# covariate --> N Bernoulli trials
x <- rbinom(N,1,x_freq)
# x = sample(x=c(0, 1), size=N, replace=TRUE, prob=c(0.5, 0.5))
# Weibull latent event times
v <- runif(n=N)
Tlat <- (- log(v) / (lambda * exp(x * beta)))^(1 / rho)
# censoring times
C <- rexp(n=N, rate=rateC)
# follow-up times and event indicators
tstop <- pmin(Tlat, C)
event <- as.numeric(Tlat <= C)
# data set
df = data.frame(id=1:N,
tstop=tstop,
event=event,
x=x)
return(df)
}
set.seed(42)
# Generate multiple simulations and store results
seeds = 20
results = matrix(nrow=seeds,ncol=4)
colnames(results) = c("OS","GLM","CPH","CRR")
for ( s in 1:seeds ) {
N_tot = 5e3
# Generate weibull survival phenotype associated with CHIP
dat <- simulWeib(N=N_tot, lambda=0.0001, rho=3.5, beta = log(1.4) , rateC=0.01 , x_freq = 0.1)
# Measure effect of CHIP on survival
cs_fit <- coxph(Surv(tstop, event) ~ x , data=dat )
results[s,"OS"] = summary(cs_fit)$coef[1,2]
# Generate weibull AD phenotype
AD_v = runif(n=N_tot)
AD_tstop = (- log(AD_v) / (0.000015))^(1 / 3.5)
AD_case = AD_tstop < dat$tstop
mean(AD_case)
# Measure effect of CHIP on AD with logistic regression
glm_fit = summary( glm( AD_case ~ dat$x , family="binomial" ) )
cat( "GLM:" , exp(glm_fit$coef[2,1]) , '\n' )
results[s,"GLM"] = exp(glm_fit$coef[2,1])
# Generate competing risk regression
dat$cr_event = dat$event*2
dat$cr_event[ AD_case ] = 1
dat$cr_tstop = dat$tstop
dat$cr_tstop[ AD_case ] = AD_tstop[ AD_case ]
# Measure effect of CHIP on AD with Competing Risk
crr_fit = crr(dat$cr_tstop,dat$cr_event,dat$x)
cat( "CRR:" , summary(crr_fit)$coef[1,2] , '\n' )
results[s,"CRR"] = summary(crr_fit)$coef[1,2]
# Measure effect of CHIP on AD with Cause Specific CoxPH
cs_fit <- coxph(Surv(dat$cr_tstop, dat$cr_event == 1) ~ dat$x )
cat( "CoxCS HR:" , summary(cs_fit)$coef[1,2], '\n' )
results[s,"CPH"] = summary(cs_fit)$coef[1,2]
}
# Compute the mean / se of results
apply(results,2,mean)
apply(results,2,sd)/sqrt(nrow(results))
# Plot the distribution of results
df = melt(results)
colnames(df) = c("id","Method","HR")
mu_se <- function(x) {
m <- mean(x)
ymin <- m-sd(x)/sqrt(length(x))
ymax <- m+sd(x)/sqrt(length(x))
return(c(y=m,ymin=ymin,ymax=ymax))
}
ggplot(df, aes(x=Method, y=HR,fill=Method)) + geom_violin() + stat_summary(fun.data=mu_se) +
theme_bw() + scale_fill_brewer(palette="Set2") + theme(legend.position="none") +
scale_x_discrete(labels=c("OS" = "Death ~ CHIP", "GLM" = "GLM: AD ~ CHIP","CPH" = "CPH: AD ~ CHIP","CRR" = "CRR: AD ~ CHIP"))
# Plot the survival curves
clr = brewer.pal(3,"Set1")
kmfit = survfit(Surv(tstop, event) ~ x, data=dat)
plot(kmfit,col=c(clr[2],clr[1]),las=1 , xlab="Time", ylab="Fraction Event Free",lwd=2)
kmfit2 = survfit(Surv(AD_tstop, rep(1,N_tot)) ~ 1)
lines(kmfit2,col=clr[3],conf.int=F)
legend("bottomleft",legend=c("Mortality - No CHIP","Mortality - CHIP","AD"),col=c(clr[2],clr[1],clr[3]),bty="n",pch=19)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment