Last active
November 7, 2021 18:11
-
-
Save turgeonmaxime/c8d80629b5e7f29f59df929c58e2e3e0 to your computer and use it in GitHub Desktop.
Comparing Cox regression, Poisson regression with splines, and case-base sampling with splines on two datasets: lung from survival, ERSPC from casebase
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#---- | |
# Poisson vs casebase | |
# authors: Max Turgeon, Jesse Islam and Sahir Bhatnagar | |
# date: 10/1/2021 | |
#---- | |
set.seed(1952) | |
library(casebase) | |
library(cowplot) | |
library(Epi) | |
library(ggplot2) | |
library(mgcv) | |
library(popEpi) | |
library(splines) | |
library(survival) | |
library(tidyr) | |
nbins <- 24 # Number of time bins for poisson regression | |
# 1st dataset: lung from survival---- | |
# Recode sex to 0/1; status to 0/1 | |
lung <- transform(lung, sex = sex - 1, status = status - 1) | |
# 1. Cox regression | |
cox_lung <- coxph(Surv(time, status) ~ age + sex, | |
data = lung) | |
# 2. Poisson regression | |
# Create lexis object | |
lung_lexis <- Lexis(exit = list(tfe = time), | |
exit.status = factor(status, | |
labels = c("Censored", "Dead")), | |
data = lung) | |
# Define time splits | |
lung_splits <- splitMulti(lung_lexis, tfe = seq(0, max(lung$time), | |
length.out = nbins + 1)) | |
# specify knots for splines | |
time_knots <- c(0, 25, 100, 500, 1000) | |
# fit poisson model | |
poisson_lung <- glm(cbind(lex.Xst == "Dead", | |
lex.dur) ~ Ns(tfe, knots = time_knots) + age + sex, | |
family = poisreg, data = lung_splits) | |
# 3. Case-base sampling | |
cb_lung <- fitSmoothHazard(status ~ pspline(time, df = 2) + age + sex, | |
data = lung) | |
# Estimate hazards for a 60yo man | |
newdata <- data.frame(tfe = seq(5, 995, by = 10), | |
age = 60, sex = 0, offset = 0) | |
newdata$time <- newdata$tfe | |
# Estimate hazards | |
hazard_pois_lung <- exp(predict(poisson_lung, newdata)) | |
hazard_cb_lung <- exp(predict(cb_lung, newdata)) | |
hazard_data <- newdata | |
hazard_data$Poisson <- hazard_pois_lung | |
hazard_data$casebase <- hazard_cb_lung | |
hazard_data <- pivot_longer(hazard_data, | |
cols = c("Poisson", "casebase"), | |
names_to = "Method", values_to = "hazard") | |
# Plot hazard | |
hazard_plot_lung <- ggplot(hazard_data, | |
aes(x = time, y = hazard, color = Method)) + | |
geom_line() + | |
theme_minimal_grid() + | |
theme(legend.position = 'top') | |
ggsave2(filename = "Plot1-4.pdf", | |
plot = hazard_plot_lung, | |
width = 5, height = 5) | |
# 2nd dataset: ERSPC from casebase---- | |
# 1. Cox regression | |
cox_erspc <- coxph(Surv(Follow.Up.Time, DeadOfPrCa) ~ ScrArm, data = ERSPC) | |
# 2. Poisson regression | |
# Create lexis object | |
erspc_lexis <- Lexis(exit = list(tfe = Follow.Up.Time), | |
exit.status = factor(DeadOfPrCa, labels = c(0, 1)), | |
data = ERSPC) | |
# Define time splits | |
erspc_splits <- splitMulti(erspc_lexis, tfe = seq(0, max(ERSPC$Follow.Up.Time), | |
length.out = nbins + 1)) | |
# specify knots for splines | |
time_knots <- seq(0, max(ERSPC$Follow.Up.Time), length.out = 3) | |
# fit poisson | |
poisson_erspc <- glm(cbind(lex.Xst, | |
lex.dur) ~ Ns(tfe, knots = time_knots) + ScrArm, | |
family = poisreg, data = erspc_splits) | |
# 3. Case-base sampling | |
cb_erspc <- fitSmoothHazard(DeadOfPrCa ~ pspline(Follow.Up.Time, df = 2) + ScrArm, | |
data = ERSPC) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment