Skip to content

Instantly share code, notes, and snippets.

@turgeonmaxime
Last active November 7, 2021 18:11
Show Gist options
  • Save turgeonmaxime/c8d80629b5e7f29f59df929c58e2e3e0 to your computer and use it in GitHub Desktop.
Save turgeonmaxime/c8d80629b5e7f29f59df929c58e2e3e0 to your computer and use it in GitHub Desktop.
Comparing Cox regression, Poisson regression with splines, and case-base sampling with splines on two datasets: lung from survival, ERSPC from casebase
#----
# Poisson vs casebase
# authors: Max Turgeon, Jesse Islam and Sahir Bhatnagar
# date: 10/1/2021
#----
set.seed(1952)
library(casebase)
library(cowplot)
library(Epi)
library(ggplot2)
library(mgcv)
library(popEpi)
library(splines)
library(survival)
library(tidyr)
nbins <- 24 # Number of time bins for poisson regression
# 1st dataset: lung from survival----
# Recode sex to 0/1; status to 0/1
lung <- transform(lung, sex = sex - 1, status = status - 1)
# 1. Cox regression
cox_lung <- coxph(Surv(time, status) ~ age + sex,
data = lung)
# 2. Poisson regression
# Create lexis object
lung_lexis <- Lexis(exit = list(tfe = time),
exit.status = factor(status,
labels = c("Censored", "Dead")),
data = lung)
# Define time splits
lung_splits <- splitMulti(lung_lexis, tfe = seq(0, max(lung$time),
length.out = nbins + 1))
# specify knots for splines
time_knots <- c(0, 25, 100, 500, 1000)
# fit poisson model
poisson_lung <- glm(cbind(lex.Xst == "Dead",
lex.dur) ~ Ns(tfe, knots = time_knots) + age + sex,
family = poisreg, data = lung_splits)
# 3. Case-base sampling
cb_lung <- fitSmoothHazard(status ~ pspline(time, df = 2) + age + sex,
data = lung)
# Estimate hazards for a 60yo man
newdata <- data.frame(tfe = seq(5, 995, by = 10),
age = 60, sex = 0, offset = 0)
newdata$time <- newdata$tfe
# Estimate hazards
hazard_pois_lung <- exp(predict(poisson_lung, newdata))
hazard_cb_lung <- exp(predict(cb_lung, newdata))
hazard_data <- newdata
hazard_data$Poisson <- hazard_pois_lung
hazard_data$casebase <- hazard_cb_lung
hazard_data <- pivot_longer(hazard_data,
cols = c("Poisson", "casebase"),
names_to = "Method", values_to = "hazard")
# Plot hazard
hazard_plot_lung <- ggplot(hazard_data,
aes(x = time, y = hazard, color = Method)) +
geom_line() +
theme_minimal_grid() +
theme(legend.position = 'top')
ggsave2(filename = "Plot1-4.pdf",
plot = hazard_plot_lung,
width = 5, height = 5)
# 2nd dataset: ERSPC from casebase----
# 1. Cox regression
cox_erspc <- coxph(Surv(Follow.Up.Time, DeadOfPrCa) ~ ScrArm, data = ERSPC)
# 2. Poisson regression
# Create lexis object
erspc_lexis <- Lexis(exit = list(tfe = Follow.Up.Time),
exit.status = factor(DeadOfPrCa, labels = c(0, 1)),
data = ERSPC)
# Define time splits
erspc_splits <- splitMulti(erspc_lexis, tfe = seq(0, max(ERSPC$Follow.Up.Time),
length.out = nbins + 1))
# specify knots for splines
time_knots <- seq(0, max(ERSPC$Follow.Up.Time), length.out = 3)
# fit poisson
poisson_erspc <- glm(cbind(lex.Xst,
lex.dur) ~ Ns(tfe, knots = time_knots) + ScrArm,
family = poisreg, data = erspc_splits)
# 3. Case-base sampling
cb_erspc <- fitSmoothHazard(DeadOfPrCa ~ pspline(Follow.Up.Time, df = 2) + ScrArm,
data = ERSPC)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment