turgeonmaxime/cox_cb_poisson_comparison.R

## cox_cb_poisson_comparison.R
#----
# Poisson vs casebase
# authors: Max Turgeon, Jesse Islam and Sahir Bhatnagar
# date: 10/1/2021
#----
set.seed(1952)

library(casebase)
library(cowplot)
library(Epi)
library(ggplot2)
library(mgcv)
library(popEpi)
library(splines)
library(survival)
library(tidyr)

nbins <- 24 # Number of time bins for poisson regression

# 1st dataset: lung from survival----
# Recode sex to 0/1; status to 0/1
lung <- transform(lung, sex = sex - 1, status = status - 1)

# 1. Cox regression
cox_lung <- coxph(Surv(time, status) ~ age + sex,
                  data = lung)

# 2. Poisson regression
# Create lexis object
lung_lexis <- Lexis(exit = list(tfe = time),
                    exit.status = factor(status,
                                         labels = c("Censored", "Dead")),
                    data = lung)

# Define time splits
lung_splits <- splitMulti(lung_lexis, tfe = seq(0, max(lung$time),
                                                length.out = nbins + 1))

# specify knots for splines
time_knots <- c(0, 25, 100, 500, 1000)

# fit poisson model
poisson_lung <- glm(cbind(lex.Xst == "Dead",
                          lex.dur) ~ Ns(tfe, knots = time_knots) + age + sex,
                    family = poisreg, data = lung_splits)

# 3. Case-base sampling
cb_lung <- fitSmoothHazard(status ~ pspline(time, df = 2) + age + sex,
                             data = lung)

# Estimate hazards for a 60yo man
newdata <- data.frame(tfe = seq(5, 995, by = 10),
                      age = 60, sex = 0, offset = 0)
newdata$time <- newdata$tfe

# Estimate hazards
hazard_pois_lung <- exp(predict(poisson_lung, newdata))
hazard_cb_lung <- exp(predict(cb_lung, newdata))

hazard_data <- newdata
hazard_data$Poisson <- hazard_pois_lung
hazard_data$casebase <- hazard_cb_lung

hazard_data <- pivot_longer(hazard_data,
                            cols = c("Poisson", "casebase"),
                            names_to = "Method", values_to = "hazard")

# Plot hazard
hazard_plot_lung <- ggplot(hazard_data,
                           aes(x = time, y = hazard, color = Method)) +
    geom_line() +
    theme_minimal_grid() +
    theme(legend.position = 'top')

ggsave2(filename = "Plot1-4.pdf",
        plot = hazard_plot_lung,
        width = 5, height = 5)

# 2nd dataset: ERSPC from casebase----
# 1. Cox regression
cox_erspc <- coxph(Surv(Follow.Up.Time, DeadOfPrCa) ~ ScrArm, data = ERSPC)

# 2. Poisson regression
# Create lexis object
erspc_lexis <- Lexis(exit = list(tfe = Follow.Up.Time),
                     exit.status = factor(DeadOfPrCa, labels = c(0, 1)),
                     data = ERSPC)

# Define time splits
erspc_splits <- splitMulti(erspc_lexis, tfe = seq(0, max(ERSPC$Follow.Up.Time),
                                                  length.out = nbins + 1))

# specify knots for splines
time_knots <- seq(0, max(ERSPC$Follow.Up.Time), length.out = 3)

# fit poisson
poisson_erspc <- glm(cbind(lex.Xst,
                           lex.dur) ~ Ns(tfe, knots = time_knots) + ScrArm,
                     family = poisreg, data = erspc_splits)

# 3. Case-base sampling
cb_erspc <- fitSmoothHazard(DeadOfPrCa ~ pspline(Follow.Up.Time, df = 2) + ScrArm,
                             data = ERSPC)
	#----
	# Poisson vs casebase
	# authors: Max Turgeon, Jesse Islam and Sahir Bhatnagar
	# date: 10/1/2021
	#----
	set.seed(1952)

	library(casebase)
	library(cowplot)
	library(Epi)
	library(ggplot2)
	library(mgcv)
	library(popEpi)
	library(splines)
	library(survival)
	library(tidyr)

	nbins <- 24 # Number of time bins for poisson regression

	# 1st dataset: lung from survival----
	# Recode sex to 0/1; status to 0/1
	lung <- transform(lung, sex = sex - 1, status = status - 1)

	# 1. Cox regression
	cox_lung <- coxph(Surv(time, status) ~ age + sex,
	data = lung)

	# 2. Poisson regression
	# Create lexis object
	lung_lexis <- Lexis(exit = list(tfe = time),
	exit.status = factor(status,
	labels = c("Censored", "Dead")),
	data = lung)

	# Define time splits
	lung_splits <- splitMulti(lung_lexis, tfe = seq(0, max(lung$time),
	length.out = nbins + 1))

	# specify knots for splines
	time_knots <- c(0, 25, 100, 500, 1000)

	# fit poisson model
	poisson_lung <- glm(cbind(lex.Xst == "Dead",
	lex.dur) ~ Ns(tfe, knots = time_knots) + age + sex,
	family = poisreg, data = lung_splits)

	# 3. Case-base sampling
	cb_lung <- fitSmoothHazard(status ~ pspline(time, df = 2) + age + sex,
	data = lung)

	# Estimate hazards for a 60yo man
	newdata <- data.frame(tfe = seq(5, 995, by = 10),
	age = 60, sex = 0, offset = 0)
	newdata$time <- newdata$tfe

	# Estimate hazards
	hazard_pois_lung <- exp(predict(poisson_lung, newdata))
	hazard_cb_lung <- exp(predict(cb_lung, newdata))

	hazard_data <- newdata
	hazard_data$Poisson <- hazard_pois_lung
	hazard_data$casebase <- hazard_cb_lung

	hazard_data <- pivot_longer(hazard_data,
	cols = c("Poisson", "casebase"),
	names_to = "Method", values_to = "hazard")

	# Plot hazard
	hazard_plot_lung <- ggplot(hazard_data,
	aes(x = time, y = hazard, color = Method)) +
	geom_line() +
	theme_minimal_grid() +
	theme(legend.position = 'top')

	ggsave2(filename = "Plot1-4.pdf",
	plot = hazard_plot_lung,
	width = 5, height = 5)

	# 2nd dataset: ERSPC from casebase----
	# 1. Cox regression
	cox_erspc <- coxph(Surv(Follow.Up.Time, DeadOfPrCa) ~ ScrArm, data = ERSPC)

	# 2. Poisson regression
	# Create lexis object
	erspc_lexis <- Lexis(exit = list(tfe = Follow.Up.Time),
	exit.status = factor(DeadOfPrCa, labels = c(0, 1)),
	data = ERSPC)

	# Define time splits
	erspc_splits <- splitMulti(erspc_lexis, tfe = seq(0, max(ERSPC$Follow.Up.Time),
	length.out = nbins + 1))

	# specify knots for splines
	time_knots <- seq(0, max(ERSPC$Follow.Up.Time), length.out = 3)

	# fit poisson
	poisson_erspc <- glm(cbind(lex.Xst,
	lex.dur) ~ Ns(tfe, knots = time_knots) + ScrArm,
	family = poisreg, data = erspc_splits)

	# 3. Case-base sampling
	cb_erspc <- fitSmoothHazard(DeadOfPrCa ~ pspline(Follow.Up.Time, df = 2) + ScrArm,
	data = ERSPC)