Skip to content

Instantly share code, notes, and snippets.

@danlewer
Last active January 27, 2021 10:48
Show Gist options
  • Save danlewer/cb0bd66ca59714b71220e62a43ec58f9 to your computer and use it in GitHub Desktop.
Save danlewer/cb0bd66ca59714b71220e62a43ec58f9 to your computer and use it in GitHub Desktop.
# following the observation in Lewis et al (2005) https://onlinelibrary.wiley.com/doi/abs/10.1002/pds.1115
# that incidence of various events is higher in the months after patients join an electronic database
# this code provides a function for measuring incidence stratified by time after joining a cohort
# -- function reporting incidence stratified by duration after cohort entry --
# reformat dates as integers with an arbitrary origin, e.g. 1970-01-01
# entry = date of cohort entry
# exit = date of cohort exit
# diagnosis = date of event
# interval = length of windows in same base as dates, e.g. days
# nInterval = number of intervals, such that you have *nInterval* windows of duration *interval*
# events are excluded if they are before entry, after exit, or of value NA
Lewisf <- function(entry, exit, diagnosis, interval = 30, nInterval = 30) {
i <- 0:nInterval * interval
e <- diagnosis - entry
e <- e[diagnosis >= entry & diagnosis <= exit & !is.na(diagnosis)]
e <- findInterval(e, i)
e <- factor(e, seq_len(nInterval))
e <- table(e)
s <- exit - entry
z <- sapply(i[-1], function(x) sum(s >= x)) * interval
x <- findInterval(s, i)
x <- factor(x, seq_len(nInterval))
d <- s %% interval
d <- sapply(split(d, x), sum)
f <- rowSums(cbind(z, d))
cbind(events = e, follow_up = f)
}
# -- example data --
n <- 1e5
d <- data.frame(id = seq_len(n), start = sample(100:500, n, replace = T))
d$end <- d$start + sample(200:3000, n, replace = T)
d$event <- d$start + rnbinom(n, 0.7, mu = 1000)
# -- do calculation --
lewis <- Lewisf(entry = d$start, exit = d$end, diagnosis = d$event)
# -- add incidence rates and poisson confidence intervals --
vpc <- function(x, n, base = 365 * 1000) {
f <- function(x, n) c(x/(n/base), poisson.test(x, n/base)$conf.int[1:2])
t(mapply(f, x, n, SIMPLIFY = T))
}
lewis <- vpc(lewis[,1], lewis[,2])
# -- plot --
xs <- seq_len(nrow(lewis))
par(xpd = NA, mar = c(5, 5, 3, 1))
plot(1, type = 'n', xlim = c(0, 30), ylim = c(0, 900), axes = F, xlab = NA, ylab = NA)
rect(xs - 1, 0, xs, lewis[,1], col = "#FBB4AE")
arrows(xs - 0.5, lewis[,2], xs - 0.5, lewis[,3], code = 3, angle = 90, length = 0.05)
axis(1, 0:max(xs), pos = 0, labels = F, tck = -0.01)
axis(1, seq(0, 30, 5), pos = 0)
axis(2, las = 2, pos = 0)
title(ylab = 'Incidence of event per 1,000 patient-years')
title(xlab = 'Months after joining cohort')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment