Last active
January 27, 2021 10:48
-
-
Save danlewer/cb0bd66ca59714b71220e62a43ec58f9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# following the observation in Lewis et al (2005) https://onlinelibrary.wiley.com/doi/abs/10.1002/pds.1115 | |
# that incidence of various events is higher in the months after patients join an electronic database | |
# this code provides a function for measuring incidence stratified by time after joining a cohort | |
# -- function reporting incidence stratified by duration after cohort entry -- | |
# reformat dates as integers with an arbitrary origin, e.g. 1970-01-01 | |
# entry = date of cohort entry | |
# exit = date of cohort exit | |
# diagnosis = date of event | |
# interval = length of windows in same base as dates, e.g. days | |
# nInterval = number of intervals, such that you have *nInterval* windows of duration *interval* | |
# events are excluded if they are before entry, after exit, or of value NA | |
Lewisf <- function(entry, exit, diagnosis, interval = 30, nInterval = 30) { | |
i <- 0:nInterval * interval | |
e <- diagnosis - entry | |
e <- e[diagnosis >= entry & diagnosis <= exit & !is.na(diagnosis)] | |
e <- findInterval(e, i) | |
e <- factor(e, seq_len(nInterval)) | |
e <- table(e) | |
s <- exit - entry | |
z <- sapply(i[-1], function(x) sum(s >= x)) * interval | |
x <- findInterval(s, i) | |
x <- factor(x, seq_len(nInterval)) | |
d <- s %% interval | |
d <- sapply(split(d, x), sum) | |
f <- rowSums(cbind(z, d)) | |
cbind(events = e, follow_up = f) | |
} | |
# -- example data -- | |
n <- 1e5 | |
d <- data.frame(id = seq_len(n), start = sample(100:500, n, replace = T)) | |
d$end <- d$start + sample(200:3000, n, replace = T) | |
d$event <- d$start + rnbinom(n, 0.7, mu = 1000) | |
# -- do calculation -- | |
lewis <- Lewisf(entry = d$start, exit = d$end, diagnosis = d$event) | |
# -- add incidence rates and poisson confidence intervals -- | |
vpc <- function(x, n, base = 365 * 1000) { | |
f <- function(x, n) c(x/(n/base), poisson.test(x, n/base)$conf.int[1:2]) | |
t(mapply(f, x, n, SIMPLIFY = T)) | |
} | |
lewis <- vpc(lewis[,1], lewis[,2]) | |
# -- plot -- | |
xs <- seq_len(nrow(lewis)) | |
par(xpd = NA, mar = c(5, 5, 3, 1)) | |
plot(1, type = 'n', xlim = c(0, 30), ylim = c(0, 900), axes = F, xlab = NA, ylab = NA) | |
rect(xs - 1, 0, xs, lewis[,1], col = "#FBB4AE") | |
arrows(xs - 0.5, lewis[,2], xs - 0.5, lewis[,3], code = 3, angle = 90, length = 0.05) | |
axis(1, 0:max(xs), pos = 0, labels = F, tck = -0.01) | |
axis(1, seq(0, 30, 5), pos = 0) | |
axis(2, las = 2, pos = 0) | |
title(ylab = 'Incidence of event per 1,000 patient-years') | |
title(xlab = 'Months after joining cohort') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment