Skip to content

Instantly share code, notes, and snippets.

@johnmackintosh
Last active June 21, 2022 21:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save johnmackintosh/2077a5719683873a3d6d109ab8243eb4 to your computer and use it in GitHub Desktop.
Save johnmackintosh/2077a5719683873a3d6d109ab8243eb4 to your computer and use it in GitHub Desktop.
answer to question on NHSR slack
library(tidyverse)
library(lubridate)
## each id is a person, and each row is one episode
## the two episode for id 1 overlap!
example_dates <- tibble(
id = c(1,1,2,2),
date_start = ymd(c("2020-01-01", "2020-01-03","2020-04-01", "2020-04-15")),
date_end = ymd(c("2020-01-05", "2020-01-10", "2020-04-04", "2020-04-16"))
) #%>%
#mutate(episode = interval(date_start, date_end))
library(data.table)
DT <- setDT(copy(example_dates))
counter <- function(DT, start, stop, id) {
DT[, .(start = min(start),
stop = max(end)),
by=.(id, group=cumsum(c(1, tail(start, -1) > head(end, -1))))]
}
counter(DT, date_start, date_end, id)
# id group start stop
# 1: 1 1 2020-01-01 2020-01-10
# 2: 2 2 2020-04-01 2020-04-04
# 3: 2 3 2020-04-15 2020-04-16
# tim taylor data
dat <- data.frame(
id = c(1,1,2,2,1),
date_start = ymd(c("2020-01-01", "2020-01-03","2020-04-01", "2020-04-15", "2020-05-01")),
date_end = ymd(c("2020-01-05", "2020-01-10", "2020-04-04", "2020-04-16", "2020-10-01"))
)
dat <- replicate(125000, dat, simplify = FALSE)
dat <- .mapply(
FUN = function(x, y) {
x$id <- x$id + y
x
},
dots = list(x = dat, y = seq_len(125000)),
MoreArgs = NULL
)
dat <- rbindlist(dat)
res <- counter(dat, date_start, date_end, id)
res[id==2]
# id group start stop
# 1: 2 1 2020-01-01 2020-01-10
# 2: 2 4 2020-05-01 2020-10-01
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment