library(tidyverse)
library(lubridate)
T <- tibble(
customer = c("c1", "c2", "c1", "c2", "c2", "c2"),
start_date = ymd(c(
20210107, 20210109, 20210201, 20210225, 20210314, 20210401
)),
end_date = ymd(c(
20210211, 20210309, 20210308, 20210307, 20210405, 20210503
))
)
# Start data
T
#> # A tibble: 6 x 3
#> customer start_date end_date
#> <chr> <date> <date>
#> 1 c1 2021-01-07 2021-02-11
#> 2 c2 2021-01-09 2021-03-09
#> 3 c1 2021-02-01 2021-03-08
#> 4 c2 2021-02-25 2021-03-07
#> 5 c2 2021-03-14 2021-04-05
#> 6 c2 2021-04-01 2021-05-03
cummax_date <- function(date){
as.Date(cummax(as.integer(date)), "1970-01-01")
}
na_to_false <- function(x){
ifelse(is.na(x), FALSE, x)
}
break_indicators <- T %>%
mutate(ID = row_number()) %>%
arrange(customer, start_date) %>%
group_by(customer) %>%
mutate(end_max = cummax_date(end_date),
break_continuous = na_to_false(lag(end_max) < start_date),
break_count = cumsum(break_continuous))
# set-up indicators
print(break_indicators)
#> # A tibble: 6 x 7
#> # Groups: customer [2]
#> customer start_date end_date ID end_max break_continuous break_count
#> <chr> <date> <date> <int> <date> <lgl> <int>
#> 1 c1 2021-01-07 2021-02-11 1 2021-02-11 FALSE 0
#> 2 c1 2021-02-01 2021-03-08 3 2021-03-08 FALSE 0
#> 3 c2 2021-01-09 2021-03-09 2 2021-03-09 FALSE 0
#> 4 c2 2021-02-25 2021-03-07 4 2021-03-09 FALSE 0
#> 5 c2 2021-03-14 2021-04-05 5 2021-04-05 TRUE 1
#> 6 c2 2021-04-01 2021-05-03 6 2021-05-03 FALSE 1
max_breaks_and_customers <- break_indicators %>%
group_by(customer, break_count) %>%
summarise(max_continuous = max(end_date) - min(start_date))
#> `summarise()` has grouped output by 'customer'. You can override using the `.groups` argument.
# continuous by customers and breaks
print(max_breaks_and_customers)
#> # A tibble: 3 x 3
#> # Groups: customer [2]
#> customer break_count max_continuous
#> <chr> <int> <drtn>
#> 1 c1 0 60 days
#> 2 c2 0 59 days
#> 3 c2 1 50 days
## SOLUTION: continuous by customers only
max_breaks_and_customers %>%
summarise(max_continous = max(max_continuous)) %>%
ungroup()
#> # A tibble: 2 x 2
#> customer max_continous
#> <chr> <drtn>
#> 1 c1 60 days
#> 2 c2 59 days
Created on 2021-06-03 by the reprex package (v2.0.0)