Skip to content

Instantly share code, notes, and snippets.

@brshallo
Created June 3, 2021 21:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brshallo/22681340d5d26f291fc5f349be21ec35 to your computer and use it in GitHub Desktop.
Save brshallo/22681340d5d26f291fc5f349be21ec35 to your computer and use it in GitHub Desktop.
Identify longest continuous engagement by customer

library(tidyverse)
library(lubridate)

T <- tibble(
  customer = c("c1", "c2", "c1", "c2", "c2", "c2"),
  start_date = ymd(c(
    20210107, 20210109, 20210201, 20210225, 20210314, 20210401
  )),
  end_date = ymd(c(
    20210211, 20210309, 20210308, 20210307, 20210405, 20210503
  ))
)

# Start data
T
#> # A tibble: 6 x 3
#>   customer start_date end_date  
#>   <chr>    <date>     <date>    
#> 1 c1       2021-01-07 2021-02-11
#> 2 c2       2021-01-09 2021-03-09
#> 3 c1       2021-02-01 2021-03-08
#> 4 c2       2021-02-25 2021-03-07
#> 5 c2       2021-03-14 2021-04-05
#> 6 c2       2021-04-01 2021-05-03

cummax_date <- function(date){
  as.Date(cummax(as.integer(date)), "1970-01-01")
}

na_to_false <- function(x){
  ifelse(is.na(x), FALSE, x)
}

break_indicators <- T %>% 
  mutate(ID = row_number()) %>% 
  arrange(customer, start_date) %>% 
  group_by(customer) %>% 
  mutate(end_max = cummax_date(end_date),
         break_continuous = na_to_false(lag(end_max) < start_date),
         break_count = cumsum(break_continuous))
# set-up indicators
print(break_indicators)
#> # A tibble: 6 x 7
#> # Groups:   customer [2]
#>   customer start_date end_date      ID end_max    break_continuous break_count
#>   <chr>    <date>     <date>     <int> <date>     <lgl>                  <int>
#> 1 c1       2021-01-07 2021-02-11     1 2021-02-11 FALSE                      0
#> 2 c1       2021-02-01 2021-03-08     3 2021-03-08 FALSE                      0
#> 3 c2       2021-01-09 2021-03-09     2 2021-03-09 FALSE                      0
#> 4 c2       2021-02-25 2021-03-07     4 2021-03-09 FALSE                      0
#> 5 c2       2021-03-14 2021-04-05     5 2021-04-05 TRUE                       1
#> 6 c2       2021-04-01 2021-05-03     6 2021-05-03 FALSE                      1

max_breaks_and_customers <- break_indicators %>% 
  group_by(customer, break_count) %>% 
  summarise(max_continuous = max(end_date) - min(start_date)) 
#> `summarise()` has grouped output by 'customer'. You can override using the `.groups` argument.
# continuous by customers and breaks
print(max_breaks_and_customers)
#> # A tibble: 3 x 3
#> # Groups:   customer [2]
#>   customer break_count max_continuous
#>   <chr>          <int> <drtn>        
#> 1 c1                 0 60 days       
#> 2 c2                 0 59 days       
#> 3 c2                 1 50 days

## SOLUTION: continuous by customers only
max_breaks_and_customers %>% 
  summarise(max_continous = max(max_continuous)) %>% 
  ungroup()
#> # A tibble: 2 x 2
#>   customer max_continous
#>   <chr>    <drtn>       
#> 1 c1       60 days      
#> 2 c2       59 days

Created on 2021-06-03 by the reprex package (v2.0.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment