Skip to content

Instantly share code, notes, and snippets.

@brshallo
Created April 19, 2021 17:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brshallo/0185e12a32e6c1c66b068f3e7d3194db to your computer and use it in GitHub Desktop.
Save brshallo/0185e12a32e6c1c66b068f3e7d3194db to your computer and use it in GitHub Desktop.
library(tidyverse)
library(lubridate)

# Identifiers of the companies to simulate, and the monthly observation dates:
# the first day of each of the seven months following January 2020.
company <- c("a", "b")
date <- ymd("2020-01-01") + months(seq_len(7))

#' Simulate a random walk of a given length
#'
#' Draws an ARIMA(0, 1, 0) path with `stats::arima.sim()` and shifts it by
#' `start`. The simulated path begins at zero, so the first returned value
#' equals `start`.
#'
#' @param start Value added to every point of the simulated path.
#' @param n Number of points to return; a single number >= 1.
#' @param mean Forwarded through `arima.sim(...)` to its innovation generator
#'   (`rnorm()` by default), i.e. the mean of each step of the walk.
#' @return A numeric vector of length `n`.
sim_rw <- function(start = 0, n = 7, mean = 1) {
  stopifnot(is.numeric(n), length(n) == 1L, n >= 1)

  # arima.sim() needs a strictly positive number of innovations, so a walk
  # with a single point is just its starting value (no random draw needed).
  if (n == 1) {
    return(0 + start)
  }

  # With one order of differencing arima.sim() returns one more value than
  # the `n` it is given, hence `n - 1` to end up with `n` points.
  path <- arima.sim(model = list(order = c(0, 1, 0)), n = n - 1, mean = mean)
  as.numeric(path) + start
}

# Seed the RNG so the simulated series (and the printed output below) are
# reproducible.
set.seed(1)

# One row per company/date pair; within each company, number the rows and
# attach an independently simulated random walk.
data <- crossing(company, date) %>%
  group_by(company) %>%
  mutate(
    index = row_number(),
    # Size the walk from the group itself rather than relying on sim_rw()'s
    # default length happening to equal the number of dates per company.
    series = sim_rw(n = n())
  ) %>%
  ungroup()

# Print every row instead of the default truncated tibble display.
print(data, n = nrow(data))
#> # A tibble: 14 x 4
#>    company date       index series
#>    <chr>   <date>     <int>  <dbl>
#>  1 a       2020-02-01     1  0    
#>  2 a       2020-03-01     2  0.374
#>  3 a       2020-04-01     3  1.56 
#>  4 a       2020-05-01     4  1.72 
#>  5 a       2020-06-01     5  4.32 
#>  6 a       2020-07-01     6  5.65 
#>  7 a       2020-08-01     7  5.83 
#>  8 b       2020-02-01     1  0    
#>  9 b       2020-03-01     2  1.49 
#> 10 b       2020-04-01     3  3.23 
#> 11 b       2020-05-01     4  4.80 
#> 12 b       2020-06-01     5  5.50 
#> 13 b       2020-07-01     6  8.01 
#> 14 b       2020-08-01     7  9.40

#' Build several lagged copies of a vector as columns of a tibble
#'
#' Designed to be spliced into `dplyr::mutate()` as an unnamed argument so
#' that each lag becomes its own column.
#'
#' @param x Vector to lag.
#' @param n_vec Vector of lag sizes; one output column is produced per element.
#' @param prefix String prepended to each lag size to form the column names.
#' @return A tibble with one column per element of `n_vec`, named
#'   `paste0(prefix, n_vec)`, each the same length as `x`.
lag_multiple <- function(x, n_vec, prefix = "lag") {
  # Qualify lag() explicitly: a bare `lag` only resolves to dplyr's version
  # while dplyr masks stats::lag() on the search path, and stats::lag() does
  # not shift the values of a plain vector.
  lagged <- lapply(n_vec, function(k) dplyr::lag(x, n = k))
  names(lagged) <- paste0(prefix, n_vec)
  tibble::as_tibble(lagged)
}

# Derive per-company features from the simulated series: calendar month,
# period-over-period change, a trailing mean of that change, and its lags.
data_prepped <- data %>%
  mutate(mo = month(date)) %>%
  group_by(company) %>%
  mutate(
    # Change versus the previous row; NA for the first row of each company.
    diff = series - dplyr::lag(series),
    # Mean of the three rows preceding the current one (`.after = -1` leaves
    # the current row out of the window). slide_dbl() windows by row position
    # and has no `.i` index argument -- anything extra is forwarded through
    # `...` to `.f` -- so this relies on each company's rows being in date
    # order with no missing months. A date-aware window would need
    # slider::slide_index_dbl() instead.
    mean_last3 = slider::slide_dbl(
      .x = diff,
      .f = mean,
      .before = 3,
      .after = -1
    ),
    # Adds columns diff_lag_1, diff_lag_2 and diff_lag_3.
    lag_multiple(diff, 1:3, prefix = "diff_lag_")
  ) %>%
  ungroup() %>%
  # Drop the leading rows of each company whose lags / trailing mean are
  # not available yet.
  na.omit()

data_prepped
#> # A tibble: 6 x 10
#>   company date       index series    mo  diff mean_last3 diff_lag_1 diff_lag_2
#>   <chr>   <date>     <int>  <dbl> <dbl> <dbl>      <dbl>      <dbl>      <dbl>
#> 1 a       2020-06-01     5   4.32     6 2.60       0.574      0.164      1.18 
#> 2 a       2020-07-01     6   5.65     7 1.33       1.31       2.60       0.164
#> 3 a       2020-08-01     7   5.83     8 0.180      1.36       1.33       2.60 
#> 4 b       2020-06-01     5   5.50     6 0.695      1.60       1.58       1.74 
#> 5 b       2020-07-01     6   8.01     7 2.51       1.34       0.695      1.58 
#> 6 b       2020-08-01     7   9.40     8 1.39       1.59       2.51       0.695
#> # ... with 1 more variable: diff_lag_3 <dbl>

Created on 2021-04-19 by the reprex package (v1.0.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment