library(tidyverse)
library(lubridate)
# Dimensions of the simulated panel: two company labels and the seven
# month-start dates obtained by adding 1 to 7 months to 2020-01-01.
company <- letters[1:2]
date <- ymd(20200101) + months(seq_len(7))
#' Simulate a random walk with drift
#'
#' Draws an ARIMA(0, 1, 0) path with `stats::arima.sim()` using `n - 1`
#' innovations and shifts the whole path by `start`. The integrated series
#' begins at 0, so the first returned value equals `start`.
#'
#' @param start Value added to every point of the simulated path.
#' @param n Number of points to return; a single number >= 1.
#' @param mean Forwarded to `arima.sim()`, which passes it on to its
#'   innovation generator (`rnorm()` by default), i.e. the drift per step.
#' @return A numeric vector of length `n`.
sim_rw <- function(start = 0, n = 7, mean = 1) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 1)
  # A one-point walk needs no innovations; arima.sim() cannot simulate a
  # zero-length series, so short-circuit instead of calling it with n = 0.
  if (n == 1) {
    return(as.numeric(start))
  }
  path <- stats::arima.sim(
    model = list(order = c(0, 1, 0)),
    n = n - 1,
    mean = mean
  )
  as.numeric(path) + start
}
# Build the demo panel: one row per company/date combination, a within-company
# row counter, and one simulated random walk per company.
set.seed(1)
data <- crossing(company, date) %>%
  group_by(company) %>%
  mutate(
    index = row_number(),
    # Size the walk from the group itself rather than relying on sim_rw()'s
    # default length happening to equal the number of dates.
    series = sim_rw(n = n())
  ) %>%
  ungroup()
print(data, n = 14)
#> # A tibble: 14 x 4
#> company date index series
#> <chr> <date> <int> <dbl>
#> 1 a 2020-02-01 1 0
#> 2 a 2020-03-01 2 0.374
#> 3 a 2020-04-01 3 1.56
#> 4 a 2020-05-01 4 1.72
#> 5 a 2020-06-01 5 4.32
#> 6 a 2020-07-01 6 5.65
#> 7 a 2020-08-01 7 5.83
#> 8 b 2020-02-01 1 0
#> 9 b 2020-03-01 2 1.49
#> 10 b 2020-04-01 3 3.23
#> 11 b 2020-05-01 4 4.80
#> 12 b 2020-06-01 5 5.50
#> 13 b 2020-07-01 6 8.01
#> 14 b 2020-08-01 7 9.40
#' Build several lagged copies of a vector as tibble columns
#'
#' @param x Vector to lag.
#' @param n_vec Lag sizes; each element is passed as `n` to `dplyr::lag()`.
#' @param prefix Column-name prefix; columns are named `paste0(prefix, n_vec)`.
#' @return A tibble with one column per element of `n_vec`.
lag_multiple <- function(x, n_vec, prefix = "lag") {
  # Qualify lag(): a bare `lag` resolves to stats::lag() whenever dplyr is not
  # attached (or is masked), and that function shifts a series' time base
  # rather than its values, which would silently produce unlagged columns.
  map(n_vec, function(n) dplyr::lag(x, n = n)) %>%
    set_names(paste0(prefix, n_vec)) %>%
    as_tibble()
}
# Per-company feature engineering on `data`:
#   mo            - month number of `date`
#   diff          - change in `series` from the previous row
#   mean_last3    - mean of the three preceding `diff` values (current row
#                   excluded via `.after = -1`)
#   diff_lag_1..3 - `diff` lagged by 1 to 3 rows
# All windows and lags are row-based, so this assumes rows are in date order
# within each company. Rows with any missing feature are dropped at the end.
data_prepped <- data %>%
  mutate(mo = month(date)) %>%
  group_by(company) %>%
  mutate(diff = series - dplyr::lag(series)) %>%
  # slide_dbl() windows by row position and has no `.i` argument (that belongs
  # to slide_index_dbl()); passing `.i = date` was forwarded to mean() through
  # `...` and ignored, so it is omitted here.
  mutate(mean_last3 = slider::slide_dbl(
    .x = diff,
    .f = mean,
    .before = 3,
    .after = -1
  )) %>%
  mutate(lag_multiple(diff, 1:3, prefix = "diff_lag_")) %>%
  ungroup() %>%
  na.omit()
data_prepped
#> # A tibble: 6 x 10
#> company date index series mo diff mean_last3 diff_lag_1 diff_lag_2
#> <chr> <date> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 a 2020-06-01 5 4.32 6 2.60 0.574 0.164 1.18
#> 2 a 2020-07-01 6 5.65 7 1.33 1.31 2.60 0.164
#> 3 a 2020-08-01 7 5.83 8 0.180 1.36 1.33 2.60
#> 4 b 2020-06-01 5 5.50 6 0.695 1.60 1.58 1.74
#> 5 b 2020-07-01 6 8.01 7 2.51 1.34 0.695 1.58
#> 6 b 2020-08-01 7 9.40 8 1.39 1.59 2.51 0.695
#> # ... with 1 more variable: diff_lag_3 <dbl>
# Created on 2021-04-19 by the reprex package (v1.0.0)