Skip to content

Instantly share code, notes, and snippets.

@njtierney
Created November 14, 2024 03:06
Show Gist options
  • Save njtierney/8a41ec73006ce625107f56da825483f5 to your computer and use it in GitHub Desktop.
Save njtierney/8a41ec73006ce625107f56da825483f5 to your computer and use it in GitHub Desktop.
Exploring how to create year-lagged covariates
# example lagging code
library(tidyverse)

# Number of distinct `row` ids to simulate.
n <- 100

# Long-format covariate table: one uniform random `value` for every
# combination of covariate, year (2000-2022), and `row` id.
grid_cov <- expand_grid(
  covariates = c("rainfall", "temperature"),
  years = seq.int(2000L, 2022L),
  row = seq_len(n)
) |>
  # `n()` is dplyr's row-count helper; because it is called as a function,
  # R does not confuse it with the numeric `n` defined above.
  mutate(value = runif(n()))

grid_cov
#> # A tibble: 4,600 × 4
#>    covariates years   row value
#>    <chr>      <int> <int> <dbl>
#>  1 rainfall    2000     1 0.339
#>  2 rainfall    2000     2 0.561
#>  3 rainfall    2000     3 0.124
#>  4 rainfall    2000     4 0.107
#>  5 rainfall    2000     5 0.301
#>  6 rainfall    2000     6 0.748
#>  7 rainfall    2000     7 0.466
#>  8 rainfall    2000     8 0.870
#>  9 rainfall    2000     9 0.191
#> 10 rainfall    2000    10 0.776
#> # ℹ 4,590 more rows

# Reshape to one record per `row` id, with one column per covariate-year
# combination (e.g. `rainfall_2000`).
wider_grid_cov <- pivot_wider(
  grid_cov,
  names_from = c(covariates, years),
  # "_" is the default separator; spelled out because the later reshape back
  # to long form relies on it to recover the covariate name and the year.
  names_sep = "_",
  values_from = value
)

wider_grid_cov
#> # A tibble: 100 × 47
#>      row rainfall_2000 rainfall_2001 rainfall_2002 rainfall_2003 rainfall_2004
#>    <int>         <dbl>         <dbl>         <dbl>         <dbl>         <dbl>
#>  1     1         0.339       0.113           0.943        0.822          0.723
#>  2     2         0.561       0.840           0.417        0.928          0.859
#>  3     3         0.124       0.195           0.678        0.972          0.408
#>  4     4         0.107       0.233           0.720        0.356          0.780
#>  5     5         0.301       0.382           0.820        0.292          0.330
#>  6     6         0.748       0.00684         0.714        0.310          0.149
#>  7     7         0.466       0.762           0.843        0.0820         0.836
#>  8     8         0.870       0.333           0.354        0.350          0.804
#>  9     9         0.191       0.131           0.860        0.970          0.662
#> 10    10         0.776       0.647           0.445        0.858          0.689
#> # ℹ 90 more rows
#> # ℹ 41 more variables: rainfall_2005 <dbl>, rainfall_2006 <dbl>,
#> #   rainfall_2007 <dbl>, rainfall_2008 <dbl>, rainfall_2009 <dbl>,
#> #   rainfall_2010 <dbl>, rainfall_2011 <dbl>, rainfall_2012 <dbl>,
#> #   rainfall_2013 <dbl>, rainfall_2014 <dbl>, rainfall_2015 <dbl>,
#> #   rainfall_2016 <dbl>, rainfall_2017 <dbl>, rainfall_2018 <dbl>,
#> #   rainfall_2019 <dbl>, rainfall_2020 <dbl>, rainfall_2021 <dbl>, …

# Simulated observations: one record per `row` id, with a uniform random
# `obs` and a `year_start` drawn (with replacement) from 2000-2022.
dat <- tibble(
  row = seq_len(n),
  obs = runif(n),
  year_start = sample(seq.int(2000L, 2022L), size = n, replace = TRUE)
)

dat
#> # A tibble: 100 × 3
#>      row    obs year_start
#>    <int>  <dbl>      <int>
#>  1     1 0.137        2003
#>  2     2 0.999        2018
#>  3     3 0.179        2005
#>  4     4 0.249        2022
#>  5     5 0.463        2000
#>  6     6 0.572        2013
#>  7     7 0.924        2017
#>  8     8 0.556        2009
#>  9     9 0.0697       2021
#> 10    10 0.361        2014
#> # ℹ 90 more rows

# Attach every covariate-year column to the observations by `row`, then add
# `coffee`, a uniform random covariate that has no yearly columns.
example_covariates <- dat |>
  left_join(wider_grid_cov, by = "row") |>
  mutate(coffee = runif(n()))

# Prefixes of the year-indexed covariate columns that are meant to be lagged.
covariates_to_lag <- c("rainfall", "temperature")
# Covariates that are dropped before the lagging reshape.
covariates_not_to_lag <- "coffee"
covariates_to_lag
#> [1] "rainfall"    "temperature"

# Lags, in years subtracted from `year_start`, to evaluate for each record.
vec_lags <- 0:3

# For every combination of `row`, covariate year, and lag, flag whether that
# year equals `year_start - lags`. The result is printed, not assigned.
example_covariates |>
  # keep only the id columns and the year-indexed covariate columns
  select(
    -all_of(c(covariates_not_to_lag, "obs"))
  ) |>
  # Split `<covariate>_<year>` on the final underscore only, so a covariate
  # whose own name contains "_" (e.g. `land_cover_2000`) is still parsed
  # correctly, and read the year as an integer straight away.
  pivot_longer(
    cols = -c("row", "year_start"),
    names_to = c("variable", "year"),
    names_pattern = "^(.+)_(\\d+)$",
    names_transform = list(year = as.integer)
  ) |>
  # one column per covariate, one record per (row, year)
  pivot_wider(
    names_from = variable,
    values_from = value
  ) |>
  # cross every (row, year) record with every lag
  expand_grid(
    lags = vec_lags
  ) |>
  relocate(
    lags,
    .after = year_start
  ) |>
  # the calendar year that lies `lags` years before `year_start`
  mutate(
    year_lagged = year_start - lags,
    .after = lags
  ) |>
  # TRUE where this record's covariate year is the lagged year
  mutate(
    year_lag_match = year_lagged == year,
    .after = year
  )
#> # A tibble: 9,200 × 8
#>      row year_start  lags year_lagged  year year_lag_match rainfall temperature
#>    <int>      <int> <int>       <int> <int> <lgl>             <dbl>       <dbl>
#>  1     1       2003     0        2003  2000 FALSE             0.339       0.380
#>  2     1       2003     1        2002  2000 FALSE             0.339       0.380
#>  3     1       2003     2        2001  2000 FALSE             0.339       0.380
#>  4     1       2003     3        2000  2000 TRUE              0.339       0.380
#>  5     1       2003     0        2003  2001 FALSE             0.113       0.193
#>  6     1       2003     1        2002  2001 FALSE             0.113       0.193
#>  7     1       2003     2        2001  2001 TRUE              0.113       0.193
#>  8     1       2003     3        2000  2001 FALSE             0.113       0.193
#>  9     1       2003     0        2003  2002 FALSE             0.943       0.775
#> 10     1       2003     1        2002  2002 TRUE              0.943       0.775
#> # ℹ 9,190 more rows

# expand
# NOTE(review): "expand" reads like a placeholder for a step that was never
# written here — confirm. The statement below only prints the wide
# `example_covariates` table.

example_covariates
#> # A tibble: 100 × 50
#>      row    obs year_start rainfall_2000 rainfall_2001 rainfall_2002
#>    <int>  <dbl>      <int>         <dbl>         <dbl>         <dbl>
#>  1     1 0.137        2003         0.339       0.113           0.943
#>  2     2 0.999        2018         0.561       0.840           0.417
#>  3     3 0.179        2005         0.124       0.195           0.678
#>  4     4 0.249        2022         0.107       0.233           0.720
#>  5     5 0.463        2000         0.301       0.382           0.820
#>  6     6 0.572        2013         0.748       0.00684         0.714
#>  7     7 0.924        2017         0.466       0.762           0.843
#>  8     8 0.556        2009         0.870       0.333           0.354
#>  9     9 0.0697       2021         0.191       0.131           0.860
#> 10    10 0.361        2014         0.776       0.647           0.445
#> # ℹ 90 more rows
#> # ℹ 44 more variables: rainfall_2003 <dbl>, rainfall_2004 <dbl>,
#> #   rainfall_2005 <dbl>, rainfall_2006 <dbl>, rainfall_2007 <dbl>,
#> #   rainfall_2008 <dbl>, rainfall_2009 <dbl>, rainfall_2010 <dbl>,
#> #   rainfall_2011 <dbl>, rainfall_2012 <dbl>, rainfall_2013 <dbl>,
#> #   rainfall_2014 <dbl>, rainfall_2015 <dbl>, rainfall_2016 <dbl>,
#> #   rainfall_2017 <dbl>, rainfall_2018 <dbl>, rainfall_2019 <dbl>, …

Created on 2024-11-14 with reprex v2.1.1

Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.4.2 (2024-10-31)
#>  os       macOS Sonoma 14.5
#>  system   aarch64, darwin20
#>  ui       X11
#>  language (EN)
#>  collate  en_US.UTF-8
#>  ctype    en_US.UTF-8
#>  tz       Australia/Hobart
#>  date     2024-11-14
#>  pandoc   3.2 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date (UTC) lib source
#>  cli           3.6.3   2024-06-21 [1] CRAN (R 4.4.0)
#>  colorspace    2.1-1   2024-07-26 [1] CRAN (R 4.4.0)
#>  digest        0.6.37  2024-08-19 [1] CRAN (R 4.4.1)
#>  dplyr       * 1.1.4   2023-11-17 [1] CRAN (R 4.4.0)
#>  evaluate      1.0.1   2024-10-10 [1] CRAN (R 4.4.1)
#>  fansi         1.0.6   2023-12-08 [1] CRAN (R 4.4.0)
#>  fastmap       1.2.0   2024-05-15 [1] CRAN (R 4.4.0)
#>  forcats     * 1.0.0   2023-01-29 [1] CRAN (R 4.4.0)
#>  fs            1.6.5   2024-10-30 [1] CRAN (R 4.4.1)
#>  generics      0.1.3   2022-07-05 [1] CRAN (R 4.4.0)
#>  ggplot2     * 3.5.1   2024-04-23 [1] CRAN (R 4.4.0)
#>  glue          1.8.0   2024-09-30 [1] CRAN (R 4.4.1)
#>  gtable        0.3.6   2024-10-25 [1] CRAN (R 4.4.1)
#>  hms           1.1.3   2023-03-21 [1] CRAN (R 4.4.0)
#>  htmltools     0.5.8.1 2024-04-04 [1] CRAN (R 4.4.0)
#>  knitr         1.48    2024-07-07 [1] CRAN (R 4.4.0)
#>  lifecycle     1.0.4   2023-11-07 [1] CRAN (R 4.4.0)
#>  lubridate   * 1.9.3   2023-09-27 [1] CRAN (R 4.4.0)
#>  magrittr      2.0.3   2022-03-30 [1] CRAN (R 4.4.0)
#>  munsell       0.5.1   2024-04-01 [1] CRAN (R 4.4.0)
#>  pillar        1.9.0   2023-03-22 [1] CRAN (R 4.4.0)
#>  pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.4.0)
#>  purrr       * 1.0.2   2023-08-10 [1] CRAN (R 4.4.0)
#>  R6            2.5.1   2021-08-19 [1] CRAN (R 4.4.0)
#>  readr       * 2.1.5   2024-01-10 [1] CRAN (R 4.4.0)
#>  reprex        2.1.1   2024-07-06 [1] CRAN (R 4.4.0)
#>  rlang         1.1.4   2024-06-04 [1] CRAN (R 4.4.0)
#>  rmarkdown     2.29    2024-11-04 [1] CRAN (R 4.4.1)
#>  rstudioapi    0.17.1  2024-10-22 [1] CRAN (R 4.4.1)
#>  scales        1.3.0   2023-11-28 [1] CRAN (R 4.4.0)
#>  sessioninfo   1.2.2   2021-12-06 [1] CRAN (R 4.4.0)
#>  stringi       1.8.4   2024-05-06 [1] CRAN (R 4.4.0)
#>  stringr     * 1.5.1   2023-11-14 [1] CRAN (R 4.4.0)
#>  tibble      * 3.2.1   2023-03-20 [1] CRAN (R 4.4.0)
#>  tidyr       * 1.3.1   2024-01-24 [1] CRAN (R 4.4.0)
#>  tidyselect    1.2.1   2024-03-11 [1] CRAN (R 4.4.0)
#>  tidyverse   * 2.0.0   2023-02-22 [1] CRAN (R 4.4.0)
#>  timechange    0.3.0   2024-01-18 [1] CRAN (R 4.4.0)
#>  tzdb          0.4.0   2023-05-12 [1] CRAN (R 4.4.0)
#>  utf8          1.2.4   2023-10-22 [1] CRAN (R 4.4.0)
#>  vctrs         0.6.5   2023-12-01 [1] CRAN (R 4.4.0)
#>  withr         3.0.2   2024-10-28 [1] CRAN (R 4.4.1)
#>  xfun          0.49    2024-10-31 [1] CRAN (R 4.4.1)
#>  yaml          2.3.10  2024-07-26 [1] CRAN (R 4.4.0)
#> 
#>  [1] /Users/nick/Library/R/arm64/4.4/library
#>  [2] /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library
#> 
#> ──────────────────────────────────────────────────────────────────────────────
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment