Skip to content

Instantly share code, notes, and snippets.

@holnburger
Created August 6, 2020 19:23
Show Gist options
  • Save holnburger/a3c3840042eb0c591f1f869c2fa1873c to your computer and use it in GitHub Desktop.
Save holnburger/a3c3840042eb0c591f1f869c2fa1873c to your computer and use it in GitHub Desktop.
library(tidyverse)
library(slider)
# Example panel data: two country codes, one row per year from 2006 to 2011.
# Built column by column; within each vector the first six values belong to
# "AFG" and the last six to "AFB". year, xm_gdp and polity are integer
# columns, the remaining measures are double.
final <- tibble::tibble(
  ccode = rep(c("AFG", "AFB"), each = 6L),
  year = rep(2006:2011, times = 2L),
  publicdept = c(
    23, 20.1, 23, 20.1, 28, 24,
    19, 20, 21, 22, 23, 24
  ),
  lifeexpec = c(
    58.8, 59.4, 58.8, 59.4, 60, 68,
    66, 67, 68, 69, 70, 71
  ),
  gdp_pc = c(
    0.264, 0.36, 0.264, 0.36, 0.36, 0.36,
    0.264, 0.36, 0.456, 0.552, 0.648, 0.744
  ),
  cpia = c(
    3.33, 3.17, 3.33, 3.17, 3.33, 3.17,
    3.17, 4.17, 5.17, 6.17, 7.17, 8.17
  ),
  xm_gdp = c(
    103L, 102L, 103L, 102L, 103L, 102L,
    102L, 103L, 104L, 105L, 106L, 107L
  ),
  polity = rep(-66L, times = 12L)
)
# Average every numeric column per country code, using the across() helper
# for summarise() that was introduced in dplyr 1.0.0.
final %>%
  group_by(ccode) %>%
  summarise(across(.cols = where(is.numeric), .fns = mean))
# Use the slider package to label three-year periods within each country.
# Each window spans the current row plus the next two (.after = 2) and is
# only evaluated on every third row (.step = 3L); the rows in between come
# back as NA, so their label reads "NA-NA".
final %>%
  group_by(ccode) %>%
  mutate(
    period = paste(
      slide_dbl(year, min, .after = 2, .step = 3L),
      slide_dbl(year, max, .after = 2, .step = 3L),
      sep = "-"
    )
  )
# Pasting the window bounds with paste0() gives "NA-NA" labels on every 2nd
# and 3rd row, because slider only evaluates every third window.
# Build the label with str_c() instead: it returns a missing value whenever
# one of its pieces is missing, so the skipped rows get a real NA and no
# pattern matching on the text "NA" is needed afterwards.
final %>%
  group_by(ccode) %>%
  mutate(
    period = str_c(
      slide_dbl(year, min, .step = 3L, .after = 2),
      "-",
      slide_dbl(year, max, .step = 3L, .after = 2)
    )
  )
# Combine everything: label three-year periods per country, then average all
# remaining numeric columns within each country/period combination.
final %>%
  group_by(ccode) %>%
  # slider windows are positional, so put each country's rows in year order
  arrange(year, .by_group = TRUE) %>%
  mutate(
    # str_c() yields NA (rather than "NA-NA") on the rows slider skips
    period = str_c(
      slide_dbl(year, min, .step = 3L, .after = 2),
      "-",
      slide_dbl(year, max, .step = 3L, .after = 2)
    )
  ) %>%
  # carry each period label down onto the skipped rows that follow it
  fill(period, .direction = "down") %>%
  # year is now represented by period, so it must not be averaged
  select(-year) %>%
  group_by(ccode, period) %>%
  summarise(
    across(where(is.numeric), mean),
    .groups = "drop" # return an ungrouped tibble
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment