Skip to content

Instantly share code, notes, and snippets.

@alexpghayes
Created January 12, 2021 18:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexpghayes/ca52c29503b003f2b53c66d835a3f52e to your computer and use it in GitHub Desktop.
Save alexpghayes/ca52c29503b003f2b53c66d835a3f52e to your computer and use it in GitHub Desktop.
library(tidyverse)
library(here)
library(skimr)
library(lubridate)
library(rsample)
library(furrr)
library(arrow)
plan(sequential)
plan(multisession, workers = parallel::detectCores() - 1)
# download_cps("org", cps_directory)
# devtools::install_github("economic/epiextractr")
# arrow::install_arrow(binary = FALSE, minimal = FALSE)
# library(epiextractr)
cps_directory <- here("data/cps")
cps_files <- list.files(cps_directory, full.names = TRUE)[15:17] # 1979 - 2000
cps <- map_dfr(cps_files, read_feather) %>%
mutate(
time = make_date(year = year, month = month)
) %>%
select(time, month, unemp, age, educ, female, married) %>%
mutate_at(vars(unemp, educ, female, married), haven::as_factor) %>%
mutate_at(vars(age), as.integer) %>%
rename(outcome = unemp) %>%
mutate(
partition = as.factor(time)
) %>%
na.omit()
# basic sanity check
sanity_fit <- glm(
formula = outcome ~ age + educ + female + married,
data = head(cps, 100000),
family = binomial()
)
summary(sanity_fit)
# more robust to separated data
calculate_z <- function(data, num_sims = 3) {
global_calibrator <- glm(
outcome ~ score,
data = data,
family = binomial()
)
multi_calibrator <- glm(
outcome ~ score * partition,
data = data,
family = binomial()
)
Lambda <- as.numeric(logLik(multi_calibrator) - logLik(global_calibrator))
simulate_once <- function() {
data$simulated_outcome <- simulate(global_calibrator)$sim_1
simulated_global_calibrator <- glm(
simulated_outcome ~ score,
data = data,
family = binomial()
)
simulated_multi_calibrator <- glm(
simulated_outcome ~ score * partition,
data = data,
family = binomial()
)
simulated_Lambda <- as.numeric(
logLik(simulated_multi_calibrator) -
logLik(simulated_global_calibrator)
)
simulated_Lambda
}
Lambda_star <- map_dbl(1:num_sims, ~simulate_once())
Z <- (Lambda - mean(Lambda_star)) / sd(Lambda_star)
list(Z = Z, Lambda_star = Lambda_star, Lambda = Lambda)
}
calculate_z_cps <- function(data, classifier, num_sims = 3) {
data$score <- predict(classifier, data, type = "response")
calculate_z(data, num_sims)
}
calculate_z_cps(head(cps, 100000), sanity_fit)
sliding_cps <- sliding_period(
data = cps,
lookback = 2,
index = time,
period = "month"
)
fit_model <- function(data) {
glm(outcome ~ age + educ + female + married, data = data, family = binomial())
}
fits <- sliding_cps %>%
mutate(
models = map(splits, ~fit_model(analysis(.x)))
)
sleepy_fits <- fits %>%
mutate(
analysis = map(splits, analysis),
results = future_map2(
analysis, models,
~calculate_z_cps(.x, .y, num_sims = 50),
.options = furrr_options(seed = 27)
)
)
write_rds(sleepy_fits, here("output/cps-fits.rds"))
x <- sleepy_fits %>%
mutate(
z = map_dbl(results, pluck, "Z"),
period = map(splits, ~max(analysis(.x)$time))
)
x$period <- do.call("c", x$period)
ggplot(x) +
aes(period, z) +
geom_line() +
geom_vline(xintercept = as.Date("1994-01-01"), linetype = "dashed") +
theme_minimal()
# documentation on the data pull
# https://economic.github.io/epiextractr/
# https://www2.census.gov/programs-surveys/cps/methodology/PublicUseDocumentation_final.pdf
# https://www.nber.org/system/files/chapters/c8362/c8362.pdf
# model
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment