regression where predictors are correlated with past values of y
library('dplyr')
library('tidyr')
library('pbapply')
#' simulate()
#'
#' @param num_groups Number of groups
#' @param num_per_group Number of observations (timesteps) per group
#' @param true_coef The true `beta_1` in `log(Y_t) ~ beta_0 + beta_1*log(X_t)`
#' @param phi A function that takes the past values Y_1,...,Y_t-1 and returns X_t
#'
#' @return A dataframe with group_id, time, x, y
simulate <- function(num_groups, num_per_group, true_coef, phi) {
  # generate groups, with varying baselines:
  df_groups <- tibble(group_id = 1:num_groups) %>%
    mutate(.baseline = rnorm(n = num_groups, mean = 3))
  out <- vector(mode = 'list', length = num_per_group)
  for (t in (1:num_per_group)) {
    dft <- df_groups %>%
      mutate(.white_noise = rnorm(n=n()), time = t)
    if (t > 1) {
      # x at time t is a function of this group's past y values:
      dft <- dft %>%
        left_join(
          bind_rows(out) %>%
            group_by(group_id) %>%
            summarize(x = phi(y), .groups='drop'),
          by = 'group_id'
        )
    } else {
      # no past y yet, so phi sees an empty vector:
      dft <- dft %>%
        group_by(group_id) %>%
        mutate(x = phi(numeric(0))) %>%
        ungroup()
    }
    # if phi returned NA, coalesce() drops x's contribution to y:
    out[[t]] <- dft %>% mutate(y = exp(.baseline + .white_noise + true_coef * coalesce(log(x),0)))
  }
  return(bind_rows(out) %>% select(-starts_with(".")) %>% arrange(group_id, time))
}
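# Minimal usage sketch (my addition, for illustration only): a tiny run with a phi that ignores past y, just to
# show the shape of the output -- one row per group_id x time, with columns group_id, time, x, y.
head(simulate(num_groups = 3, num_per_group = 4, true_coef = 0, phi = function(y) exp(rnorm(1))))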
# Simulation ----------------------------------------------------------------------------------------------------

# x is drawn independently of past y:
UNBIASED_PHI <- function(y) exp(rnorm(1))

# x is proportional to the running average of this group's past y:
BIASED_PHI <- function(y) {
  if (length(y)==0) return(exp(2)) # note: this ends up being critical for HLM, see below
  return( mean(y) * runif(n = 1, min = .10, max = .20) )
}

set.seed(2020-9-21)
TRUE_COEF <- 0.0
df_sim <- simulate(num_groups = 5000, num_per_group = 10, true_coef = TRUE_COEF, phi = BIASED_PHI)
# Individual Models -----------------------------------------------------------------------------------------------

library('ggplot2')

# Running a bunch of individual per-group linear models, we see that with BIASED_PHI the models under-estimate the
# true value (this is because x is proportional to y_lag_avg, and (in general) `cor(y, y_lag_avg) < 0`).
# Note: `mean_cl_boot` requires the Hmisc package to be installed.
df_sim %>%
  group_by(group_id) %>%
  nest() %>%
  ungroup() %>%
  mutate(model = pblapply(data, function(.x) broom::tidy(lm(log(y) ~ log(x), data=.x)))) %>%
  select(-data) %>%
  unnest(cols=c(model)) %>%
  ggplot(aes(x=term, y=estimate)) +
  stat_summary(fun.data = mean_cl_boot) +
  geom_hline(yintercept = 0)
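# Quick check of the claim above (my addition, a rough sketch rather than part of the original analysis): within
# each group, correlate log(y) with the log of the running mean of past y -- the quantity that BIASED_PHI's x is
# proportional to. The average within-group correlation is what drives the per-group slopes above.
df_sim %>%
  group_by(group_id) %>%
  arrange(time, .by_group = TRUE) %>%
  mutate(y_lag_avg = dplyr::lag(cummean(y))) %>%
  filter(!is.na(y_lag_avg)) %>%
  summarize(within_group_cor = cor(log(y), log(y_lag_avg)), .groups = 'drop') %>%
  summarize(mean_within_group_cor = mean(within_group_cor))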
# Mixed-Effects Model -----------------------------------------------------------------------------------------------

library('lme4')

# A mixed-effects model can get this right!
# (Important note: I had previously, mistakenly, concluded that the model would be biased, because I'd implemented
# BIASED_PHI to return NA on the first timestep (i.e., before we have any `y`s). This caused lmer to drop the t=1
# rows, which meant that `x` contained information about each group's average that the lmer model didn't have access
# to when calculating random intercepts; this led to an upwardly biased coefficient on `x`.)
bind_rows(pblapply(X = 1:250, FUN = function(i) {
  df_sim <- simulate(num_groups = 5000, num_per_group = 10, true_coef = TRUE_COEF, phi = BIASED_PHI)
  as.data.frame(summary(lmer(
    log(y) ~ log(x) + ( 1 | group_id),
    data = df_sim )
  )$coefficients) %>%
    tibble::rownames_to_column('term')
})) %>%
  filter(term=='log(x)') %>%
  ggplot(aes(x=Estimate, group=abs(`t value`)>1.96, fill=abs(`t value`)>1.96)) +
  geom_histogram()
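# Sketch of the pitfall described in the note above (my addition, for illustration): a version of BIASED_PHI that
# returns NA before any y exists. simulate() still works (coalesce() handles the NA), but lmer() silently drops the
# t=1 rows, so `x` carries information about each group's average that the random intercepts never see -- the setup
# that, per the note above, produced the upwardly biased coefficient on `x`.
BIASED_PHI_NA <- function(y) {
  if (length(y) == 0) return(NA_real_)
  return( mean(y) * runif(n = 1, min = .10, max = .20) )
}
df_sim_na <- simulate(num_groups = 5000, num_per_group = 10, true_coef = TRUE_COEF, phi = BIASED_PHI_NA)
summary(lmer(log(y) ~ log(x) + ( 1 | group_id), data = df_sim_na))$coefficients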