andrewheiss/bayes_imputation.md

## bayes_imputation.md

      
    Raw
  

              bayes_imputation.md
            
          
    library(tidyverse)
library(brms)
library(Amelia)
library(broom)

set.seed(1234)
data("africa")

# Impute missing data with Amelia (since it handles country/year panel stuff really well)
imp_amelia <- amelia(x = africa, m = 5, cs = "country", ts = "year", logs = "gdp_pc", p2s = 0)

# Nest the imputed models into a single data frame
all_imputations <- bind_rows(imp_amelia$imputations, .id = "m") %>%
  group_by(m) %>%
  nest()
all_imputations
#> # A tibble: 5 x 2
#> # Groups:   m [5]
#>   m     data              
#>   <chr> <list>            
#> 1 imp1  <tibble [120 × 7]>
#> 2 imp2  <tibble [120 × 7]>
#> 3 imp3  <tibble [120 × 7]>
#> 4 imp4  <tibble [120 × 7]>
#> 5 imp5  <tibble [120 × 7]>

# Run lm() on each imputed dataset
models_imputations_freq <- all_imputations %>%
  mutate(model = data %>% map(~ lm(gdp_pc ~ trade + civlib, data = .)),
         tidied = model %>% map(~ tidy(., conf.int = TRUE)),
         glance = model %>% map(~ glance(.)))
models_imputations_freq
#> # A tibble: 5 x 5
#> # Groups:   m [5]
#>   m     data               model  tidied           glance           
#>   <chr> <list>             <list> <list>           <list>           
#> 1 imp1  <tibble [120 × 7]> <lm>   <tibble [3 × 7]> <tibble [1 × 11]>
#> 2 imp2  <tibble [120 × 7]> <lm>   <tibble [3 × 7]> <tibble [1 × 11]>
#> 3 imp3  <tibble [120 × 7]> <lm>   <tibble [3 × 7]> <tibble [1 × 11]>
#> 4 imp4  <tibble [120 × 7]> <lm>   <tibble [3 × 7]> <tibble [1 × 11]>
#> 5 imp5  <tibble [120 × 7]> <lm>   <tibble [3 × 7]> <tibble [1 × 11]>

# Run brm() on each imputed dataset. This feels wildly inefficient though! And it takes forever!
models_imputations_bayes <- all_imputations %>%
  mutate(model = data %>% map(~ brm(gdp_pc ~ trade + civlib, data = .)),
         tidied = model %>% map(~ tidy(.)))
models_imputations_bayes
#> # A tibble: 5 x 4
#> # Groups:   m [5]
#>   m     data               model     tidied          
#>   <chr> <list>             <list>    <list>          
#> 1 imp1  <tibble [120 × 7]> <brmsfit> <df[,5] [5 × 5]>
#> 2 imp2  <tibble [120 × 7]> <brmsfit> <df[,5] [5 × 5]>
#> 3 imp3  <tibble [120 × 7]> <brmsfit> <df[,5] [5 × 5]>
#> 4 imp4  <tibble [120 × 7]> <brmsfit> <df[,5] [5 × 5]>
#> 5 imp5  <tibble [120 × 7]> <brmsfit> <df[,5] [5 × 5]>

# Now I can pull out the estimates from the chains using tidybayes stuff or
# whatever, and then calculate stuff on the posteriors, but I have 5 different
# posteriors now (or 20 chains), and that's a lot?
#
# Surely there's a better way?
^{Created on 2020-06-29 by the reprex package (v0.3.0)}