Last active
December 8, 2020 15:08
-
-
Save ericpgreen/b5290cf081f515e2ee98435c996e515a to your computer and use it in GitHub Desktop.
mrp example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# MRP for non-representative samples | |
# http://research.nivi.io/posts/2020-12-08-mister-p-helps-us-understand-vaccine-hesitancy/ | |
# Eric Green (adapted from https://www.monicaalexander.com/posts/2019-08-07-mrp/) | |
# load packages | |
library(tidyverse) | |
library(ggtext) | |
library(brms) | |
library(tidybayes) | |
set.seed(123456) | |
# notes | |
# ----------------------------------------------------------------------------- | |
# you need two datasets: | |
# 1. population weights from a census or representative survey (pop_df) | |
# 2. data from your non-representative sample (sample_obs) | |
# important!!: the datasets must share in common the variables you will use to | |
# post-stratify | |
# generate example data (you'll bring your own data and will skip this) | |
# ----------------------------------------------------------------------------- | |
# get the population data and custom ggplot function | |
load(url("https://www.dropbox.com/s/0u3lblqfv85hch3/pop_df.RData?dl=1")) | |
pop_df <- pop_df %>% | |
mutate(weight=1) | |
# calculate response frequency in the population | |
overall_pop <- pop_df %>% | |
group_by(dv) %>% | |
count() %>% | |
ungroup() %>% | |
mutate(p = n/sum(n)) %>% | |
mutate(source = "true population") %>% | |
dplyr::select(source, dv, p) | |
# draw a convenience sample that is mostly young men | |
# ---------------------------------------------------- | |
# 50 older men | |
sample_obs_m1 <- pop_df %>% | |
filter(gender=="male") %>% | |
filter(age>35) %>% | |
sample_n(50) | |
# 400 younger men | |
sample_obs_m2 <- pop_df %>% | |
filter(gender=="male") %>% | |
filter(age<=35) %>% | |
sample_n(400) | |
# 45 women | |
sample_obs_f <- pop_df %>% | |
filter(gender=="female") %>% | |
sample_n(45) | |
# 5 individuals who identified as non-binary | |
sample_obs_n <- pop_df %>% | |
filter(gender=="non-binary") %>% | |
sample_n(5) | |
# combine to make a sample of 500 | |
sample_obs <- sample_obs_f %>% | |
bind_rows(sample_obs_m1) %>% | |
bind_rows(sample_obs_m2) %>% | |
bind_rows(sample_obs_n) | |
# your work starts here | |
# ----------------------------------------------------------------------------- | |
# calculate cell proportions in the population data | |
overall_prop <- pop_df %>% | |
group_by(gender, ageCat) %>% # modify for your example | |
summarise(n = sum(weight))%>% # use the weight var provided to you | |
ungroup() %>% | |
dplyr::select(gender, ageCat, # modify for your example | |
n) %>% | |
mutate(overall="overall") %>% | |
group_by(overall) %>% | |
mutate(prop = n/sum(n)) %>% | |
ungroup() | |
# fit oridinal regression | |
# if your outcome is binary follow | |
# https://www.monicaalexander.com/posts/2019-08-07-mrp/ | |
fit <- brm(dv ~ (1|gender) + (1|ageCat), # modify for your example | |
data = sample_obs, | |
family = cumulative("probit"), | |
control = list(adapt_delta = 0.99), | |
cores = 4, # depends on your machine | |
backend = "cmdstanr") # or rstan | |
# get point estimates and credible intervals for ordinal outcome | |
overall_sample_est <- fit %>% | |
add_predicted_draws(newdata = overall_prop, | |
allow_new_levels=TRUE) %>% | |
mutate( | |
dv = .prediction, | |
p = 1 | |
) %>% | |
mutate(.prediction_prop = p*prop) %>% | |
group_by(.draw, dv) %>% | |
summarise(p = sum(.prediction_prop)) %>% | |
group_by(dv) %>% | |
mean_qi(p) %>% | |
# modify for your example | |
mutate(source = "biased sample, adjusted") %>% | |
dplyr::select(source, dv, p) %>% | |
mutate(dv = factor(dv, | |
levels=c("never", "rarely", "sometimes", "often"), | |
labels=c("never", "rarely", "sometimes", "often"), | |
ordered = TRUE)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment