Skip to content

Instantly share code, notes, and snippets.

@ericpgreen
Last active December 8, 2020 15:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericpgreen/b5290cf081f515e2ee98435c996e515a to your computer and use it in GitHub Desktop.
Save ericpgreen/b5290cf081f515e2ee98435c996e515a to your computer and use it in GitHub Desktop.
mrp example
# MRP for non-representative samples
# http://research.nivi.io/posts/2020-12-08-mister-p-helps-us-understand-vaccine-hesitancy/
# Eric Green (adapted from https://www.monicaalexander.com/posts/2019-08-07-mrp/)
# load packages
library(tidyverse)
library(ggtext)
library(brms)
library(tidybayes)
set.seed(123456)
# notes
# -----------------------------------------------------------------------------
# you need two datasets:
# 1. population weights from a census or representative survey (pop_df)
# 2. data from your non-representative sample (sample_obs)
# important!!: the datasets must share in common the variables you will use to
# post-stratify
# generate example data (you'll bring your own data and will skip this)
# -----------------------------------------------------------------------------
# get the population data and custom ggplot function
load(url("https://www.dropbox.com/s/0u3lblqfv85hch3/pop_df.RData?dl=1"))
pop_df <- pop_df %>%
mutate(weight=1)
# calculate response frequency in the population
overall_pop <- pop_df %>%
group_by(dv) %>%
count() %>%
ungroup() %>%
mutate(p = n/sum(n)) %>%
mutate(source = "true population") %>%
dplyr::select(source, dv, p)
# draw a convenience sample that is mostly young men
# ----------------------------------------------------
# 50 older men
sample_obs_m1 <- pop_df %>%
filter(gender=="male") %>%
filter(age>35) %>%
sample_n(50)
# 400 younger men
sample_obs_m2 <- pop_df %>%
filter(gender=="male") %>%
filter(age<=35) %>%
sample_n(400)
# 45 women
sample_obs_f <- pop_df %>%
filter(gender=="female") %>%
sample_n(45)
# 5 individuals who identified as non-binary
sample_obs_n <- pop_df %>%
filter(gender=="non-binary") %>%
sample_n(5)
# combine to make a sample of 500
sample_obs <- sample_obs_f %>%
bind_rows(sample_obs_m1) %>%
bind_rows(sample_obs_m2) %>%
bind_rows(sample_obs_n)
# your work starts here
# -----------------------------------------------------------------------------
# calculate cell proportions in the population data
overall_prop <- pop_df %>%
group_by(gender, ageCat) %>% # modify for your example
summarise(n = sum(weight))%>% # use the weight var provided to you
ungroup() %>%
dplyr::select(gender, ageCat, # modify for your example
n) %>%
mutate(overall="overall") %>%
group_by(overall) %>%
mutate(prop = n/sum(n)) %>%
ungroup()
# fit oridinal regression
# if your outcome is binary follow
# https://www.monicaalexander.com/posts/2019-08-07-mrp/
fit <- brm(dv ~ (1|gender) + (1|ageCat), # modify for your example
data = sample_obs,
family = cumulative("probit"),
control = list(adapt_delta = 0.99),
cores = 4, # depends on your machine
backend = "cmdstanr") # or rstan
# get point estimates and credible intervals for ordinal outcome
overall_sample_est <- fit %>%
add_predicted_draws(newdata = overall_prop,
allow_new_levels=TRUE) %>%
mutate(
dv = .prediction,
p = 1
) %>%
mutate(.prediction_prop = p*prop) %>%
group_by(.draw, dv) %>%
summarise(p = sum(.prediction_prop)) %>%
group_by(dv) %>%
mean_qi(p) %>%
# modify for your example
mutate(source = "biased sample, adjusted") %>%
dplyr::select(source, dv, p) %>%
mutate(dv = factor(dv,
levels=c("never", "rarely", "sometimes", "often"),
labels=c("never", "rarely", "sometimes", "often"),
ordered = TRUE))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment