Skip to content

Instantly share code, notes, and snippets.

@andrewheiss
Last active February 26, 2020 05:54
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save andrewheiss/dc1fb68700ca98710f215cc50c875929 to your computer and use it in GitHub Desktop.
Save andrewheiss/dc1fb68700ca98710f215cc50c875929 to your computer and use it in GitHub Desktop.
library(tidyverse)

# Make yard size, the presence/absence of a home garden, and attitudes toward the 
# environment all correlated with each other. Home garden will be binary, so I split 
# it based on some threshold later. Attitudes toward the environment will be on a 
# scale of 1-10, so I rescale and ceiling it later

mu <- c(yard_size = 20000, home_garden = 30, attitude_env = 70)
stddev <- c(yard_size = 10000, home_garden = 15, attitude_env = 40)

cor_matrix <- matrix(c(1.0, 0.7, 0.1,
                       0.7, 1.0, 0.9,
                       0.1, 0.9, 1.0),
                     ncol = 3)

cov_matrix <- stddev %*% t(stddev) * cor_matrix

set.seed(1234)
confounders <- MASS::mvrnorm(n = 2000, mu = mu, Sigma = cov_matrix,
                             empirical = TRUE) %>%
  as_tibble() %>% 
  # Adjust generated variables
  mutate(home_garden = home_garden > 35) %>%  # THIS IS CAUSING THE WEIRD/NEAT SPLIT
  mutate(attitude_env = scales::rescale(attitude_env, to = c(1, 10)),
         attitude_env = round(attitude_env, 0),
         yard_size = round(abs(yard_size), 0)) %>% 
  # Make new temperature variable
  mutate(temperature = round(rnorm(2000, mean = 70, sd = 5), 1)) %>%
  # Generate probability of self-selecting into the rain barrel program
  mutate(prob_barrel = (0.5 * attitude_env) + (4 * home_garden) +
           (0.05 * yard_size / 1000) + (0.3 * temperature),
         # Force probability to 0-1 scale
         prob_barrel = scales::rescale(prob_barrel, to = c(0.05, 0.85))) 

head(confounders)
#> # A tibble: 6 x 5
#>   yard_size home_garden attitude_env temperature prob_barrel
#>       <dbl> <lgl>              <dbl>       <dbl>       <dbl>
#> 1      7100 FALSE                  4        70.1       0.283
#> 2     17367 FALSE                  6        69         0.341
#> 3     19499 TRUE                   7        64.8       0.507
#> 4     33378 FALSE                  1        70.1       0.274
#> 5      6011 FALSE                  5        68.5       0.281
#> 6     12578 FALSE                  5        70.8       0.332

ggplot(confounders, aes(x = temperature, y = prob_barrel)) +
  geom_point() +
  geom_smooth(method = "lm")
#> `geom_smooth()` using formula 'y ~ x'

Created on 2020-02-25 by the reprex package (v0.3.0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment