Skip to content

Instantly share code, notes, and snippets.

@bschneidr
Last active February 25, 2021 20:26
Show Gist options
  • Save bschneidr/5c15f1374c5a45b93de63d6add6c621b to your computer and use it in GitHub Desktop.
Save bschneidr/5c15f1374c5a45b93de63d6add6c621b to your computer and use it in GitHub Desktop.
Visualizing Ratio GREG Estimator
library(magrittr)
library(tidyverse)
library(readr)
library(readxl)
library(schneidr)
theme_set(theme_schneidr(base_font_family = 'sans',
titles_font_family = 'sans'))
# Generate population
x <- rnorm(n = 50, mean = 5, sd = 1)
y <- 2*x + rnorm(n = 50, mean = 0, sd = 1)
r_squared <- summary(lm(y ~ -1 + x))$adj.r.squared
Y_total <- sum(y)
X_total <- sum(x)
Y_mean <- mean(y)
X_mean <- mean(x)
# Generate samples
S <- combn(x = 1:50, m = 3, simplify = FALSE)
sample_estimates <- lapply(S, function(s) {
c('xbar' = mean(x[s]),
'ybar' = mean(y[s]))
})
sample_estimates <- do.call(rbind, sample_estimates)
sample_estimates <- as.data.frame(sample_estimates)
sample_estimates <- sample_estimates %>%
mutate(b = ybar/xbar,
y_ratio = X_total * b,
ybar_ratio = X_mean * b)
# Plot the results
plot_data <- sample_estimates %>%
sample_n(size = 30) %>%
mutate(sample_index = row_number()) %>%
select(sample_index, xbar, ybar, ybar_ratio) %>%
mutate(which_better = case_when(abs(ybar - Y_mean) > abs(ybar_ratio - Y_mean) ~ 'ratio',
abs(ybar - Y_mean) < abs(ybar_ratio - Y_mean) ~ 'mean')) %>%
tidyr::gather(key = 'estimator', value = 'estimate',
matches('ybar'))
plot_data %>%
mutate(xbar = case_when(estimator == "ybar_ratio" ~ X_mean,
TRUE ~ xbar)) %>%
arrange(estimator) %>%
ggplot(aes(x = xbar, y = estimate)) +
geom_hline(yintercept = Y_mean) +
annotate('text', y = 10, x = Y_mean, vjust = -0.5,
label = "True mean of Y") +
geom_vline(xintercept = X_mean) +
annotate('text', y = 20, x = X_mean, hjust = 1,
label = "True mean of X ") +
geom_point(data = plot_data %>%
filter(estimator == "ybar"),
aes(x = xbar, y = estimate, group = sample_index)) +
geom_path(aes(group = sample_index,
color = which_better),
arrow = arrow(length = unit(0.075, 'inches'))) +
geom_abline(intercept = 0, slope = Y_mean/X_mean) +
annotate('text', x = 10, y = 10 * (Y_mean/X_mean), hjust = -0.25,
label = "True ratio of means") +
scale_x_continuous(limits = c(0, 15)) +
scale_y_continuous(limits = c(0, 25)) +
labs(
y = 'estimate of\nY-bar',
x = 'x-bar',
title = "Simple means versus ratio (GREG) estimates",
subtitle = "True means (vertical and horizontal lines)\nSample means (points)\ncorresponding GREG estimate given by arrow"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment