cmrnp/CLT_30_demo.Rmd

## CLT_30_demo.Rmd
---
title: "CLT rule of thumb demo"
author: "Cameron Patrick"
date: "2024-03-15"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  dpi = 300
)
```

```{r}
library(tidyverse)
library(glue)
library(gt)
library(cowplot)
theme_set(theme_cowplot(font_size = 11, rel_small = 9/11, rel_tiny = 9/11))
set.seed(98765)
```

## Distribution of sample means

```{r}
get_data <- function(n, k, rdist) {
  tibble(
    x = rdist(k*n)
  ) %>%
    group_by(grp = floor((row_number()-1) / k)) %>%
    summarise(conf.low = mean(x) - qt(0.975, k - 1)*sd(x)/sqrt(k),
              conf.high = mean(x) + qt(0.975, k - 1)*sd(x)/sqrt(k),
              x = mean(x))
}

dat <- tribble(
  ~dist_name, ~dist_fn, ~true_mean,
  "Uniform", \(n) runif(n, 0, 1), 0.5,
  "Bern(0.1)", \(n) rbinom(n, 1, 0.1), 0.1,
  "Exp(1)", \(n) rexp(n, 1), 1,
  "Pois(5)", \(n) rpois(n, 5), 5,
  "Lognormal(0,1)", \(n) rlnorm(n, 0, 1), exp(0.5),
) %>%
  mutate(dist_name = fct_inorder(dist_name)) %>%
  expand_grid(k = c(1, 10, 30, 100)) %>%
  rowwise(everything()) %>%
  reframe(get_data(10000, k, dist_fn))
```

```{r, fig.width = 8, fig.height = 8}
dat %>%
  mutate(panel_label = fct_inorder(glue("{dist_name}, n = {k}"))) %>%
  ggplot(aes(x = x)) +
  geom_histogram() +
  scale_y_continuous(breaks = NULL) +
  scale_x_continuous(breaks = NULL) +
  facet_wrap(vars(panel_label), nrow = length(unique(dat$dist_name)), scales = "free") +
  panel_border() +
  labs(x = NULL, y = NULL, title = "CLT just kicked in yo") +
  theme(plot.title.position = "plot")
ggsave("CLT_30_demo.png", width = 8, height = 8, dpi = 600, bg = "white")
```

## Coverage of interval estimates

```{r}
dat2 <- tribble(
  ~dist_name, ~dist_fn, ~true_mean,
  "Uniform", \(n) runif(n, 0, 1), 0.5,
  "Bern(0.1)", \(n) rbinom(n, 1, 0.1), 0.1,
  "Exp(1)", \(n) rexp(n, 1), 1,
  "Pois(5)", \(n) rpois(n, 5), 5,
  "Lognormal(0,1)", \(n) rlnorm(n, 0, 1), exp(0.5),
) %>%
  mutate(dist_name = fct_inorder(dist_name)) %>%
  expand_grid(k = c(10, 30, 60, 100, 200, 300)) %>%
  rowwise(everything()) %>%
  reframe(get_data(10000, k, dist_fn))
```

```{r}
tails2 <- dat2 %>%
  group_by(dist_name, k) %>%
  summarise(Q2.5 = quantile(x, 0.025),
            Q97.5 = quantile(x, 0.975),
            norm_Q2.5 = mean(x) - qt(0.975, first(k) - 1)*sd(x),
            norm_Q97.5 = mean(x) + qt(0.975, first(k) - 1)*sd(x),
            coverage = mean(true_mean >= conf.low & true_mean <= conf.high))
tails2 %>%
  gt() %>%
  fmt_number(columns = Q2.5:norm_Q97.5, decimals = 3) %>%
  fmt_number(columns = coverage, decimals = 3)
```

```{r, fig.width = 6, fig.height = 4}
ggplot(tails2, aes(x = k, y = coverage, group = dist_name, colour = dist_name)) +
  geom_line() +
  geom_point() +
  scale_x_continuous(breaks = unique(tails2$k)) +
  scale_y_continuous(limits = c(0.5, 1.0)) +
  geom_hline(yintercept = 0.95, linetype = "dotted") +
  labs(x = "n (sample size)", y = "Coverage of nominal 95% CI", colour = NULL) +
  theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0))
ggsave("CLT_30_coverage.png", width = 6, height = 4, dpi = 600, bg = "white")
```
	---
	title: "CLT rule of thumb demo"
	author: "Cameron Patrick"
	date: "2024-03-15"
	output: html_document
	---

	```{r setup, include=FALSE}
	knitr::opts_chunk$set(
	echo = TRUE,
	message = FALSE,
	warning = FALSE,
	dpi = 300
	)
	```

	```{r}
	library(tidyverse)
	library(glue)
	library(gt)
	library(cowplot)
	theme_set(theme_cowplot(font_size = 11, rel_small = 9/11, rel_tiny = 9/11))
	set.seed(98765)
	```

	## Distribution of sample means

	```{r}
	get_data <- function(n, k, rdist) {
	tibble(
	x = rdist(k*n)
	) %>%
	group_by(grp = floor((row_number()-1) / k)) %>%
	summarise(conf.low = mean(x) - qt(0.975, k - 1)*sd(x)/sqrt(k),
	conf.high = mean(x) + qt(0.975, k - 1)*sd(x)/sqrt(k),
	x = mean(x))
	}

	dat <- tribble(
	~dist_name, ~dist_fn, ~true_mean,
	"Uniform", \(n) runif(n, 0, 1), 0.5,
	"Bern(0.1)", \(n) rbinom(n, 1, 0.1), 0.1,
	"Exp(1)", \(n) rexp(n, 1), 1,
	"Pois(5)", \(n) rpois(n, 5), 5,
	"Lognormal(0,1)", \(n) rlnorm(n, 0, 1), exp(0.5),
	) %>%
	mutate(dist_name = fct_inorder(dist_name)) %>%
	expand_grid(k = c(1, 10, 30, 100)) %>%
	rowwise(everything()) %>%
	reframe(get_data(10000, k, dist_fn))
	```

	```{r, fig.width = 8, fig.height = 8}
	dat %>%
	mutate(panel_label = fct_inorder(glue("{dist_name}, n = {k}"))) %>%
	ggplot(aes(x = x)) +
	geom_histogram() +
	scale_y_continuous(breaks = NULL) +
	scale_x_continuous(breaks = NULL) +
	facet_wrap(vars(panel_label), nrow = length(unique(dat$dist_name)), scales = "free") +
	panel_border() +
	labs(x = NULL, y = NULL, title = "CLT just kicked in yo") +
	theme(plot.title.position = "plot")
	ggsave("CLT_30_demo.png", width = 8, height = 8, dpi = 600, bg = "white")
	```

	## Coverage of interval estimates

	```{r}
	dat2 <- tribble(
	~dist_name, ~dist_fn, ~true_mean,
	"Uniform", \(n) runif(n, 0, 1), 0.5,
	"Bern(0.1)", \(n) rbinom(n, 1, 0.1), 0.1,
	"Exp(1)", \(n) rexp(n, 1), 1,
	"Pois(5)", \(n) rpois(n, 5), 5,
	"Lognormal(0,1)", \(n) rlnorm(n, 0, 1), exp(0.5),
	) %>%
	mutate(dist_name = fct_inorder(dist_name)) %>%
	expand_grid(k = c(10, 30, 60, 100, 200, 300)) %>%
	rowwise(everything()) %>%
	reframe(get_data(10000, k, dist_fn))
	```

	```{r}
	tails2 <- dat2 %>%
	group_by(dist_name, k) %>%
	summarise(Q2.5 = quantile(x, 0.025),
	Q97.5 = quantile(x, 0.975),
	norm_Q2.5 = mean(x) - qt(0.975, first(k) - 1)*sd(x),
	norm_Q97.5 = mean(x) + qt(0.975, first(k) - 1)*sd(x),
	coverage = mean(true_mean >= conf.low & true_mean <= conf.high))
	tails2 %>%
	gt() %>%
	fmt_number(columns = Q2.5:norm_Q97.5, decimals = 3) %>%
	fmt_number(columns = coverage, decimals = 3)
	```

	```{r, fig.width = 6, fig.height = 4}
	ggplot(tails2, aes(x = k, y = coverage, group = dist_name, colour = dist_name)) +
	geom_line() +
	geom_point() +
	scale_x_continuous(breaks = unique(tails2$k)) +
	scale_y_continuous(limits = c(0.5, 1.0)) +
	geom_hline(yintercept = 0.95, linetype = "dotted") +
	labs(x = "n (sample size)", y = "Coverage of nominal 95% CI", colour = NULL) +
	theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0))
	ggsave("CLT_30_coverage.png", width = 6, height = 4, dpi = 600, bg = "white")
	```