Skip to content

Instantly share code, notes, and snippets.

@RyanGreenup
Created April 4, 2024 05:59
Show Gist options
  • Save RyanGreenup/fe6771611080af214f7b1f4d6427a4a1 to your computer and use it in GitHub Desktop.
Save RyanGreenup/fe6771611080af214f7b1f4d6427a4a1 to your computer and use it in GitHub Desktop.
Week 5 Bootstrap

Notes

Main Bootstrap

## Sampling distribution of the mean -------------------------------------------
## Treat the first column of `iris` as the entire population, draw many
## samples of size `n` from it, and look at how the sample mean behaves.
set.seed(1234) # seed the simulation so the numbers below are reproducible
n <- 30
pop <- iris[, 1]
hist(pop)

mean(pop)


## Each replicate draws `n` values WITHOUT replacement and records their mean
sampling_dist <- replicate(10^4, {
    samp <- sample(pop, n)
    mean(samp)
})

## Lower / upper tail probabilities of a central (1 - p) interval
p <- 5 / 100
u <- 1 - p / 2
l <- 1 - u


## 95% of the simulated samples of size `n` have a mean between these two
## quantiles (this describes the spread of sample means around the
## population mean; it is not yet a confidence interval from one sample)
hist(sampling_dist)
(q <- quantile(sampling_dist, c(l, u)))
abline(v = q, col = "red", lwd = 3)


## The sampling distribution is centred on the population mean
mean(sampling_dist)
mean(pop)

## Variance of the sample mean.
## `sample()` draws without replacement from a finite population, so the
## usual var(pop) / n must be scaled by the finite population correction
## (1 - n / N); without it the theoretical value is about 20% too large
## for n = 30 out of N = 150.
fpc <- 1 - n / length(pop)
theoretical_var <- var(pop) / n * fpc
var(sampling_dist)
theoretical_var



## In practice only one sample is observed; fix the seed so it is repeatable
set.seed(1234)
samp <- sample(pop, size = n)


## Bootstrap: resample the observed sample with replacement (same size as
## `samp`) and record the mean of every resample
boot_sample_means <- vapply(
    seq_len(10^4),
    function(i) mean(sample(samp, replace = TRUE)),
    numeric(1)
)

## Percentile bootstrap interval: the central 95% of the bootstrap means,
## used as an approximate 95% confidence interval for the population mean
hist(boot_sample_means)
(q <- quantile(boot_sample_means, probs = c(l, u)))
abline(v = q, col = "red", lwd = 3)

Workshop Explanation

Means

Population Mean

Let

  • $X = \left\{x_1, x_2, x_3, \ldots, x_N \right\}$ be the population, of size $N$
    • $m = \left\{m_1, m_2, \ldots, m_n \right\}$ be some sample of $X$, of size $n$

$$ \mu = \frac{1}{N} \sum^N_{i=1} \left[ x_i \right] $$

Sample Mean

$$ \overline{x} = \frac{1}{n} \sum^n_{i=1} \left[ m_i \right] $$

Mean of Means

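$$ \begin{aligned} \overline{\overline{x}} &= \frac{1}{k} \sum^k_{j=1} \left[ \overline{x}_{j} \right] \\ &= \frac{1}{k} \sum^k_{j=1} \left[ \frac{1}{n} \sum^n_{i=1} \left[ m_{ij} \right] \right] \\ &\approx \mu \end{aligned} $$

Here $k$ is the number of samples drawn, $\overline{x}_j$ is the mean of the $j$-th sample, and $m_{ij}$ is the $i$-th observation in the $j$-th sample; the approximation becomes exact as $k$ grows.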

Variance

Population

$$ \sigma^2 = \mathrm{E}\left[ \left(X - \mathrm{E}\left(X\right)\right)^2 \right] $$

Sample -- Bessel's Correction

$$ s^2 = \frac{1}{n-1} \sum^n_{i=1} \left[ \left(x_i - \overline{x}\right)^2 \right] $$

Sampling Distribution

$$ \sigma_{\overline{x}} = \frac{\sigma}{\sqrt{n}} $$

Where:

  • $\sigma$ is the population standard deviation (the square root of the population variance)
  • $n$ is sample size

Workshop

library(tidyverse)
library(Lock5Data)


## Load the nutrition study data and take a first look at its structure
d <- Lock5Data::NutritionStudy

head(d)
str(d)
glimpse(d)


## Plot it ---------------------------------------------------------------------
## Tidyverse....................................................................
## Keep only Vitamin and Fat; Vitamin is turned into a factor so that it is
## treated as a set of discrete groups on the x axis
(d <- tibble(d) |>
    select(Vitamin, Fat) |> # <- d[,c("Vitamin", "Fat")]
    mutate(Vitamin = factor(Vitamin))) # <- factor(d$Vitamin)
## Draw the boxes first and the raw points on top so the points are not
## hidden behind the boxes. The boxplot's own outlier markers are switched
## off (outlier.shape = NA) because every observation is already drawn by
## geom_jitter(); otherwise each outlier would appear twice.
d |>
    ggplot(aes(x = Vitamin, y = Fat, col = Vitamin)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.052, height = 0) +
    theme_classic() +
    labs(title = "Fat for different Vitamins")

## Base Plot ...................................................................
## The same preparation and boxplot using base R only

d <- d[, c("Vitamin", "Fat")]
d[["Vitamin"]] <- factor(d[["Vitamin"]])

## One fill colour per box
cols <- 1:3
boxplot(Fat ~ Vitamin, data = d, main = "Fat for Vitamins", col = cols)

## Part 2 ----------------------------------------------------------------------
## Tidyverse....................................................................

## Start again from the full data set, this time keeping Fat and Calories
d <- select(Lock5Data::NutritionStudy, Fat, Calories)
glimpse(d)

## Scatter plot of Calories against Fat with a smoothed trend line
## (se = FALSE drops the confidence band)
ggplot(d, aes(x = Fat, y = Calories, col = Calories)) +
    geom_point() +
    stat_smooth(se = FALSE)


## Base Plot ...................................................................
## Model formula: response ~ predictor
f <- Calories ~ Fat
## Scatter plot of the raw data (pch = 19 is a filled circle, cex = 1 is
## the default point size)
plot(f,
    data = d,
    pch = 19,
    cex = 1,
    col = "royalblue",
    main = "Calories given Fat"
)
## Fit a linear model of Calories on Fat
mod <- lm(f, data = d)
## Overlay the fitted line on the scatter plot
abline(mod, lwd = 2, lty = "dashed", col = "red")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment