## RyanGreenup/wk6_pvals.r

## wk6_pvals.r
library(Lock5Data)
library(tidyverse)
## Preview the columns of the NutritionStudy data set
dplyr::glimpse(Lock5Data::NutritionStudy)


## Get the Smoke column and tabulate its values
smoke <- Lock5Data::NutritionStudy$Smoke
table(smoke)
## Encode as a logical vector answering "Does this person smoke?"
## (kept as a separate assignment rather than nested inside table())
X <- smoke == "Yes"
table(X)
## Proportion of TRUE values, i.e. the proportion of people who smoke
mean(X)
## Proportion of FALSE values, i.e. the proportion who do not smoke
mean(!X)


## Hypotheses
## H_0: ρ = 0.2
## H_a: ρ ≠ 0.2   (two-sided), or one of the one-sided alternatives:
##     H_a: ρ > 0.2
##     H_a: ρ < 0.2


## Size of the observed sample; the simulated samples below use the same size
n <- length(X)

## We can take a sample of size n from a population with ρ = 0.2 by drawing
## with replacement from an urn holding eight 0s and two 1s.
## `size` and `replace` must be given: without them sample() only permutes
## its input, so the proportion of 1s would always be exactly 0.2.
s <- sample(c(rep(0, 8), rep(1, 2)), size = n, replace = TRUE)
## Or draw 0/1 directly with the matching probabilities
## (without `size` only length(c(0, 1)) = 2 values would be drawn)
s <- sample(c(0, 1), size = n, replace = TRUE, prob = c(0.8, 0.2))
## Or draw n Bernoulli(0.2) values
s <- rbinom(n, size = 1, prob = 0.2)

## Now we can take that and get the proportion of 1s in our sample
mean(s)


## This can then be wrapped into a function to perform one experiment.
##
## Draws `n` independent 0/1 values with success probability `prob` using
## rbinom() and returns the proportion of 1s.
##
## Arguments:
##   n    - number of observations to draw; defaults to length(X), which is
##          looked up only when the function is called without `n`.
##   prob - success probability of the simulated population (0.2 under H_0).
##
## Returns a single numeric value: the simulated sample proportion.
sample_02_pop <- function(n = length(X), prob = 0.2) {
    mean(rbinom(n, size = 1, prob = prob))
}
## Run a single simulated experiment
sample_02_pop()

## If we run that many times we get the distribution of sample proportions
## produced by sample_02_pop(), i.e. under the assumed null value
sample_proportions <- replicate(10^4, sample_02_pop())

## Proportion of TRUE values in the observed data
p_hat <- mean(X)

## The p-value is the share of simulated proportions that are at least as
## small as the observed one (lower tail, matching the one-sided alternative
## ρ < 0.2). `<=` rather than `<` so that simulated proportions equal to the
## observed value are counted as "as extreme"; leaving them out understates
## the p-value.
(pval <- mean(sample_proportions <= p_hat))

## Significance level
alpha <- 0.05

if (pval < alpha) {
    cat("Reject H0 at ", alpha)
} else {
    cat("Insufficient evidence to reject H0 at ", alpha)
}
	library(Lock5Data)
	library(tidyverse)
	## NOTE(review): this tab-indented section repeats the analysis earlier in
	## the file; consider keeping only one copy.
	## Preview the columns of the NutritionStudy data set
	dplyr::glimpse(Lock5Data::NutritionStudy)


	## Get the Smoke column and tabulate its values
	smoke <- Lock5Data::NutritionStudy$Smoke
	table(smoke)
	## Encode as a logical vector answering "Does this person smoke?"
	## (kept as a separate assignment rather than nested inside table())
	X <- smoke == "Yes"
	table(X)
	## Proportion of TRUE values, i.e. the proportion of people who smoke
	mean(X)
	## Proportion of FALSE values, i.e. the proportion who do not smoke
	mean(!X)


	## Hypotheses
	## H_0: ρ = 0.2
	## H_a: ρ ≠ 0.2   (two-sided), or one of the one-sided alternatives:
	##     H_a: ρ > 0.2
	##     H_a: ρ < 0.2


	## Size of the observed sample; the simulated samples below use the same size
	n <- length(X)

	## We can take a sample of size n from a population with ρ = 0.2 by drawing
	## with replacement from an urn holding eight 0s and two 1s.
	## `size` and `replace` must be given: without them sample() only permutes
	## its input, so the proportion of 1s would always be exactly 0.2.
	s <- sample(c(rep(0, 8), rep(1, 2)), size = n, replace = TRUE)
	## Or draw 0/1 directly with the matching probabilities
	## (without `size` only length(c(0, 1)) = 2 values would be drawn)
	s <- sample(c(0, 1), size = n, replace = TRUE, prob = c(0.8, 0.2))
	## Or draw n Bernoulli(0.2) values
	s <- rbinom(n, size = 1, prob = 0.2)

	## Now we can take that and get the proportion of 1s in our sample
	mean(s)


	## This can then be wrapped into a function to perform one experiment.
	##
	## Draws `n` independent 0/1 values with success probability `prob` using
	## rbinom() and returns the proportion of 1s.
	##
	## Arguments:
	##   n    - number of observations to draw; defaults to length(X), which is
	##          looked up only when the function is called without `n`.
	##   prob - success probability of the simulated population (0.2 under H_0).
	##
	## Returns a single numeric value: the simulated sample proportion.
	sample_02_pop <- function(n = length(X), prob = 0.2) {
		mean(rbinom(n, size = 1, prob = prob))
	}
	## Run a single simulated experiment
	sample_02_pop()

	## If we run that many times we get the distribution of sample proportions
	## produced by sample_02_pop(), i.e. under the assumed null value
	sample_proportions <- replicate(10^4, sample_02_pop())

	## Proportion of TRUE values in the observed data
	p_hat <- mean(X)

	## The p-value is the share of simulated proportions that are at least as
	## small as the observed one (lower tail, matching the one-sided alternative
	## ρ < 0.2). `<=` rather than `<` so that simulated proportions equal to the
	## observed value are counted as "as extreme"; leaving them out understates
	## the p-value.
	(pval <- mean(sample_proportions <= p_hat))

	## Significance level
	alpha <- 0.05

	if (pval < alpha) {
		cat("Reject H0 at ", alpha)
	} else {
		cat("Insufficient evidence to reject H0 at ", alpha)
	}