FrankRuns/white-noise.R

## white-noise.R

# Replicate Bob's results from this LinkedIn post:
# https://www.linkedin.com/posts/bob-wilson-77a22ab_people-sometimes-say-ab-testing-requires-activity-7152792859878871040-X1Sr?utm_source=share&utm_medium=member_desktop


### Implement Fisher's Exact Test

# Create the contingency table
contingency_table <- matrix(c(0, 4, 7, 3), nrow = 2)
dimnames(contingency_table) <- list(c("Control", "Treatment"),
                                    c("Successes", "Failures"))

# Fisher's Exact Test is used here to determine if there are nonrandom
# associations between two categorical variables in our contingency table.
fisher_test_result <- fisher.test(contingency_table, alternative = "greater")
p_value <- fisher_test_result$p.value


### Implement Bob's hypergeometric distribution approach

# The hypergeometric distribution helps in understanding the likelihood of a
# specific number of successes in a sample, given the total number of successes
# and failures in the population.
# Parameters for the hypergeometric distribution
M <- 14 # Total number of objects (total trials)
n <- 3  # Total number of Type I objects (total successes)
N <- 7  # Total number of objects drawn (trials in one group)

# The number of successes in the treatment group
x <- 3

# Calculate the one-sided p-value using the hypergeometric distribution
p_value <- phyper(x - 1, n, M - n, N, lower.tail = FALSE)


### Bayesian approach inspecting treatment data

# In the Bayesian approach, we start with prior beliefs about the probability
# of success (here, a 2:1 prior), and then update these beliefs based on the
# observed data (successes and trials).

# Define the parameters (with 2:1 prior)
prior_a <- 2
prior_b <- 1
successes <- 4
trials <- 7

# Calculate the posterior parameters
posterior_a <- prior_a + successes
posterior_b <- prior_b + trials - successes

# Create a sequence of probabilities for plotting the posterior
probabilities <- seq(0, 1, length.out = 100)

# Compute the posterior distribution
posterior <- dbeta(probabilities, posterior_a, posterior_b)

# Plot the posterior distribution
plot(probabilities, posterior, type = 'l', col = 'blue',
     xlab = 'Probability of Effectiveness', ylab = 'Density',
     main = 'Posterior Distribution of White Noise Machine Effectiveness')

# Add a line for the prior distribution for comparison
lines(probabilities, dbeta(probabilities, prior_a, prior_b), col = 'red')
legend('topright', legend = c('Posterior', 'Prior'), col = c('blue', 'red'), lty = 1)

# How much area under my prior curve?

# Calculating the area to the left of 0.5
area_left_of_0_5 <- pbeta(0.5, prior_a, prior_b)

# Calculating the area to the right of 0.5
area_right_of_0_5 <- 1 - area_left_of_0_5

# Printing the results
cat("Area to the left of 0.5: ", area_left_of_0_5, "\n")
cat("Area to the right of 0.5: ", area_right_of_0_5, "\n")

# How much area under my posterior curve?

# Your existing code for posterior parameters
posterior_a <- prior_a + successes
posterior_b <- prior_b + trials - successes

# Calculating the area to the left of 0.5 for the posterior distribution
posterior_area_left_of_0_5 <- pbeta(0.5, posterior_a, posterior_b)

# Calculating the area to the right of 0.5 for the posterior distribution
posterior_area_right_of_0_5 <- 1 - posterior_area_left_of_0_5

# Printing the results
cat("Posterior distribution - Area to the left of 0.5: ", posterior_area_left_of_0_5, "\n")
cat("Posterior distribution - Area to the right of 0.5: ", posterior_area_right_of_0_5, "\n")


### Bayesian approach compare treatment and control (with prior of 2:1)

# Prior parameters
prior_a <- 2
prior_b <- 1

# Data
treatment_successes <- 4
treatment_trials <- 7
control_successes <- 0
control_trials <- 7

# Posterior parameters for treatment and control
posterior_a_treatment <- prior_a + treatment_successes
posterior_b_treatment <- prior_b + treatment_trials - treatment_successes
posterior_a_control <- prior_a + control_successes
posterior_b_control <- prior_b + control_trials - control_successes

# Probability sequence for plotting
probabilities <- seq(0, 1, length.out = 100)

# Posterior distributions
posterior_treatment <- dbeta(probabilities, posterior_a_treatment, posterior_b_treatment)
posterior_control <- dbeta(probabilities, posterior_a_control, posterior_b_control)

# Find the maximum density values for both distributions
max_density_treatment <- max(posterior_treatment)
max_density_control <- max(posterior_control)
max_density <- max(max_density_treatment, max_density_control)

# Adjust ylim to include the maximum density value
plot(probabilities, posterior_treatment, type = 'l', col = 'blue',
     xlab = 'Probability of Effectiveness', ylab = 'Density',
     main = 'Posterior Distributions',
     ylim = c(0, max_density * 1.1)) # Adding a 10% buffer to ensure visibility

lines(probabilities, posterior_control, col = 'red')
legend('topright', legend = c('Treatment', 'Control'), col = c('blue', 'red'), lty = 1)

	# Replicate Bob's results from this LinkedIn post:
	# https://www.linkedin.com/posts/bob-wilson-77a22ab_people-sometimes-say-ab-testing-requires-activity-7152792859878871040-X1Sr?utm_source=share&utm_medium=member_desktop


	### Implement Fisher's Exact Test

	# Create the contingency table
	contingency_table <- matrix(c(0, 4, 7, 3), nrow = 2)
	dimnames(contingency_table) <- list(c("Control", "Treatment"),
	c("Successes", "Failures"))

	# Fisher's Exact Test is used here to determine if there are nonrandom
	# associations between two categorical variables in our contingency table.
	fisher_test_result <- fisher.test(contingency_table, alternative = "greater")
	p_value <- fisher_test_result$p.value


	### Implement Bob's hypergeometric distribution approach

	# The hypergeometric distribution helps in understanding the likelihood of a
	# specific number of successes in a sample, given the total number of successes
	# and failures in the population.
	# Parameters for the hypergeometric distribution
	M <- 14 # Total number of objects (total trials)
	n <- 3 # Total number of Type I objects (total successes)
	N <- 7 # Total number of objects drawn (trials in one group)

	# The number of successes in the treatment group
	x <- 3

	# Calculate the one-sided p-value using the hypergeometric distribution
	p_value <- phyper(x - 1, n, M - n, N, lower.tail = FALSE)


	### Bayesian approach inspecting treatment data

	# In the Bayesian approach, we start with prior beliefs about the probability
	# of success (here, a 2:1 prior), and then update these beliefs based on the
	# observed data (successes and trials).

	# Define the parameters (with 2:1 prior)
	prior_a <- 2
	prior_b <- 1
	successes <- 4
	trials <- 7

	# Calculate the posterior parameters
	posterior_a <- prior_a + successes
	posterior_b <- prior_b + trials - successes

	# Create a sequence of probabilities for plotting the posterior
	probabilities <- seq(0, 1, length.out = 100)

	# Compute the posterior distribution
	posterior <- dbeta(probabilities, posterior_a, posterior_b)

	# Plot the posterior distribution
	plot(probabilities, posterior, type = 'l', col = 'blue',
	xlab = 'Probability of Effectiveness', ylab = 'Density',
	main = 'Posterior Distribution of White Noise Machine Effectiveness')

	# Add a line for the prior distribution for comparison
	lines(probabilities, dbeta(probabilities, prior_a, prior_b), col = 'red')
	legend('topright', legend = c('Posterior', 'Prior'), col = c('blue', 'red'), lty = 1)

	# How much area under my prior curve?

	# Calculating the area to the left of 0.5
	area_left_of_0_5 <- pbeta(0.5, prior_a, prior_b)

	# Calculating the area to the right of 0.5
	area_right_of_0_5 <- 1 - area_left_of_0_5

	# Printing the results
	cat("Area to the left of 0.5: ", area_left_of_0_5, "\n")
	cat("Area to the right of 0.5: ", area_right_of_0_5, "\n")

	# How much area under my posterior curve?

	# Your existing code for posterior parameters
	posterior_a <- prior_a + successes
	posterior_b <- prior_b + trials - successes

	# Calculating the area to the left of 0.5 for the posterior distribution
	posterior_area_left_of_0_5 <- pbeta(0.5, posterior_a, posterior_b)

	# Calculating the area to the right of 0.5 for the posterior distribution
	posterior_area_right_of_0_5 <- 1 - posterior_area_left_of_0_5

	# Printing the results
	cat("Posterior distribution - Area to the left of 0.5: ", posterior_area_left_of_0_5, "\n")
	cat("Posterior distribution - Area to the right of 0.5: ", posterior_area_right_of_0_5, "\n")


	### Bayesian approach compare treatment and control (with prior of 2:1)

	# Prior parameters
	prior_a <- 2
	prior_b <- 1

	# Data
	treatment_successes <- 4
	treatment_trials <- 7
	control_successes <- 0
	control_trials <- 7

	# Posterior parameters for treatment and control
	posterior_a_treatment <- prior_a + treatment_successes
	posterior_b_treatment <- prior_b + treatment_trials - treatment_successes
	posterior_a_control <- prior_a + control_successes
	posterior_b_control <- prior_b + control_trials - control_successes

	# Probability sequence for plotting
	probabilities <- seq(0, 1, length.out = 100)

	# Posterior distributions
	posterior_treatment <- dbeta(probabilities, posterior_a_treatment, posterior_b_treatment)
	posterior_control <- dbeta(probabilities, posterior_a_control, posterior_b_control)

	# Find the maximum density values for both distributions
	max_density_treatment <- max(posterior_treatment)
	max_density_control <- max(posterior_control)
	max_density <- max(max_density_treatment, max_density_control)

	# Adjust ylim to include the maximum density value
	plot(probabilities, posterior_treatment, type = 'l', col = 'blue',
	xlab = 'Probability of Effectiveness', ylab = 'Density',
	main = 'Posterior Distributions',
	ylim = c(0, max_density * 1.1)) # Adding a 10% buffer to ensure visibility

	lines(probabilities, posterior_control, col = 'red')
	legend('topright', legend = c('Treatment', 'Control'), col = c('blue', 'red'), lty = 1)