Skip to content

Instantly share code, notes, and snippets.

@FrankRuns
Created January 23, 2024 11:20
Show Gist options
  • Save FrankRuns/b8413d6194af859a05640ccc07d96dfa to your computer and use it in GitHub Desktop.
Save FrankRuns/b8413d6194af859a05640ccc07d96dfa to your computer and use it in GitHub Desktop.
Calculations supporting substack post called Analytics, now what?
# Replicate Bob's results from this LinkedIn post:
# https://www.linkedin.com/posts/bob-wilson-77a22ab_people-sometimes-say-ab-testing-requires-activity-7152792859878871040-X1Sr?utm_source=share&utm_medium=member_desktop
### Implement Fisher's Exact Test
# Create the contingency table
contingency_table <- matrix(c(0, 4, 7, 3), nrow = 2)
dimnames(contingency_table) <- list(c("Control", "Treatment"),
c("Successes", "Failures"))
# Fisher's Exact Test is used here to determine if there are nonrandom
# associations between two categorical variables in our contingency table.
fisher_test_result <- fisher.test(contingency_table, alternative = "greater")
p_value <- fisher_test_result$p.value
### Implement Bob's hypergeometric distribution approach
# The hypergeometric distribution helps in understanding the likelihood of a
# specific number of successes in a sample, given the total number of successes
# and failures in the population.
# Parameters for the hypergeometric distribution
M <- 14 # Total number of objects (total trials)
n <- 3 # Total number of Type I objects (total successes)
N <- 7 # Total number of objects drawn (trials in one group)
# The number of successes in the treatment group
x <- 3
# Calculate the one-sided p-value using the hypergeometric distribution
p_value <- phyper(x - 1, n, M - n, N, lower.tail = FALSE)
### Bayesian approach inspecting treatment data
# In the Bayesian approach, we start with prior beliefs about the probability
# of success (here, a 2:1 prior), and then update these beliefs based on the
# observed data (successes and trials).
# Define the parameters (with 2:1 prior)
prior_a <- 2
prior_b <- 1
successes <- 4
trials <- 7
# Calculate the posterior parameters
posterior_a <- prior_a + successes
posterior_b <- prior_b + trials - successes
# Create a sequence of probabilities for plotting the posterior
probabilities <- seq(0, 1, length.out = 100)
# Compute the posterior distribution
posterior <- dbeta(probabilities, posterior_a, posterior_b)
# Plot the posterior distribution
plot(probabilities, posterior, type = 'l', col = 'blue',
xlab = 'Probability of Effectiveness', ylab = 'Density',
main = 'Posterior Distribution of White Noise Machine Effectiveness')
# Add a line for the prior distribution for comparison
lines(probabilities, dbeta(probabilities, prior_a, prior_b), col = 'red')
legend('topright', legend = c('Posterior', 'Prior'), col = c('blue', 'red'), lty = 1)
# How much area under my prior curve?
# Calculating the area to the left of 0.5
area_left_of_0_5 <- pbeta(0.5, prior_a, prior_b)
# Calculating the area to the right of 0.5
area_right_of_0_5 <- 1 - area_left_of_0_5
# Printing the results
cat("Area to the left of 0.5: ", area_left_of_0_5, "\n")
cat("Area to the right of 0.5: ", area_right_of_0_5, "\n")
# How much area under my posterior curve?
# Your existing code for posterior parameters
posterior_a <- prior_a + successes
posterior_b <- prior_b + trials - successes
# Calculating the area to the left of 0.5 for the posterior distribution
posterior_area_left_of_0_5 <- pbeta(0.5, posterior_a, posterior_b)
# Calculating the area to the right of 0.5 for the posterior distribution
posterior_area_right_of_0_5 <- 1 - posterior_area_left_of_0_5
# Printing the results
cat("Posterior distribution - Area to the left of 0.5: ", posterior_area_left_of_0_5, "\n")
cat("Posterior distribution - Area to the right of 0.5: ", posterior_area_right_of_0_5, "\n")
### Bayesian approach compare treatment and control (with prior of 2:1)
# Prior parameters
prior_a <- 2
prior_b <- 1
# Data
treatment_successes <- 4
treatment_trials <- 7
control_successes <- 0
control_trials <- 7
# Posterior parameters for treatment and control
posterior_a_treatment <- prior_a + treatment_successes
posterior_b_treatment <- prior_b + treatment_trials - treatment_successes
posterior_a_control <- prior_a + control_successes
posterior_b_control <- prior_b + control_trials - control_successes
# Probability sequence for plotting
probabilities <- seq(0, 1, length.out = 100)
# Posterior distributions
posterior_treatment <- dbeta(probabilities, posterior_a_treatment, posterior_b_treatment)
posterior_control <- dbeta(probabilities, posterior_a_control, posterior_b_control)
# Find the maximum density values for both distributions
max_density_treatment <- max(posterior_treatment)
max_density_control <- max(posterior_control)
max_density <- max(max_density_treatment, max_density_control)
# Adjust ylim to include the maximum density value
plot(probabilities, posterior_treatment, type = 'l', col = 'blue',
xlab = 'Probability of Effectiveness', ylab = 'Density',
main = 'Posterior Distributions',
ylim = c(0, max_density * 1.1)) # Adding a 10% buffer to ensure visibility
lines(probabilities, posterior_control, col = 'red')
legend('topright', legend = c('Treatment', 'Control'), col = c('blue', 'red'), lty = 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment