Created
April 6, 2024 17:26
-
-
Save MarcPer/2f826529bbb5a7e32ec1962a1fee771e to your computer and use it in GitHub Desktop.
Coin tossing statistics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Power analysis for coin tossing | |
# H1: coin is rigged (p1 > 0.5). Probability of heads in the alternative hypothesis. | |
# H0: coin is fair (p0 = 0.5). Probability of heads in the null hypothesis. | |
import math | |
import itertools | |
def binom_coeff(n, k): | |
return math.factorial(n) / (math.factorial(k) * math.factorial(n - k)) | |
# Return a generator for the binomial distribution, given a probability p and number of trials N | |
def binomial(p, N): | |
return (binom_coeff(N, k) * p**k * (1 - p)**(N - k) for k in range(N + 1)) | |
# one-sided significance level; our H1 hypothesis is about heads only | |
alpha = 0.05 | |
# Return the number of heads k such that the probability of observing k or more heads is less than alpha. | |
def significant_k(p, N): | |
significant_k = None | |
sum = 0 | |
# Due to the distribution's symmetry, we can calculate k for which the probability of observing k or less tails is less than alpha. | |
for k, coeff in enumerate(binomial(1-p, N)): | |
sum += coeff | |
if sum > alpha: | |
break | |
significant_k = k | |
if significant_k is None: | |
return None | |
# Return the number of corresponding heads | |
return N - significant_k | |
def statistical_power(p0, p1, N): | |
p0_sig_k = significant_k(p0, N) | |
if p0_sig_k is None: | |
return 0 | |
return sum([binom_coeff(N, k) * p1**k * (1 - p1)**(N - k) for k in range(p0_sig_k, N + 1)]) | |
def print_power_data(p0, p1, num_tosses): | |
print(f"Power for p0 = {p0}, p1 = {p1}, num_tosses = {num_tosses}: {statistical_power(p0, p1, num_tosses)}") | |
significants = (statistical_power(p0, p1, N) for N in itertools.count(start=1)) | |
for i, power in enumerate(significants): | |
if power >= 0.8: | |
print(f"Power is 0.8 or higher with {i+1} coin tosses for p1 = {p1}") | |
break | |
# prevent infinite loop | |
if i > 10000: | |
print("No power of 0.8 or higher found") | |
break | |
print_power_data(0.5, 0.55, 100) | |
print_power_data(0.5, 0.6, 100) | |
print_power_data(0.5, 0.7, 100) | |
# Results | |
# Power for p0 = 0.5, p1 = 0.55, num_tosses = 100: 0.24149057549224337 | |
# Power is 0.8 or higher with 620 coin tosses for p1 = 0.55 | |
# Power for p0 = 0.5, p1 = 0.6, num_tosses = 100: 0.6225326761221722 | |
# Power is 0.8 or higher with 158 coin tosses for p1 = 0.6 | |
# Power for p0 = 0.5, p1 = 0.7, num_tosses = 100: 0.9928264374006265 | |
# Power is 0.8 or higher with 37 coin tosses for p1 = 0.7 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Statistical significance analysis for coin toss, | |
# from first principles. | |
# At the end, the p-value calculation is also checked with scipy. | |
import numpy as np | |
import scipy.stats as stats | |
import matplotlib.pyplot as plt | |
# Return array of probabilities for each k following a binomial distribution, | |
# given a probability p and number of trials N | |
def binomial(p, N): | |
def binom_coeff(n, k): | |
return np.math.factorial(n) / (np.math.factorial(k) * np.math.factorial(n - k)) | |
return [binom_coeff(N, k) * p**k * (1 - p)**(N - k) for k in range(N + 1)] | |
N = 20 # Number of coin tosses | |
p = 0.5 # Probability of heads in the null hypothesis | |
# Most likely number of heads | |
max_x = np.argmax(binomial(p, N)) | |
# We use a two-tailed test | |
significance = 0.10 | |
# Find value of k where the probability of observing k or more heads is less than significance / 2 | |
significant_k = None | |
for k in range(N + 1): | |
if sum(binomial(p, N)[k:]) < significance / 2: | |
significant_k = k | |
break | |
# by symmetry, the non-significant range is the same on both sides of max_x | |
distance = significant_k - max_x | |
# Plot binomial distribution; color the bars red if they are outside the significant range | |
for i in range(N + 1): | |
if i <= max_x - distance or i >= max_x + distance: | |
plt.bar(i, binomial(p, N)[i], color='red') | |
else: | |
plt.bar(i, binomial(p, N)[i], color='darkblue') | |
plt.xlabel('Number of heads', fontsize=20) | |
plt.ylabel('Probability', fontsize=20) | |
plt.xticks(fontsize=15) | |
plt.title("two-tailed significance level = 0.10", fontsize=25) | |
plt.show() | |
# Calculate the probability of observing k or more heads, or k or more tails | |
def p_value(k, p, N): | |
return 2*sum(binomial(p, N)[k:]) | |
# p-values for different outcomes on the amount of heads drawn | |
for x in range(21): | |
print(f"({x} HEADS) p_value={p_value(x, p, N)}") | |
# Double-check using scipy | |
print(f"Scipy result = {stats.binomtest(14, N, p=0.5, alternative='two-sided')}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment