Skip to content

Instantly share code, notes, and snippets.

@MarcPer
Created April 6, 2024 17:26
Show Gist options
  • Save MarcPer/2f826529bbb5a7e32ec1962a1fee771e to your computer and use it in GitHub Desktop.
Save MarcPer/2f826529bbb5a7e32ec1962a1fee771e to your computer and use it in GitHub Desktop.
Coin tossing statistics
# Power analysis for coin tossing
# H1: coin is rigged (p1 > 0.5). Probability of heads in the alternative hypothesis.
# H0: coin is fair (p0 = 0.5). Probability of heads in the null hypothesis.
import math
import itertools
def binom_coeff(n, k):
return math.factorial(n) / (math.factorial(k) * math.factorial(n - k))
# Return a generator for the binomial distribution, given a probability p and number of trials N
def binomial(p, N):
return (binom_coeff(N, k) * p**k * (1 - p)**(N - k) for k in range(N + 1))
# one-sided significance level; our H1 hypothesis is about heads only
alpha = 0.05
# Return the number of heads k such that the probability of observing k or more heads is less than alpha.
def significant_k(p, N):
significant_k = None
sum = 0
# Due to the distribution's symmetry, we can calculate k for which the probability of observing k or less tails is less than alpha.
for k, coeff in enumerate(binomial(1-p, N)):
sum += coeff
if sum > alpha:
break
significant_k = k
if significant_k is None:
return None
# Return the number of corresponding heads
return N - significant_k
def statistical_power(p0, p1, N):
p0_sig_k = significant_k(p0, N)
if p0_sig_k is None:
return 0
return sum([binom_coeff(N, k) * p1**k * (1 - p1)**(N - k) for k in range(p0_sig_k, N + 1)])
def print_power_data(p0, p1, num_tosses):
print(f"Power for p0 = {p0}, p1 = {p1}, num_tosses = {num_tosses}: {statistical_power(p0, p1, num_tosses)}")
significants = (statistical_power(p0, p1, N) for N in itertools.count(start=1))
for i, power in enumerate(significants):
if power >= 0.8:
print(f"Power is 0.8 or higher with {i+1} coin tosses for p1 = {p1}")
break
# prevent infinite loop
if i > 10000:
print("No power of 0.8 or higher found")
break
print_power_data(0.5, 0.55, 100)
print_power_data(0.5, 0.6, 100)
print_power_data(0.5, 0.7, 100)
# Results
# Power for p0 = 0.5, p1 = 0.55, num_tosses = 100: 0.24149057549224337
# Power is 0.8 or higher with 620 coin tosses for p1 = 0.55
# Power for p0 = 0.5, p1 = 0.6, num_tosses = 100: 0.6225326761221722
# Power is 0.8 or higher with 158 coin tosses for p1 = 0.6
# Power for p0 = 0.5, p1 = 0.7, num_tosses = 100: 0.9928264374006265
# Power is 0.8 or higher with 37 coin tosses for p1 = 0.7
# Statistical significance analysis for coin toss,
# from first principles.
# At the end, the p-value calculation is also checked with scipy.
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
# Return array of probabilities for each k following a binomial distribution,
# given a probability p and number of trials N
def binomial(p, N):
def binom_coeff(n, k):
return np.math.factorial(n) / (np.math.factorial(k) * np.math.factorial(n - k))
return [binom_coeff(N, k) * p**k * (1 - p)**(N - k) for k in range(N + 1)]
N = 20 # Number of coin tosses
p = 0.5 # Probability of heads in the null hypothesis
# Most likely number of heads
max_x = np.argmax(binomial(p, N))
# We use a two-tailed test
significance = 0.10
# Find value of k where the probability of observing k or more heads is less than significance / 2
significant_k = None
for k in range(N + 1):
if sum(binomial(p, N)[k:]) < significance / 2:
significant_k = k
break
# by symmetry, the non-significant range is the same on both sides of max_x
distance = significant_k - max_x
# Plot binomial distribution; color the bars red if they are outside the significant range
for i in range(N + 1):
if i <= max_x - distance or i >= max_x + distance:
plt.bar(i, binomial(p, N)[i], color='red')
else:
plt.bar(i, binomial(p, N)[i], color='darkblue')
plt.xlabel('Number of heads', fontsize=20)
plt.ylabel('Probability', fontsize=20)
plt.xticks(fontsize=15)
plt.title("two-tailed significance level = 0.10", fontsize=25)
plt.show()
# Calculate the probability of observing k or more heads, or k or more tails
def p_value(k, p, N):
return 2*sum(binomial(p, N)[k:])
# p-values for different outcomes on the amount of heads drawn
for x in range(21):
print(f"({x} HEADS) p_value={p_value(x, p, N)}")
# Double-check using scipy
print(f"Scipy result = {stats.binomtest(14, N, p=0.5, alternative='two-sided')}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment