MarcPer/coin_toss_power.py

## coin_toss_power.py
# Power analysis for coin tossing
# H1: coin is rigged (p1 > 0.5). Probability of heads in the alternative hypothesis.
# H0: coin is fair (p0 = 0.5). Probability of heads in the null hypothesis.
import math
import itertools


def binom_coeff(n, k):
    return math.factorial(n) / (math.factorial(k) * math.factorial(n - k))

# Return a generator for the binomial distribution, given a probability p and number of trials N
def binomial(p, N):
    return (binom_coeff(N, k) * p**k * (1 - p)**(N - k) for k in range(N + 1))

# one-sided significance level; our H1 hypothesis is about heads only
alpha = 0.05

# Return the number of heads k such that the probability of observing k or more heads is less than alpha.
def significant_k(p, N):
    significant_k = None
    sum = 0

    # Due to the distribution's symmetry, we can calculate k for which the probability of observing k or less tails is less than alpha.
    for k, coeff in enumerate(binomial(1-p, N)):
        sum += coeff
        if sum > alpha:
            break
        significant_k = k

    if significant_k is None:
        return None

    # Return the number of corresponding heads
    return N - significant_k

def statistical_power(p0, p1, N):
    p0_sig_k = significant_k(p0, N)
    if p0_sig_k is None:
        return 0

    return sum([binom_coeff(N, k) * p1**k * (1 - p1)**(N - k) for k in range(p0_sig_k, N + 1)])


def print_power_data(p0, p1, num_tosses):
    print(f"Power for p0 = {p0}, p1 = {p1}, num_tosses = {num_tosses}: {statistical_power(p0, p1, num_tosses)}")

    significants = (statistical_power(p0, p1, N) for N in itertools.count(start=1))
    for i, power in enumerate(significants):
        if power >= 0.8:
            print(f"Power is 0.8 or higher with {i+1} coin tosses for p1 = {p1}")
            break

        # prevent infinite loop
        if i > 10000:
            print("No power of 0.8 or higher found")
            break

print_power_data(0.5, 0.55, 100)
print_power_data(0.5, 0.6, 100)
print_power_data(0.5, 0.7, 100)


# Results
# Power for p0 = 0.5, p1 = 0.55, num_tosses = 100: 0.24149057549224337
# Power is 0.8 or higher with 620 coin tosses for p1 = 0.55
# Power for p0 = 0.5, p1 = 0.6, num_tosses = 100: 0.6225326761221722
# Power is 0.8 or higher with 158 coin tosses for p1 = 0.6
# Power for p0 = 0.5, p1 = 0.7, num_tosses = 100: 0.9928264374006265
# Power is 0.8 or higher with 37 coin tosses for p1 = 0.7

## coin_toss_significance.py
# Statistical significance analysis for coin toss,
# from first principles.
# At the end, the p-value calculation is also checked with scipy.

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# Return array of probabilities for each k following a binomial distribution,
# given a probability p and number of trials N
def binomial(p, N):
    def binom_coeff(n, k):
        return np.math.factorial(n) / (np.math.factorial(k) * np.math.factorial(n - k))

    return [binom_coeff(N, k) * p**k * (1 - p)**(N - k) for k in range(N + 1)]


N = 20 # Number of coin tosses
p = 0.5 # Probability of heads in the null hypothesis

# Most likely number of heads
max_x = np.argmax(binomial(p, N))

# We use a two-tailed test
significance = 0.10

# Find value of k where the probability of observing k or more heads is less than significance / 2
significant_k = None
for k in range(N + 1):
    if sum(binomial(p, N)[k:]) < significance / 2:
        significant_k = k
        break

# by symmetry, the non-significant range is the same on both sides of max_x
distance = significant_k - max_x

# Plot binomial distribution; color the bars red if they are outside the significant range
for i in range(N + 1):
    if i <= max_x - distance or i >= max_x + distance:
        plt.bar(i, binomial(p, N)[i], color='red')
    else:
        plt.bar(i, binomial(p, N)[i], color='darkblue')

plt.xlabel('Number of heads', fontsize=20)
plt.ylabel('Probability', fontsize=20)
plt.xticks(fontsize=15)
plt.title("two-tailed significance level = 0.10", fontsize=25)
plt.show()

# Calculate the probability of observing k or more heads, or k or more tails
def p_value(k, p, N):
    return 2*sum(binomial(p, N)[k:])

# p-values for different outcomes on the amount of heads drawn
for x in range(21):
    print(f"({x} HEADS) p_value={p_value(x, p, N)}")

# Double-check using scipy
print(f"Scipy result = {stats.binomtest(14, N, p=0.5, alternative='two-sided')}")
	# Power analysis for coin tossing
	# H1: coin is rigged (p1 > 0.5). Probability of heads in the alternative hypothesis.
	# H0: coin is fair (p0 = 0.5). Probability of heads in the null hypothesis.
	import math
	import itertools


	def binom_coeff(n, k):
	return math.factorial(n) / (math.factorial(k) * math.factorial(n - k))

	# Return a generator for the binomial distribution, given a probability p and number of trials N
	def binomial(p, N):
	return (binom_coeff(N, k) * p*k (1 - p)**(N - k) for k in range(N + 1))

	# one-sided significance level; our H1 hypothesis is about heads only
	alpha = 0.05

	# Return the number of heads k such that the probability of observing k or more heads is less than alpha.
	def significant_k(p, N):
	significant_k = None
	sum = 0

	# Due to the distribution's symmetry, we can calculate k for which the probability of observing k or less tails is less than alpha.
	for k, coeff in enumerate(binomial(1-p, N)):
	sum += coeff
	if sum > alpha:
	break
	significant_k = k

	if significant_k is None:
	return None

	# Return the number of corresponding heads
	return N - significant_k

	def statistical_power(p0, p1, N):
	p0_sig_k = significant_k(p0, N)
	if p0_sig_k is None:
	return 0

	return sum([binom_coeff(N, k) * p1*k (1 - p1)**(N - k) for k in range(p0_sig_k, N + 1)])


	def print_power_data(p0, p1, num_tosses):
	print(f"Power for p0 = {p0}, p1 = {p1}, num_tosses = {num_tosses}: {statistical_power(p0, p1, num_tosses)}")

	significants = (statistical_power(p0, p1, N) for N in itertools.count(start=1))
	for i, power in enumerate(significants):
	if power >= 0.8:
	print(f"Power is 0.8 or higher with {i+1} coin tosses for p1 = {p1}")
	break

	# prevent infinite loop
	if i > 10000:
	print("No power of 0.8 or higher found")
	break

	print_power_data(0.5, 0.55, 100)
	print_power_data(0.5, 0.6, 100)
	print_power_data(0.5, 0.7, 100)


	# Results
	# Power for p0 = 0.5, p1 = 0.55, num_tosses = 100: 0.24149057549224337
	# Power is 0.8 or higher with 620 coin tosses for p1 = 0.55
	# Power for p0 = 0.5, p1 = 0.6, num_tosses = 100: 0.6225326761221722
	# Power is 0.8 or higher with 158 coin tosses for p1 = 0.6
	# Power for p0 = 0.5, p1 = 0.7, num_tosses = 100: 0.9928264374006265
	# Power is 0.8 or higher with 37 coin tosses for p1 = 0.7
	# Statistical significance analysis for coin toss,
	# from first principles.
	# At the end, the p-value calculation is also checked with scipy.

	import numpy as np
	import scipy.stats as stats
	import matplotlib.pyplot as plt

	# Return array of probabilities for each k following a binomial distribution,
	# given a probability p and number of trials N
	def binomial(p, N):
	def binom_coeff(n, k):
	return np.math.factorial(n) / (np.math.factorial(k) * np.math.factorial(n - k))

	return [binom_coeff(N, k) * p*k (1 - p)**(N - k) for k in range(N + 1)]


	N = 20 # Number of coin tosses
	p = 0.5 # Probability of heads in the null hypothesis

	# Most likely number of heads
	max_x = np.argmax(binomial(p, N))

	# We use a two-tailed test
	significance = 0.10

	# Find value of k where the probability of observing k or more heads is less than significance / 2
	significant_k = None
	for k in range(N + 1):
	if sum(binomial(p, N)[k:]) < significance / 2:
	significant_k = k
	break

	# by symmetry, the non-significant range is the same on both sides of max_x
	distance = significant_k - max_x

	# Plot binomial distribution; color the bars red if they are outside the significant range
	for i in range(N + 1):
	if i <= max_x - distance or i >= max_x + distance:
	plt.bar(i, binomial(p, N)[i], color='red')
	else:
	plt.bar(i, binomial(p, N)[i], color='darkblue')

	plt.xlabel('Number of heads', fontsize=20)
	plt.ylabel('Probability', fontsize=20)
	plt.xticks(fontsize=15)
	plt.title("two-tailed significance level = 0.10", fontsize=25)
	plt.show()

	# Calculate the probability of observing k or more heads, or k or more tails
	def p_value(k, p, N):
	return 2*sum(binomial(p, N)[k:])

	# p-values for different outcomes on the amount of heads drawn
	for x in range(21):
	print(f"({x} HEADS) p_value={p_value(x, p, N)}")

	# Double-check using scipy
	print(f"Scipy result = {stats.binomtest(14, N, p=0.5, alternative='two-sided')}")