Skip to content

Instantly share code, notes, and snippets.

@pbstark
Created September 17, 2016 21:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pbstark/58653bbc26f269d4588ea7cd5b2e12bf to your computer and use it in GitHub Desktop.
Save pbstark/58653bbc26f269d4588ea7cd5b2e12bf to your computer and use it in GitHub Desktop.
calculations for hypothetical scenarios in auditing the 2016 Australian senatorial election, Tasmanian portion
# Calculate various hypotheticals for the 2016 Australian Senatorial election, for Tasmanian results.
# P.B. Stark, 17 September 2016
from __future__ import division, print_function
import math
import numpy as np
import scipy as sp
from scipy import stats # distributions
from scipy import special # special functions
from scipy import random # random variables, distributions, etc.
from scipy.optimize import brentq
from scipy.stats import (binom, hypergeom)
import permute.utils
# --- Audit parameters for the 2016 Tasmanian Senate count ---
cl = 0.95                              # confidence level used throughout
alpha = 1 - cl                         # risk limit implied by the confidence level
valid_votes = 339159                   # formal (valid) votes cast in Tasmania
invalid_votes = 12221                  # informal votes: blanks, spoiled ballots, etc.
ballots = valid_votes + invalid_votes  # every ballot in the audit pool

# The margin, expressed two ways:
marginv = 141                 # vote gap between the last seat winner and the runner-up
margindv = marginv / ballots  # margin "diluted" over all ballots, in votes
# A single misinterpreted ballot can move the margin by 1 or 2 votes, so
# roughly half as many erroneous *ballots* could suffice to change the outcome.
marginb = 71                  # fewest erroneous ballots that could alter the outcome
margindb = marginb / ballots  # diluted margin in ballots

print('upper bound on the diluted margin in ballots:', margindb)
print('upper bound on the diluted margin in votes:', margindv)
# Measured risk based on observing no errors in a simple random sample:
# if the true error rate were large enough to change the outcome (at least
# margindb), the chance that a sample of size n shows no errors at all is
# (1 - margindb)**n.
smallest_sample = 1000
largest_sample = 16000
sample_increment = 1000

print('sample size, measured risk if no errors are observed in the sample:')
for sample_size in np.arange(smallest_sample, largest_sample + 1, sample_increment):
    measured_risk = (1 - margindb) ** sample_size
    print(sample_size, measured_risk)
# Upper one-sided confidence bounds on the population error rate when a
# sample of size n contains no errors (x = 0), via permute's binomial bound.
x = 0
print('sample size, upper 95% confidence bound for error rate if no errors are observed in the sample:')
for n in np.arange(smallest_sample, largest_sample + 1, sample_increment):
    upper_bound = permute.utils.binom_conf_interval(n, x, cl=cl, alternative="upper")
    print(n, upper_bound)
# Now suppose a sample of 2500 ballots turns up x errors, x = 1, ..., 10:
# lower one-sided 95% confidence bounds on the population error rate.
print('errors observed in a sample of 2500 ballots, lower 95% confidence bound on error rate:')
n = 2500
for x in np.arange(1, 11):
    lower_bound = permute.utils.binom_conf_interval(n, x, cl=cl, alternative="lower")
    print(x, lower_bound)
# Initial sample size for a risk-limiting audit, assuming the true error rates are zero
def minSampleSize(ballots, u, alpha=0.05, gamma=0.95):
    """
    Smallest initial sample size for a risk-limiting audit with risk limit
    alpha, assuming the audit observes no errors.

    Solves 1/alpha = (gamma/(1 - 1/(ballots*u)) + 1 - gamma)**n for n and
    rounds up to the nearest integer.

    Parameters
    ----------
    ballots : int
        number of ballots cast in the contest
    u : float
        upper bound on the overstatement per ballot
    alpha : float
        risk limit
    gamma : float
        cushion in (0, 1) hedging against finding a ballot that attains the
        upper bound; larger values give less protection
    """
    per_draw = gamma / (1.0 - 1.0 / (ballots * u)) + 1.0 - gamma
    return math.ceil(math.log(1.0 / alpha) / math.log(per_draw))
# Report the starting sample size; u = 2/marginv because a single misread
# ballot can overstate the margin by at most 2 votes.
initial_sample = minSampleSize(ballots, 2 / marginv, alpha=alpha)
print('Initial sample size for RLA with risk limit', alpha, ':', initial_sample)
@nealmcb
Copy link

nealmcb commented Jan 21, 2019

Thanks for this code!
Running it, I get ImportError: No module named permute.utils. The Python code for the permute module seems to be described in http://statlab.github.io/permute/permute.pdf
Is the code available somewhere? Or at least the permute.utils.binom_conf_interval() method?
Also, is it written for Python 3, Python 2, or both?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment