Skip to content

Instantly share code, notes, and snippets.

@pbstark
Created September 17, 2016 21:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pbstark/58653bbc26f269d4588ea7cd5b2e12bf to your computer and use it in GitHub Desktop.
Save pbstark/58653bbc26f269d4588ea7cd5b2e12bf to your computer and use it in GitHub Desktop.
calculations for hypothetical scenarios in auditing the 2016 Australian senatorial election, Tasmanian portion
# Calculate various hypotheticals for the 2016 Australian Senatorial election, for Tasmanian results.
# P.B. Stark, 17 September 2016
from __future__ import division, print_function
import math
import numpy as np
import scipy as sp
from scipy import stats # distributions
from scipy import special # special functions
from scipy import random # random variables, distributions, etc.
from scipy.optimize import brentq
from scipy.stats import (binom, hypergeom)
import permute.utils
# --- Audit parameters for the 2016 Tasmanian Senate count ---
cl = 0.95                              # confidence level used throughout
alpha = 1 - cl                         # risk limit implied by the confidence level
valid_votes = 339159                   # formal (valid) votes cast in Tasmania
invalid_votes = 12221                  # informal votes: blanks, spoiled ballots, etc.
ballots = valid_votes + invalid_votes  # every ballot in the audit pool

# The margin, expressed two ways:
marginv = 141                 # vote gap between the last seat winner and the runner-up
margindv = marginv / ballots  # margin "diluted" over all ballots, in votes
# A single misinterpreted ballot can move the margin by 1 or 2 votes, so
# roughly half as many erroneous *ballots* could suffice to change the outcome.
marginb = 71                  # fewest erroneous ballots that could alter the outcome
margindb = marginb / ballots  # diluted margin in ballots

print('upper bound on the diluted margin in ballots:', margindb)
print('upper bound on the diluted margin in votes:', margindv)
# Measured risk based on observing no errors in a simple random sample:
# if the true error rate were large enough to change the outcome (at least
# margindb), the chance that a sample of size n shows no errors at all is
# (1 - margindb)**n.
smallest_sample = 1000
largest_sample = 16000
sample_increment = 1000

print('sample size, measured risk if no errors are observed in the sample:')
for sample_size in np.arange(smallest_sample, largest_sample + 1, sample_increment):
    measured_risk = (1 - margindb) ** sample_size
    print(sample_size, measured_risk)
# Upper one-sided confidence bounds on the population error rate when a
# sample of size n contains no errors (x = 0), via permute's binomial bound.
x = 0
print('sample size, upper 95% confidence bound for error rate if no errors are observed in the sample:')
for n in np.arange(smallest_sample, largest_sample + 1, sample_increment):
    upper_bound = permute.utils.binom_conf_interval(n, x, cl=cl, alternative="upper")
    print(n, upper_bound)
# Now suppose a sample of 2500 ballots turns up x errors, x = 1, ..., 10:
# lower one-sided 95% confidence bounds on the population error rate.
print('errors observed in a sample of 2500 ballots, lower 95% confidence bound on error rate:')
n = 2500
for x in np.arange(1, 11):
    lower_bound = permute.utils.binom_conf_interval(n, x, cl=cl, alternative="lower")
    print(x, lower_bound)
# Initial sample size for a risk-limiting audit, assuming the true error rates are zero
def minSampleSize(ballots, u, alpha=0.05, gamma=0.95):
    """
    Smallest initial sample size for a risk-limiting audit with risk limit
    alpha, assuming the audit observes no errors.

    Solves 1/alpha = (gamma/(1 - 1/(ballots*u)) + 1 - gamma)**n for n and
    rounds up to the nearest integer.

    Parameters
    ----------
    ballots : int
        number of ballots cast in the contest
    u : float
        upper bound on the overstatement per ballot
    alpha : float
        risk limit
    gamma : float
        cushion in (0, 1) hedging against finding a ballot that attains the
        upper bound; larger values give less protection
    """
    per_draw = gamma / (1.0 - 1.0 / (ballots * u)) + 1.0 - gamma
    return math.ceil(math.log(1.0 / alpha) / math.log(per_draw))
# Report the starting sample size; u = 2/marginv because a single misread
# ballot can overstate the margin by at most 2 votes.
initial_sample = minSampleSize(ballots, 2 / marginv, alpha=alpha)
print('Initial sample size for RLA with risk limit', alpha, ':', initial_sample)
@nealmcb
Copy link

nealmcb commented Jan 21, 2019

Thanks for this code!
Running it, I get ImportError: No module named permute.utils. The Python code for the permute module seems to be described in http://statlab.github.io/permute/permute.pdf
Is the code available somewhere? Or at least the permute.utils.binom_conf_interval() method?
Also, is it written for Python 3, Python 2, or both?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment