Created
June 19, 2018 06:05
-
-
Save nealmcb/b297237c50421f2a75d5aee682cd656d to your computer and use it in GitHub Desktop.
Demo of Ron Rivest's Election Audit Sampling with Tickets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Demo of Ron Rivest's Election Audit Sampling with Tickets" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"Ron Rivest has proposed an approach, called \"sampling with tickets\", allowing a risk-limiting audit to proceed independently in different counties in parallel, or a single county at different points in time, while still achieving an easy, efficient non-stratified sample.\n", | |
"\n", | |
"This is a quick demo of Ron's approach, in Python 3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import random\n", | |
"import hashlib\n", | |
"import heapq\n", | |
"import collections" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"Ballot = collections.namedtuple('Ballot', 'ticket, id')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def sha256(value):\n", | |
" \"\"\"Return the SHA-256 hash of the given string as a hex digest\n", | |
"\n", | |
" >>> sha256('')\n", | |
" 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'\n", | |
" \"\"\"\n", | |
"\n", | |
" # for python 2, may need: if isinstance(value, unicode): value = value.encode('utf-8')\n", | |
" return hashlib.sha256(value.encode('utf-8')).hexdigest()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def setup_ballots(N, seed):\n", | |
" \"return a heap of ballots\"\n", | |
"\n", | |
" ballots = []\n", | |
"\n", | |
" for ballot_id in range(N):\n", | |
" heapq.heappush(ballots, Ballot(\n", | |
" int(sha256(seed + str(ballot_id)), 16) * 2 ** -256,\n", | |
" ballot_id))\n", | |
"\n", | |
" return ballots" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def sample_ballots(ballot_spec, seed, sample_sizes):\n", | |
" \"\"\"For each round size in sample_sizes, return a sample with replacement of the given size\n", | |
" ballot_spec is either an integer or (someday) the name of a manifest file.\n", | |
" sample_sizes is an array of round sizes.\n", | |
"\n", | |
" TODO: if \"ballots\" is a file name, interpret it as a manifest\n", | |
" TODO: Each round should be sorted by id.\n", | |
" \"\"\"\n", | |
"\n", | |
" if isinstance(ballot_spec, int):\n", | |
" ballots = setup_ballots(ballot_spec, seed)\n", | |
"\n", | |
" # Maintain the array \"ballots\" as a heap with the invariant that all ballots\n", | |
" # are uniformly distributed over the range [z, 1.0], where z is the ticket\n", | |
" # number of the most recent sample.\n", | |
" \n", | |
" samples = []\n", | |
" for sample_size in sample_sizes:\n", | |
" sample = []\n", | |
" for _ in range (sample_size):\n", | |
" next_selection = heapq.heappop(ballots)\n", | |
" sample.append(next_selection)\n", | |
" next_selection = Ballot(random.uniform(next_selection.ticket, 1.0), next_selection.id)\n", | |
" heapq.heappush(ballots, next_selection)\n", | |
" samples.append(sample)\n", | |
"\n", | |
" merged_samples = [sample for round in samples for sample in round]\n", | |
" counts = collections.Counter([ballot.id for ballot in merged_samples])\n", | |
" dups = [(id, count) for id, count in counts.items() if count > 1]\n", | |
" print(\"Note: dups ([id, count), ...]: %s\" % dups)\n", | |
" \n", | |
" return samples" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Sampling with replacement" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Note: dups ([id, count), ...]: [(2, 2)]\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"[[Ballot(ticket=0.04100190558301588, id=13),\n", | |
" Ballot(ticket=0.0423833995335369, id=19),\n", | |
" Ballot(ticket=0.07352435971093178, id=4),\n", | |
" Ballot(ticket=0.11101993350291864, id=10),\n", | |
" Ballot(ticket=0.1775780699245684, id=2)],\n", | |
" [Ballot(ticket=0.19977805644581484, id=14),\n", | |
" Ballot(ticket=0.22148899861362173, id=16),\n", | |
" Ballot(ticket=0.24878913499709562, id=9),\n", | |
" Ballot(ticket=0.32328121459631143, id=2),\n", | |
" Ballot(ticket=0.3306671180518882, id=18)]]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"sample_ballots(20, \"12345\", [5, 5])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Sampling without replacement" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[Ballot(ticket=0.014349416384859667, id=4),\n", | |
" Ballot(ticket=0.11322107598015423, id=16),\n", | |
" Ballot(ticket=0.1352993540969171, id=0),\n", | |
" Ballot(ticket=0.14647280636254867, id=13),\n", | |
" Ballot(ticket=0.19160349460439044, id=5),\n", | |
" Ballot(ticket=0.19489100529351447, id=15),\n", | |
" Ballot(ticket=0.2669856231270792, id=14),\n", | |
" Ballot(ticket=0.2747320876434722, id=3),\n", | |
" Ballot(ticket=0.29058138956157303, id=19),\n", | |
" Ballot(ticket=0.32285949040380824, id=1)]" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"ballots = setup_ballots(20, \"123\")\n", | |
"sample = heapq.nsmallest(10, ballots)\n", | |
"sample" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
}, | |
"toc": { | |
"colors": { | |
"hover_highlight": "#DAA520", | |
"running_highlight": "#FF0000", | |
"selected_highlight": "#FFD700" | |
}, | |
"moveMenuLeft": true, | |
"nav_menu": { | |
"height": "95.234388px", | |
"width": "253.984188px" | |
}, | |
"navigate_menu": true, | |
"number_sections": true, | |
"sideBar": true, | |
"threshold": 4, | |
"toc_cell": false, | |
"toc_section_display": "block", | |
"toc_window_display": false, | |
"widenNotebook": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment