Skip to content

Instantly share code, notes, and snippets.

/ Secret

Created May 12, 2013 07:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/4eb3bce078ccd5c8e9a6 to your computer and use it in GitHub Desktop.
Save anonymous/4eb3bce078ccd5c8e9a6 to your computer and use it in GitHub Desktop.
# Author notes: I tried my best not to use numpy or scipy, in an attempt to keep things simple and understandable.
# However, numpy provides the excellent binomial() function, which the Python standard library lacks.
# The rest of the code could be written far more efficiently with numpy,
# but it has been kept simple for those unfamiliar with it.
# All you need to know is that binomial(n, p) runs n trials with success probability p and returns the number of successes.
from __future__ import division
from numpy.random import binomial
import csv
import sys
import os
import math
import random
import bisect
import pprint
# Candidate peak values of the winning-ratio distribution, keyed first by
# input file name and then by the peak estimate to use
# ('low', 'mid', 'high', 'mean' or 'median').
winningRatioPeakDict = {
    'SPR.csv': {
        'low': 0.35,
        'mid': 0.45,
        'high': 0.55,
        'mean': 0.4919155,
        'median': 0.5013012,
    },
    'SPR_P_only.csv': {
        'low': 0.45,
        'mid': 0.55,
        'high': 0.65,
        'mean': 0.4951695,
        'median': 0.4816654,
    },
    # NOTE(review): the key of this third entry was missing from the source;
    # 'SPR_N_only.csv' is assumed because turnoutRatioPeakDict uses the same
    # three file names -- confirm.
    'SPR_N_only.csv': {
        'low': 0.35,
        'mid': 0.45,
        'high': 0.55,
        'mean': 0.4878147,
        'median': 0.496434,
    },
}
# Candidate peak values of the turnout-ratio distribution, keyed first by
# input file name and then by the peak estimate to use
# ('low', 'mid', 'high', 'mean' or 'median').
turnoutRatioPeakDict = {
    'SPR.csv': {
        'low': 0.85,
        'mid': 0.87,
        'high': 0.89,
        'mean': 0.8455734,
        'median': 0.8535894,
    },
    'SPR_P_only.csv': {
        'low': 0.775,
        'mid': 0.825,
        'high': 0.875,
        'mean': 0.8354881,
        'median': 0.8477099,
    },
    'SPR_N_only.csv': {
        'low': 0.85,
        'mid': 0.87,
        'high': 0.89,
        'mean': 0.85,
        'median': 0.8558019,
    },
}
# Command-line configuration with fallbacks.
#   argv[1] is the input filename,
#   argv[3] takes 'high', 'low', 'mid', 'mean', 'median',
#   argv[4] / argv[5] are the incremental / extreme fraud probabilities.
# A missing argument (IndexError) or an unknown file name / peak name
# (KeyError) falls back to the 'SPR.csv' / 'mid' entry instead of crashing.
try:
    WINNING_RATIO_PEAK = winningRatioPeakDict[sys.argv[1]][sys.argv[3]]
except (IndexError, KeyError):
    WINNING_RATIO_PEAK = winningRatioPeakDict['SPR.csv']['mid']

try:
    TURNOUT_RATIO_PEAK = turnoutRatioPeakDict[sys.argv[1]][sys.argv[3]]
except (IndexError, KeyError):
    TURNOUT_RATIO_PEAK = turnoutRatioPeakDict['SPR.csv']['mid']

# A missing (IndexError) or non-numeric (ValueError) probability means
# "no fraud of that kind".
try:
    FI = float(sys.argv[4])
except (IndexError, ValueError):
    FI = 0

try:
    FE = float(sys.argv[5])
except (IndexError, ValueError):
    FE = 0
def readFile(path=None):
    """Load the electorate table from a CSV file.

    path -- file to read; defaults to sys.argv[1], the input filename given
            on the command line.

    Every CSV row becomes a dict keyed by the header names. A value that
    parses as a float is stored as a float; anything else is kept unchanged.

    Returns a list with one dict per row.
    """
    if path is None:
        path = sys.argv[1]
    electorates = []
    # The with-block closes the file even if reading fails part-way.
    with open(path) as f:
        for row in csv.DictReader(f):
            d = {}
            # manual coercion
            for k, v in row.items():
                try:
                    d[k] = float(v)
                except (TypeError, ValueError):
                    # Not numeric (or a missing cell, which DictReader
                    # reports as None): keep the raw value.
                    d[k] = v
            electorates.append(d)
    return electorates
def getLeftSD(electorates, peak=WINNING_RATIO_PEAK):
    """Return the spread of winning ratios on the left side of the peak.

    Only electorates whose 'WinningRatio' is strictly below `peak` are used.
    The result is the square root of their mean squared deviation from
    `peak` (the peak, not the sample mean, is the reference value).

    Returns 0 when no electorate lies left of the peak.
    """
    # get all electorates left of the peak
    squaredDeviations = [
        (e['WinningRatio'] - peak) ** 2
        for e in electorates
        if e['WinningRatio'] < peak
    ]
    # A real list (not map()) is needed so len() also works on Python 3.
    if not squaredDeviations:
        return 0  # hack! nothing left of the peak, so report no spread
    return math.sqrt(sum(squaredDeviations) / len(squaredDeviations))
def getRightSD(electorates, peak=WINNING_RATIO_PEAK):
    """Return the spread of winning ratios on the right side of the peak.

    Only electorates whose 'WinningRatio' is strictly above `peak` are used.
    The result is the square root of their mean squared deviation from
    `peak` (the peak, not the sample mean, is the reference value).

    Returns 0 when no electorate lies right of the peak, mirroring
    getLeftSD. A caller that uses the result as a sampling spread has to
    cope with a spread of 0.
    """
    # get all electorates right of the peak
    squaredDeviations = [
        (e['WinningRatio'] - peak) ** 2
        for e in electorates
        if e['WinningRatio'] > peak
    ]
    # A real list (not map()) is needed so len() also works on Python 3.
    if not squaredDeviations:
        return 0
    return math.sqrt(sum(squaredDeviations) / len(squaredDeviations))
def getTurnoutSD(electorates, peak = TURNOUT_RATIO_PEAK):
    """Return the spread of turnout ratios around the peak.

    The result is the square root of the mean squared deviation of every
    electorate's 'TurnoutRatio' from `peak`.

    Returns 0 for an empty list of electorates.
    """
    # A real list (not map()) is needed so len() also works on Python 3.
    squaredDeviations = [(e['TurnoutRatio'] - peak) ** 2 for e in electorates]
    if not squaredDeviations:
        return 0
    return math.sqrt(sum(squaredDeviations) / len(squaredDeviations))
def incrementalFraud(rightSD, turnOut, registeredVoters, votedForWinning):
    """Return the winner's vote count after incremental fraud.

    rightSD          -- spread used to draw the fraud intensity
    turnOut          -- turnout ratio of the electorate
    registeredVoters -- number of registered voters
    votedForWinning  -- ratio of the turnout that voted for the winner

    A fraud intensity in (0, 1] is drawn from a zero-centred Gaussian with
    standard deviation `rightSD` by rejection sampling. With a spread of 0
    no positive value can ever be drawn, so the intensity stays 0 and the
    untouched vote count is returned.

    The result is truncated to an int.
    """
    fraudIntensity = 0
    # random.gauss(0, 0) always yields 0, which the loop condition rejects
    # forever; only sample when there is an actual spread.
    if rightSD != 0:
        while fraudIntensity <= 0 or fraudIntensity > 1:
            fraudIntensity = random.gauss(0, rightSD)
    fraudulentVoteShare = (
        turnOut * votedForWinning                           # share already cast for the winner
        + fraudIntensity * (1 - turnOut)                    # taken from the non-voting share
        + fraudIntensity * (1 - votedForWinning) * turnOut  # taken from the share cast for others
    )
    return int(registeredVoters * fraudulentVoteShare)
def extremeFraud(registeredVoters, winningRatio):
    """Return the number of votes produced by extreme fraud.

    registeredVoters -- number of registered voters
    winningRatio     -- ratio of registered voters counted for the winner

    The registered voters are split into a winning count (truncated to an
    int) and the remaining opposition count; the larger of the two is
    returned.
    """
    votedForWinning = int(registeredVoters * winningRatio)
    votedForOpposition = registeredVoters - votedForWinning
    if votedForOpposition > votedForWinning:
        votedForWinning = votedForOpposition
    # The caller adds this value to a vote count, so it must be returned.
    return votedForWinning
def simulate(electorates, fi=FI, fe=FE, turnoutPeak = TURNOUT_RATIO_PEAK, winningPeak=WINNING_RATIO_PEAK):
    """Simulate an election over the given electorates.

    electorates -- list of dicts with at least the keys 'SEAT',
                   'RegisteredVoters', 'VotedForWinning', 'WinningRatio'
                   and 'TurnoutRatio'
    fi          -- probability that incremental fraud happens in an electorate
    fe          -- probability that extreme fraud happens in an electorate
    turnoutPeak -- centre of the Gaussian the turnout ratio is drawn from
    winningPeak -- centre of the Gaussian the winning ratio is drawn from

    Returns a list with one simulated dict per input electorate.
    """
    # Measure the spreads around the same peaks that are sampled from below,
    # so that non-default peaks are honoured consistently.
    turnoutSD = getTurnoutSD(electorates, turnoutPeak)
    leftSD = getLeftSD(electorates, winningPeak)
    rightSD = getRightSD(electorates, winningPeak)
    simulatedElectorates = []
    for electorate in electorates:
        originalVotedForWinning = electorate['VotedForWinning']
        registeredVoters = electorate['RegisteredVoters']
        turnoutRatio = random.gauss(turnoutPeak, turnoutSD)
        winningRatio = random.gauss(winningPeak, leftSD)
        willIncrementalFraudHappen = binomial(n=1, p=fi)
        willExtremeFraudHappen = binomial(n=1, p=fe)
        newVotedForWinning = originalVotedForWinning
        if willIncrementalFraudHappen:  # binomial returns a 1
            # Incremental fraud replaces the recorded vote count.
            newVotedForWinning = incrementalFraud(rightSD, turnoutRatio, registeredVoters, winningRatio)
        if willExtremeFraudHappen:
            # Extreme fraud is added on top of whatever count we have so far.
            additionalFraudVotes = extremeFraud(registeredVoters, winningRatio)
            newVotedForWinning += additionalFraudVotes
        simulatedElectorate = {
            'SEAT': electorate['SEAT'],
            #'Type': electorate['Type'],
            'RegisteredVoters': registeredVoters,
            'VoterTurnout': int(turnoutRatio * registeredVoters),  # can't expect 0.5 of a person
            'TurnoutRatio': turnoutRatio,
            'VotedForWinning': newVotedForWinning,
            'WinningRatio': newVotedForWinning/registeredVoters,
            #'WinningParty.1': electorate['WinningParty.1'],
            #'WinningParty.2': electorate['WinningParty.2']
        }
        try:
            # log(|registered - winning| / winning)
            simulatedElectorate['Rescaled'] = math.log(
                abs(registeredVoters - newVotedForWinning) / newVotedForWinning)
        except (ValueError, ZeroDivisionError):
            # Zero winning votes divide by zero; a winning count equal to the
            # registered voters (log of 0) or a negative one is outside the
            # domain of log. Mark the row instead of aborting the simulation.
            simulatedElectorate['Rescaled'] = "ERROR"
        simulatedElectorates.append(simulatedElectorate)
    return simulatedElectorates
if __name__ == '__main__':
    # argv[1] is the input CSV (read by readFile), argv[2] is the output CSV.
    electorates = readFile()
    simulatedElectorates = simulate(electorates)
    if not simulatedElectorates:
        # Without a first row there are no field names to write.
        sys.exit('no electorates to simulate in %s' % sys.argv[1])
    fieldNames = list(simulatedElectorates[0].keys())
    # The with-block flushes and closes the output file.
    with open(sys.argv[2], 'w') as g:
        writer = csv.DictWriter(g, fieldnames=fieldNames)
        writer.writerow(dict(zip(fieldNames, fieldNames)))  # header row
        for electorate in simulatedElectorates:
            writer.writerow(electorate)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment