# dodger487/ranked_choice_201906.py

## ranked_choice_201906.py
# 2019-06-24
# Python 3 plz

"""What would the result of a ranked choice election be given the data from
Data For Progress's survey?

https://www.dataforprogress.org/blog/2019/6/24/who-else-are-the-major-candidates-supporters-considering
http://filesforprogress.org/datasets/june_19_omni/
"""

from collections import Counter

import pandas as pd


# Load the survey export and keep only respondents whose first-choice column
# holds something other than 999.
# NOTE(review): 999 looks like a "no answer" sentinel -- confirm against the
# survey codebook.
#
# The respondent count does not line up with the numbers reported on the
# website (476 voters); this is a best effort:
#   * df[(df.pid3 == 1) & (df.primaryvote.isin({1, 2}))] only gives 386 voters.
#   * The TOP5_RANK_1 filter used here only gets 373 voters, but seems closer
#     to right.
df = (
    pd.read_csv("dfp_RV_omni_062019.csv")
    .loc[lambda frame: frame.TOP5_RANK_1 != 999]
)


# Map obtuse integers to lovely strings
# Lookup table from the survey's integer candidate codes to display names.
# NOTE(review): code 12 has no entry in this table -- confirm against the
# survey codebook whether that is intentional.
map_to_names = {
    1: "Joe Biden",
    2: "Bernie Sanders",
    3: "Kamala Harris",
    4: "Beto O’Rourke",
    5: "Cory Booker",
    6: "Amy Klobuchar",
    7: "Elizabeth Warren",
    8: "John Hickenlooper",
    9: "Kirsten Gillibrand",
    10: "John Delaney",
    11: "Julián Castro",
    # Trailing whitespace removed so the name prints cleanly.
    13: "Bill DeBlasio",
    14: "Tulsi Gabbard",
    15: "Pete Buttigieg",
    16: "Jay Inslee",
    17: "Tim Ryan",
    18: "Seth Moulton",
    19: "Eric Swalwell",
    20: "Andrew Yang",
    21: "Marianne Williamson",
    22: "Mike Gravel",
    23: "Steve Bullock",
    24: "Michael Bennet",
    25: "Wayne Messam",
}

# Just looking at top 10 rankings
# Only the ranking columns TOP5_RANK_1 .. TOP5_RANK_10 are used.
columns = ["TOP5_RANK_" + str(i) for i in range(1, 11)]

# `df` is the result of a boolean filter, so writing columns into it directly
# can trigger pandas' SettingWithCopyWarning. Work on an explicit copy.
df = df.copy()
for column in columns:
  # Codes with no entry in map_to_names come out of .map() as NaN.
  df[column] = df[column].map(map_to_names)


# Find this problem easier to think about as a list of lists...
# Each item in this list is one respondent's ballot: the mapped values of the
# ranking columns in column order (rank 1 first). Converting the whole block
# at once avoids building a Series per row with iterrows().
votes = df[columns].values.tolist()


# Ranked Choice Voting (I think):
# If there is a majority going to one candidate, that candidate wins.
# While there is no majority, eliminate the least popular candidate.
# Voters voting for that candidate now have their second choice as their top
# choice.
# Repeat until someone has a majority.

# Handy functions

def get_top_candidate(votes):
  """Return the (candidate, count) pair with the most first-choice votes.

  `votes` is a list of ballots; only the first entry of each ballot is
  counted. Raises IndexError if `votes` is empty or any ballot is empty.
  """
  first_choice_tally = Counter(ballot[0] for ballot in votes)
  leaders = first_choice_tally.most_common(1)
  return leaders[0]

def get_bottom_candidate(votes):
  """Return the (candidate, count) pair with the fewest first-choice votes.

  Every candidate that appears anywhere on a ballot is considered, so one who
  is ranked only in lower positions is reported with a count of 0 instead of
  being skipped. Without this, such a candidate would never be eliminated and
  could collect transferred votes in later rounds.

  Ties resolve to the candidate that entered the tally last; when every
  candidate has at least one first-choice vote, that is the order in which
  candidates first appear as a first choice. Raises IndexError if `votes` is
  empty or any ballot is empty.
  """
  tally = Counter(ballot[0] for ballot in votes)

  # Register zero-vote candidates after the real counts so the tie order
  # among candidates that do hold first-choice votes is left untouched.
  for ballot in votes:
    for candidate in ballot:
      tally.setdefault(candidate, 0)

  return tally.most_common()[-1]

def update_votes(votes, eliminated_candidate):
  """Return new ballots with `eliminated_candidate` removed from each one.

  The input ballots are not modified. A ballot that listed only the
  eliminated candidate comes back as an empty list.
  """
  return [
      [choice for choice in ballot if choice != eliminated_candidate]
      for ballot in votes
  ]

def remove_empty_ballots(votes):
  """Return a new list holding only the ballots that still have entries."""
  # len() is 0 (falsy) exactly for the ballots that must be dropped.
  return list(filter(len, votes))


# Remove missing rankings from every ballot.
# pd.isna() is used instead of `pd.np.nan in ballot` for two reasons: the
# `pd.np` alias no longer exists in pandas 2.x, and NaN never compares equal
# to itself, so a list membership test only matches by object identity.
votes = [[choice for choice in ballot if not pd.isna(choice)]
         for ballot in votes]
votes = remove_empty_ballots(votes)

# Number of ballots that rank at least one candidate.
N = len(votes)


# Run elimination rounds until one candidate holds a majority.
while votes:
  # Ballots that run out of candidates are dropped at the end of each round,
  # so the majority is measured against the ballots still in play. Measuring
  # against the initial ballot count can leave every candidate below 50%
  # forever, until the last one is eliminated and the tally crashes on an
  # empty list.
  active_ballots = len(votes)
  top_candidate, top_votes = get_top_candidate(votes)

  if top_votes / active_ballots > .5:
    print("We have a winner!")
    print(top_candidate, "wins with", top_votes, "votes.")
    break

  bottom_candidate, _ = get_bottom_candidate(votes)

  msg = ("Top candidate {cand} has only {top_votes} votes, {pct:.2f}. "
         "Eliminating {bottom_candidate}")
  print(msg.format(cand=top_candidate, top_votes=top_votes,
                   pct=top_votes / active_ballots,
                   bottom_candidate=bottom_candidate))

  # Promote each affected voter's next choice and discard exhausted ballots.
  votes = update_votes(votes, bottom_candidate)
  votes = remove_empty_ballots(votes)
else:
  # Reached only when there were no ballots to begin with.
  print("No ballots to count; no winner.")
	# 2019-06-24
	# Python 3 plz

	"""What would the result of a ranked choice election be given the data from
	Data For Progress's survey?

	https://www.dataforprogress.org/blog/2019/6/24/who-else-are-the-major-candidates-supporters-considering
	http://filesforprogress.org/datasets/june_19_omni/
	"""

	from collections import Counter

	import pandas as pd


	# Load the survey export and keep only respondents whose first-choice column
	# holds something other than 999.
	# NOTE(review): 999 looks like a "no answer" sentinel -- confirm against the
	# survey codebook.
	#
	# The respondent count does not line up with the numbers reported on the
	# website (476 voters); this is a best effort:
	#   * df[(df.pid3 == 1) & (df.primaryvote.isin({1, 2}))] only gives 386 voters.
	#   * The TOP5_RANK_1 filter used here only gets 373 voters, but seems closer
	#     to right.
	df = (
		pd.read_csv("dfp_RV_omni_062019.csv")
		.loc[lambda frame: frame.TOP5_RANK_1 != 999]
	)


	# Map obtuse integers to lovely strings
	# Lookup table from the survey's integer candidate codes to display names.
	# NOTE(review): code 12 has no entry in this table -- confirm against the
	# survey codebook whether that is intentional.
	map_to_names = {
		1: "Joe Biden",
		2: "Bernie Sanders",
		3: "Kamala Harris",
		4: "Beto O’Rourke",
		5: "Cory Booker",
		6: "Amy Klobuchar",
		7: "Elizabeth Warren",
		8: "John Hickenlooper",
		9: "Kirsten Gillibrand",
		10: "John Delaney",
		11: "Julián Castro",
		# Trailing whitespace removed so the name prints cleanly.
		13: "Bill DeBlasio",
		14: "Tulsi Gabbard",
		15: "Pete Buttigieg",
		16: "Jay Inslee",
		17: "Tim Ryan",
		18: "Seth Moulton",
		19: "Eric Swalwell",
		20: "Andrew Yang",
		21: "Marianne Williamson",
		22: "Mike Gravel",
		23: "Steve Bullock",
		24: "Michael Bennet",
		25: "Wayne Messam",
	}

	# Just looking at top 10 rankings
	# Only the ranking columns TOP5_RANK_1 .. TOP5_RANK_10 are used.
	columns = ["TOP5_RANK_" + str(i) for i in range(1, 11)]

	# `df` is the result of a boolean filter, so writing columns into it directly
	# can trigger pandas' SettingWithCopyWarning. Work on an explicit copy.
	df = df.copy()
	for column in columns:
		# Codes with no entry in map_to_names come out of .map() as NaN.
		df[column] = df[column].map(map_to_names)


	# Find this problem easier to think about as a list of lists...
	# Each item in this list is one respondent's ballot: the mapped values of the
	# ranking columns in column order (rank 1 first). Converting the whole block
	# at once avoids building a Series per row with iterrows().
	votes = df[columns].values.tolist()


	# Ranked Choice Voting (I think):
	# If there is a majority going to one candidate, that candidate wins.
	# While there is no majority, eliminate the least popular candidate.
	# Voters voting for that candidate now have their second choice as their top
	# choice.
	# Repeat until someone has a majority.

	# Handy functions

	def get_top_candidate(votes):
		"""Return the (candidate, count) pair with the most first-choice votes.

		`votes` is a list of ballots; only the first entry of each ballot is
		counted. Raises IndexError if `votes` is empty or any ballot is empty.
		"""
		first_choice_tally = Counter(ballot[0] for ballot in votes)
		return first_choice_tally.most_common(1)[0]

	def get_bottom_candidate(votes):
		"""Return the (candidate, count) pair with the fewest first-choice votes.

		Every candidate that appears anywhere on a ballot is considered, so one
		who is ranked only in lower positions is reported with a count of 0
		instead of being skipped. Without this, such a candidate would never be
		eliminated and could collect transferred votes in later rounds.

		Ties resolve to the candidate that entered the tally last. Raises
		IndexError if `votes` is empty or any ballot is empty.
		"""
		tally = Counter(ballot[0] for ballot in votes)

		# Register zero-vote candidates after the real counts so the tie order
		# among candidates that do hold first-choice votes is left untouched.
		for ballot in votes:
			for candidate in ballot:
				tally.setdefault(candidate, 0)

		return tally.most_common()[-1]

	def update_votes(votes, eliminated_candidate):
		"""Return new ballots with `eliminated_candidate` removed from each one.

		The input ballots are not modified. A ballot that listed only the
		eliminated candidate comes back as an empty list.
		"""
		return [
			[choice for choice in ballot if choice != eliminated_candidate]
			for ballot in votes
		]

	def remove_empty_ballots(votes):
		"""Return a new list holding only the ballots that still have entries."""
		return [ballot for ballot in votes if len(ballot) > 0]


	# Remove nans from list
	# Remove missing rankings from every ballot.
	# pd.isna() is used instead of `pd.np.nan in ballot` for two reasons: the
	# `pd.np` alias no longer exists in pandas 2.x, and NaN never compares equal
	# to itself, so a list membership test only matches by object identity.
	votes = [[choice for choice in ballot if not pd.isna(choice)]
	         for ballot in votes]
	votes = remove_empty_ballots(votes)

	# Number of ballots that rank at least one candidate.
	N = len(votes)


	# Run elimination rounds until one candidate holds a majority.
	while votes:
		# Ballots that run out of candidates are dropped at the end of each
		# round, so the majority is measured against the ballots still in play.
		# Measuring against the initial ballot count can leave every candidate
		# below 50% forever, until the tally crashes on an empty list.
		active_ballots = len(votes)
		top_candidate, top_votes = get_top_candidate(votes)

		if top_votes / active_ballots > .5:
			print("We have a winner!")
			print(top_candidate, "wins with", top_votes, "votes.")
			break

		bottom_candidate, _ = get_bottom_candidate(votes)

		msg = ("Top candidate {cand} has only {top_votes} votes, {pct:.2f}. "
		       "Eliminating {bottom_candidate}")
		print(msg.format(cand=top_candidate, top_votes=top_votes,
		                 pct=top_votes / active_ballots,
		                 bottom_candidate=bottom_candidate))

		# Promote each affected voter's next choice and discard exhausted ballots.
		votes = update_votes(votes, bottom_candidate)
		votes = remove_empty_ballots(votes)
	else:
		# Reached only when there were no ballots to begin with.
		print("No ballots to count; no winner.")