Skip to content

Instantly share code, notes, and snippets.

@cjdd3b
Last active December 24, 2015 07:49
Show Gist options
  • Save cjdd3b/6766232 to your computer and use it in GitHub Desktop.
Save cjdd3b/6766232 to your computer and use it in GitHub Desktop.
Shows crude similarities of voting histories between members of Congress using roll call vote matrices from Poole, McCarty and Lewis: http://www.voteview.com/dwnl.htm. Uses vectorized operations to make similarity calculations happen super fast.
'''
compare.py
Quickly produces a pairwise similarity matrix of lawmakers' roll call votes, given
an input *.ord matrix file from Poole, McCarty and Lewis: http://www.voteview.com/dwnl.htm
'''
import numpy, string
from scipy.spatial.distance import cdist
########## HELPERS ##########
class LookerUpper(object):
    '''
    Formats pairwise similarity scores for members identified by name.

    `names` is a sequence of member names and `matrix` holds the scores;
    the position of a name in `names` is used as its row/column index
    into `matrix`.
    '''

    def __init__(self, names, matrix):
        self.matrix = matrix
        self.names = names

    def lookup(self, name):
        '''
        Yield one 'OTHER: score' string per entry in the row belonging to `name`.

        Raises ValueError (from list.index) on first iteration if `name`
        is not present in `names`.
        '''
        row = self.matrix[self.names.index(name)]
        for position, score in enumerate(row):
            yield '%s: %s' % (self.names[position], score)

    def lookup_pair(self, name1, name2):
        '''
        Return 'name1 -> name2: score' for the single cell shared by two names.

        Raises ValueError (from list.index) if either name is not present.
        '''
        row_idx = self.names.index(name1)
        col_idx = self.names.index(name2)
        score = self.matrix[row_idx, col_idx]
        return '%s -> %s: %s' % (name1, name2, score)
def check(status):
    '''
    Replace non-vote codes with 0 so they do not count for Jaccard.

    A status whose integer value is 7, 8 or 9 becomes the int 0; any other
    status is handed back exactly as it was passed in (same type, same value).
    '''
    non_vote_codes = (7, 8, 9)
    return 0 if int(status) in non_vote_codes else status
def cleansplit(str):
    '''
    Split one line of the fixed-width input file into its five fields.

    The line is cut at columns 12, 20, 25 and 36 and each piece is stripped
    of surrounding whitespace. Returns a list of five strings; a line that
    is shorter than expected simply yields empty strings for the missing
    fields.
    '''
    # NOTE: the parameter shadows the builtin ``str``; the name is kept so
    # existing callers are unaffected.
    # The third slice used to be [:20:25] (start=0, stop=20, step=25), which
    # returned only the first character of the line instead of columns 20-25.
    boundaries = [(0, 12), (12, 20), (20, 25), (25, 36), (36, None)]
    # A list comprehension (rather than map + string.strip) returns an
    # indexable list on both Python 2 and Python 3.
    return [str[start:stop].strip() for start, stop in boundaries]
########## MAIN ##########
if __name__ == '__main__':
    # Parse the fixed-width .ord input file. Blank lines (for example a
    # trailing newline at the end of the file) are skipped: they would
    # produce an empty vote string and make the matrix below ragged.
    # Plain 'r' is used because the old 'rU' mode is rejected by newer
    # Python 3 releases; cleansplit strips line endings anyway.
    with open('hou112kh.ord.txt', 'r') as infile:
        rows = [cleansplit(line) for line in infile if line.strip()]

    names = [r[3] for r in rows]  # r[3] is the name code

    # Build a 2-D numeric array from all the vote data in the input file:
    # one row per lawmaker, one column per roll call. r[4] is the string of
    # single-character vote codes. check() may hand back either 0 or the
    # original character, so every cell is coerced to int to keep the array
    # uniformly numeric.
    data = numpy.array([[int(check(i)) for i in r[4]] for r in rows])

    # Calculate vectorized pairwise similarity between all lawmakers using
    # Jaccard distance, intended as a measure of what percentage of each
    # lawmaker's votes lined up with another's. Subtracting from 1.0 just
    # turns the measure into a similarity score rather than a distance.
    # NOTE(review): the vote codes are not booleans. How cdist's 'jaccard'
    # treats non-0/1 numeric input has varied between SciPy releases --
    # confirm the installed version compares the codes element-wise rather
    # than converting them to booleans first.
    similarities = 1.0 - cdist(data, data, 'jaccard')

    # Print similarities, given a lookup name
    looker_upper = LookerUpper(names, similarities)
    for line in looker_upper.lookup('BOEHNER'):
        print(line)

    # And here's a lookup of two names
    print(looker_upper.lookup_pair('MCCAUL', 'CANTOR'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment