Chase Davis cjdd3b

## columbiacrime.html
<!DOCTYPE html>
<html>

<head>
    <title>Leaflet Example</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0">

    <!-- Import Leaflet assets -->
    <link rel="stylesheet" href="http://leafletjs.com/dist/leaflet.css" />
    <script src="http://leafletjs.com/dist/leaflet.js"></script>

## minhasher.py
import random

class MinHasher(object):
    def __init__(self, n, universe_size, seed=None):
        if seed != None: random.seed(seed)
        self.hash_functions = [self._create_random_hash_function(universe_size) for i in range(n)]

    def _create_random_hash_function(self, universe_size):
        a = random.randint(0, universe_size)
        b = random.randint(0, universe_size)

## csvjoin.py
import csv, os

# This chunk iterates through all of the csv files in a directory, turns them
# into 2-dimensional arrays (lists of lists), and puts all those arrays into
# a list called "tables"

tables = []

# Loop over all files in the current directory (which is what "." means)
for f in os.listdir('.'):

## dates-output.json
{
  "took": 14,
  "timed_out": false,
  "_shards": {
    "total": 6,
    "successful": 6,
    "failed": 0
  },
  "hits": {
    "total": 1419,

## company-detail.json
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 6,
    "successful": 6,
    "failed": 0
  },
  "hits": {
    "total": 1,

## gist:1714081
name|id
1911 United|C00508200
50 State Strategy|C00502633
9-9-9 FUND|C00504241
Accountability 2010|C00489641
AFL-CIO Workers' Voices PAC|C00484287
Alaskans Standing Together|C00489385
America for the People|C00497081
America Get Up|C00494278
America Votes Action Fund|C00492520

## gist:5886658
AMAZON_ACCESS_KEY = 'WHATEVER'
AMAZON_SECRET_KEY = 'SECRET_WHATEVER'

# I'm old-school, so I like the AWS-S3 gem. It's just a lightweight wrapper around Amazon's API.
# https://github.com/marcel/aws-s3
require "aws/s3"
include AWS::S3

def publish_json!(bucket='int.nyt.com', path='applications/represent-json/', filename='foo.json')

## gist:5922941
{
   "discipline_id":"AS",
   "discipline_name":"Alpine Skiing",
   "results":
   [
      {
         "id":"ASM010",
         "name":"Men's Downhill",
         "competitor_type":"ATH",
         "results":

## nnsearch.py
import numpy

def get_similar(vec, matrix, K=10):
    # Set up the query vector and the whole dataset for K-nearest neighbors query
    qvector = numpy.array([vec]).transpose()
    alldata = numpy.array(matrix).transpose()

    # You can't get more neighbors than there are entities
    ndata = alldata.shape[1]
    K = K if K < ndata else ndata

## compare.py
'''
compare.py

Quickly produces a pairwise similarity matrix of lawmakers' roll call votes, given
an input *.ord matrix file from Poole, McCarty and Lewis: http://www.voteview.com/dwnl.htm
'''

import numpy, string
from scipy.spatial.distance import cdist
	<!DOCTYPE html>
	<html>

	<head>
	<title>Leaflet Example</title>
	<meta name="viewport" content="width=device-width, initial-scale=1.0">

	<!-- Import Leaflet assets -->
	<link rel="stylesheet" href="http://leafletjs.com/dist/leaflet.css" />
	<script src="http://leafletjs.com/dist/leaflet.js"></script>
	import random

	class MinHasher(object):
	def __init__(self, n, universe_size, seed=None):
	if seed != None: random.seed(seed)
	self.hash_functions = [self._create_random_hash_function(universe_size) for i in range(n)]

	def _create_random_hash_function(self, universe_size):
	a = random.randint(0, universe_size)
	b = random.randint(0, universe_size)
	import csv, os

	# This chunk iterates through all of the csv files in a directory, turns them
	# into 2-dimensional arrays (lists of lists), and puts all those arrays into
	# a list called "tables"

	tables = []

	# Loop over all files in the current directory (which is what "." means)
	for f in os.listdir('.'):
	{
	"took": 14,
	"timed_out": false,
	"_shards": {
	"total": 6,
	"successful": 6,
	"failed": 0
	},
	"hits": {
	"total": 1419,
	{
	"took": 2,
	"timed_out": false,
	"_shards": {
	"total": 6,
	"successful": 6,
	"failed": 0
	},
	"hits": {
	"total": 1,
	name|id
	1911 United|C00508200
	50 State Strategy|C00502633
	9-9-9 FUND|C00504241
	Accountability 2010|C00489641
	AFL-CIO Workers' Voices PAC|C00484287
	Alaskans Standing Together|C00489385
	America for the People|C00497081
	America Get Up|C00494278
	America Votes Action Fund|C00492520
	AMAZON_ACCESS_KEY = 'WHATEVER'
	AMAZON_SECRET_KEY = 'SECRET_WHATEVER'

	# I'm old-school, so I like the AWS-S3 gem. It's just a lightweight wrapper around Amazon's API.
	# https://github.com/marcel/aws-s3
	require "aws/s3"
	include AWS::S3

	def publish_json!(bucket='int.nyt.com', path='applications/represent-json/', filename='foo.json')
	{
	"discipline_id":"AS",
	"discipline_name":"Alpine Skiing",
	"results":
	[
	{
	"id":"ASM010",
	"name":"Men's Downhill",
	"competitor_type":"ATH",
	"results":
	import numpy

	def get_similar(vec, matrix, K=10):
	# Set up the query vector and the whole dataset for K-nearest neighbors query
	qvector = numpy.array([vec]).transpose()
	alldata = numpy.array(matrix).transpose()

	# You can't get more neighbors than there are entities
	ndata = alldata.shape[1]
	K = K if K < ndata else ndata
	'''
	compare.py

	Quickly produces a pairwise similarity matrix of lawmakers' roll call votes, given
	an input *.ord matrix file from Poole, McCarty and Lewis: http://www.voteview.com/dwnl.htm
	'''

	import numpy, string
	from scipy.spatial.distance import cdist