Skip to content

Instantly share code, notes, and snippets.

<!DOCTYPE html>
<html>
<head>
<title>Leaflet Example</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Import Leaflet assets -->
<link rel="stylesheet" href="http://leafletjs.com/dist/leaflet.css" />
<script src="http://leafletjs.com/dist/leaflet.js"></script>
import random
class MinHasher(object):
def __init__(self, n, universe_size, seed=None):
if seed != None: random.seed(seed)
self.hash_functions = [self._create_random_hash_function(universe_size) for i in range(n)]
def _create_random_hash_function(self, universe_size):
a = random.randint(0, universe_size)
b = random.randint(0, universe_size)
@cjdd3b
cjdd3b / csvjoin.py
Created April 2, 2015 21:54
CSV-flattening code for Harsh's research
import csv, os
# This chunk iterates through all of the csv files in a directory, turns them
# into 2-dimensional arrays (lists of lists), and puts all those arrays into
# a list called "tables"
tables = []
# Loop over all files in the current directory (which is what "." means)
for f in os.listdir('.'):
{
"took": 14,
"timed_out": false,
"_shards": {
"total": 6,
"successful": 6,
"failed": 0
},
"hits": {
"total": 1419,
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 6,
"successful": 6,
"failed": 0
},
"hits": {
"total": 1,
@cjdd3b
cjdd3b / gist:1714081
Created February 1, 2012 00:05
Super PAC IDs from OpenSecrets
name|id
1911 United|C00508200
50 State Strategy|C00502633
9-9-9 FUND|C00504241
Accountability 2010|C00489641
AFL-CIO Workers' Voices PAC|C00484287
Alaskans Standing Together|C00489385
America for the People|C00497081
America Get Up|C00494278
America Votes Action Fund|C00492520
@cjdd3b
cjdd3b / gist:5886658
Last active December 19, 2015 02:58
Simple S3 writes in Ruby
AMAZON_ACCESS_KEY = 'WHATEVER'
AMAZON_SECRET_KEY = 'SECRET_WHATEVER'
# I'm old-school, so I like the AWS-S3 gem. It's just a lightweight wrapper around Amazon's API.
# https://github.com/marcel/aws-s3
require "aws/s3"
include AWS::S3
def publish_json!(bucket='int.nyt.com', path='applications/represent-json/', filename='foo.json')
{
"discipline_id":"AS",
"discipline_name":"Alpine Skiing",
"results":
[
{
"id":"ASM010",
"name":"Men's Downhill",
"competitor_type":"ATH",
"results":
@cjdd3b
cjdd3b / nnsearch.py
Last active December 20, 2015 12:49
import numpy
def get_similar(vec, matrix, K=10):
# Set up the query vector and the whole dataset for K-nearest neighbors query
qvector = numpy.array([vec]).transpose()
alldata = numpy.array(matrix).transpose()
# You can't get more neighbors than there are entities
ndata = alldata.shape[1]
K = K if K < ndata else ndata
@cjdd3b
cjdd3b / compare.py
Last active December 24, 2015 07:49
Shows crude similarities of voting histories between members of Congress using roll call vote matrices from Poole, McCarty and Lewis: http://www.voteview.com/dwnl.htm. Uses vectorized operations to make similarity calculations happen super fast.
'''
compare.py
Quickly produces a pairwise similarity matrix of lawmakers' roll call votes, given
an input *.ord matrix file from Poole, McCarty and Lewis: http://www.voteview.com/dwnl.htm
'''
import numpy, string
from scipy.spatial.distance import cdist