Skip to content

Instantly share code, notes, and snippets.

# Factory for a goodness-measure function over weight vectors.
# NOTE(review): only the signature, one constant and the plan comment are
# visible in this excerpt; the implementation and the return statement are not.
# missing_column_index and k are not used in the visible portion.
def goodness_measure_factory(self, missing_column_index, k):
# NOTE(review): not referenced in the visible portion -- presumably a
# success-rate threshold; confirm against the full implementation.
CUTOFF_PERCENT = 0.9
# Nested function that scores one candidate weight vector (some_weights).
def goodness_measure(some_weights):
# Plan: repeat the following num_tests times (num_tests is not defined in
# the visible portion). Split the known rows into a training set and a
# test set. Using the metric induced by some_weights, train on the training
# set and classify the test set; record the fraction of attempts classified
# correctly. The goodness measure is that fraction averaged over num_tests
# repetitions.
@classmethod
def get_random_weights(cls, row_length):
    """Return a random list of non-negative weights that sum to 1.

    The first ``row_length - 2`` weights are drawn one at a time, each
    uniformly from whatever is left of the unit budget; the last weight
    takes the remainder.  Earlier draws tend to be larger, so the list is
    shuffled before it is returned to avoid favouring any position.

    Args:
        row_length: Length of a data row.  ``row_length - 1`` weights are
            produced (at least one weight is always produced).
            NOTE(review): presumably one column is excluded from the
            metric -- confirm against the caller.

    Returns:
        A list of floats in ``[0, 1]`` whose sum is 1 up to rounding.
    """
    weights = []
    room_left = 1.0
    for _ in range(row_length - 2):
        weight = random.uniform(0, room_left)
        weights.append(weight)
        room_left -= weight
    # Whatever budget remains becomes the final weight, so the total is 1.
    weights.append(room_left)
    random.shuffle(weights)
    return weights
@classmethod
def transition(cls, some_weights):
    """Propose a neighbouring weight vector by shifting mass between two entries.

    Two distinct positions are picked at random; up to 0.05 is moved from
    one to the other.  The amount is capped so that the receiving entry
    never exceeds 1 and the giving entry never drops below 0, which keeps
    the total unchanged.

    Args:
        some_weights: Sequence of weights supporting slicing and item
            assignment (e.g. a list).  It is not modified.

    Returns:
        A copy of ``some_weights`` with the shift applied.  When there are
        fewer than two entries no shift is possible and an unchanged copy
        is returned.
    """
    candidate = some_weights[:]
    if len(candidate) < 2:
        # random.sample would raise ValueError; there is nothing to shift.
        return candidate
    a, b = random.sample(range(len(candidate)), 2)
    # Step size: at most .05, limited by the headroom of a and the mass of b.
    m = min(.05, 1 - candidate[a], candidate[b])
    candidate[a] += m
    candidate[b] -= m
    return candidate
# Holds a current state together with its goodness score, plus the two
# callables used to work with states.
# NOTE(review): the stepping logic is not visible in this excerpt.
class SimpleMCMC(object):
# start_state: the initial state.
# transition: callable stored for later use (per the comment in take_step,
#   it produces a candidate for the new state).
# goodness_measure: callable that scores a state.
def __init__(self, start_state, transition, goodness_measure):
self.present_state = start_state
self.transition = transition
self.goodness_measure = goodness_measure
# Score the starting state immediately so present_goodness is defined
# as soon as the object is constructed.
self.present_goodness = self.goodness_measure(self.present_state)
# Only the first comment of this method is visible in this excerpt.
def take_step(self):
# Use the transition function to find a candidate for the new state.
# Euclidean (L2) distance between the vectors w and v.
def euclid(w, v):
    """Return the Euclidean distance between ``w`` and ``v``.

    Components are paired positionally with ``zip``, so when the vectors
    differ in length the extra components of the longer one are ignored.
    """
    squared_total = sum((wi - vi) ** 2 for wi, vi in zip(w, v))
    return squared_total ** .5
# Quick-sort the list l, returning a new sorted list (l is not modified).
def qsort(l):
    """Return the elements of ``l`` in ascending order using quicksort.

    The first element is the pivot; elements smaller than it are sorted
    recursively and placed before it, all others after it.  Lists with
    zero or one element are returned as a copy.

    Note: the recursion depth grows with the input length for
    already-sorted input, so very long sorted lists can hit Python's
    recursion limit.
    """
    if len(l) <= 1:
        return list(l)
    pivot, rest = l[0], l[1:]
    return (qsort([x for x in rest if x < pivot])
            + [pivot]
            + qsort([x for x in rest if x >= pivot]))
# Flatten the list l. E.g. [1, [2], [[3, [4]]]] -> [1, 2, 3, 4].
def flatten(l):
    """Return a flat list of the non-list items of ``l``, in order.

    Nested ``list`` instances (at any depth) are expanded; every other
    object, including tuples and strings, is kept as a single item.
    Items are collected with ``extend``/``append`` rather than repeated
    list concatenation, which avoids re-copying the accumulated result
    for every element.
    """
    flat = []
    for item in l:
        if isinstance(item, list):
            flat.extend(flatten(item))
        else:
            flat.append(item)
    return flat
# compute the product of the elements in the list l.
import numpy as np
import random
import time
import sys
def timeit(function, args, num_iterations):
    """Call ``function(*args)`` repeatedly and return the total wall-clock time.

    Args:
        function: Callable to time.
        args: Sequence of positional arguments, unpacked on every call.
        num_iterations: How many times to call ``function``.

    Returns:
        Elapsed seconds (float) for all ``num_iterations`` calls together,
        measured with ``time.time()``.
    """
    start = time.time()
    for _ in range(num_iterations):
        function(*args)
    end = time.time()
    # The measurement was previously computed and then discarded.
    return end - start
# Runs num_trials trials over the rows returned by get_all_rows().
# NOTE(review): the remainder of the loop body is cut off in this excerpt.
def main():
# get_all_rows is defined outside this excerpt.
all_rows = get_all_rows()
num_trials = 20
# Starts at 0; NOTE(review): presumably accumulated across trials -- the
# code that updates it is not visible here.
total_mistakes = 0
for trial in range(num_trials):
# Each trial builds a new model and reshuffles the rows in place.
model = NearestNeighbor()
random.shuffle(all_rows)
import os
import sys
# Make the directory two levels above this file importable (presumably so the
# project-local imports that follow can be resolved).
path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
if path not in sys.path:
    # Insert at index 1 so sys.path[0] stays the first entry.
    sys.path.insert(1, path)
del path  # Do not leave the helper name in the module namespace.
import knn
from knn.nearest_neighbor1 import NearestNeighbor
import random
import twitter_grab
SCREEN_NAMES = [
'GineokwKoenig',
'RealNichelle',
'TheRealNimoy',
'WilliamShatner',
'GeorgeTakei',
import urllib2
import json
import time
# Collects query parameters for a Twitter user_timeline request.
# NOTE(review): the methods that set and serialize the parameters are not
# visible in this excerpt.
class QueryBuilder(object):
# Endpoint URL ending in '?'; NOTE(review): presumably the encoded query
# string is appended directly -- confirm where the URL is built.
BASE_URL = 'https://api.twitter.com/1/statuses/user_timeline.json?'
# NOTE(review): presumably the default number of tweets requested per
# call -- confirm where it is used.
DEFAULT_MAX_COUNT = 200
def __init__(self):
# Start with no query parameters set.
self._query_params = dict()