Slater Stich sl8r000

## goodness.py
    def goodness_measure_factory(self, missing_column_index, k):
        CUTOFF_PERCENT = 0.9
        def goodness_measure(some_weights):
            # Do the following num_tests times: Split the known rows into
            # a training set and a test set. Then use the metric based
            # on some_weights, train on the training set, and test on the
            # testing set; record how successful you were in classifying as
            # a percent of attempts, and average that over num_tests to
            # get your goodness measure.


## weight_trainer.py
    @classmethod
    def get_random_weights(cls, row_length):
        """Build a randomly ordered list of weights that sum to 1.

        ``row_length - 2`` weights are drawn one at a time, each uniformly
        from whatever part of the unit budget is still unassigned; the
        leftover budget then becomes the final weight. That yields
        ``row_length - 1`` weights in total (a single weight of 1.0 when
        ``row_length`` is 2 or less). The sum is 1 up to float rounding.

        NOTE(review): no ``return`` is visible in this excerpt, so as shown
        the method returns None -- confirm that ``return weights`` follows.
        """
        weights = []
        # Unassigned share of the total weight of 1.
        room_left = 1.0
        for j in range(row_length - 2):
            weight_j = random.uniform(0, room_left)
            weights.append(weight_j)
            room_left -= weight_j
        # Whatever is left goes to the last weight so the total stays 1.
        weights.append(room_left)
        # Each draw is capped by the remaining budget, so later draws tend to
        # be smaller; shuffle so that position carries no such bias.
        random.shuffle(weights)

## transition.py
    @classmethod
    def transition(cls, some_weights):
        """Return a copy of some_weights with weight moved between two entries.

        Two distinct positions are chosen at random and at most 0.05 is
        moved from the second one to the first. For weights that already lie
        in [0, 1] the amount is capped so that the receiving entry does not
        exceed 1 and the giving entry does not drop below 0; the total is
        unchanged (up to float rounding). When some_weights is a list it is
        left untouched, because the adjustment is made on a slice copy.

        Raises ValueError (from random.sample) when there are fewer than two
        weights.
        """
        proposal = some_weights[:]
        receiver, donor = random.sample(range(len(proposal)), 2)
        # Largest step that keeps both touched entries inside [0, 1].
        shift = min(.05, 1 - proposal[receiver], proposal[donor])
        proposal[receiver] += shift
        proposal[donor] -= shift
        return proposal

## simple_markov.py
class SimpleMCMC(object):
    def __init__(self, start_state, transition, goodness_measure):
        """Store the starting state and both callables, then score the state.

        start_state: kept as ``present_state``.
        transition: kept as-is on the instance; it is not called here.
        goodness_measure: kept on the instance and called once, with
            ``start_state`` as its only argument.
        """
        self.present_state = start_state
        self.transition = transition
        self.goodness_measure = goodness_measure

        # Score the starting state right away and keep the result alongside it.
        self.present_goodness = self.goodness_measure(self.present_state)

    def take_step(self):
        # Use the transition function to find a candidate for the new state.

## one_liners.py
# Euclidean distance between the vectors w and v: the square root of the
# summed squared coordinate differences. Coordinates are paired with zip, so
# any extra trailing entries of the longer vector are ignored.
euclid = lambda w, v: pow(sum(pow(wi - vi, 2) for wi, vi in zip(w, v)), .5)

# Quick-sort the list l and return the result as a new list: take l[0] as the
# pivot, recursively sort the elements smaller than it and the elements not
# smaller than it, and join the two sorted parts around the pivot. A list of
# length 0 or 1 is already sorted and is simply copied.
# Warning: first-element pivot, so O(n^2) time and O(n) recursion depth on
# input that is already sorted.
qsort = lambda l : list(l) if len(l) < 2 else (
    qsort([x for x in l[1:] if x < l[0]])
    + [l[0]]
    + qsort([x for x in l[1:] if x >= l[0]]))

# Flatten the list l. E.g. [1, [2], [[3, [4]]]] -> [1,2,3,4]. Only elements
# that are themselves lists are expanded; anything else (tuples included) is
# kept as a single item. Warning: O(n^2), because sum() builds a new list for
# every concatenation.
# The generator must be parenthesized: it is not the only argument to sum().
flatten = lambda l : sum((flatten(x) if isinstance(x, list) else [x] for x in l), [])

# Compute the product of the elements in the list l.

## distance_time_test.py
import numpy as np
import random
import time
import sys

def timeit(function, args, num_iterations):
    """Call ``function(*args)`` ``num_iterations`` times between two clock reads.

    ``args`` is unpacked as the positional arguments of every call.

    NOTE(review): this excerpt ends right after ``end`` is taken; nothing
    visible here uses or returns ``end - start`` -- confirm against the
    full file.
    """
    start = time.time()
    # The loop index is unused; only the number of calls matters.
    for i in range(num_iterations):
        function(*args)
    end = time.time()

## reengage.py
def main():
    all_rows = get_all_rows()


    num_trials = 20
    total_mistakes = 0
    for trial in range(num_trials):
        model = NearestNeighbor()
        random.shuffle(all_rows)


## engage.py
import os
import sys
# Put the directory two levels above this file on sys.path (at index 1, right
# after the first entry) unless it is already listed -- presumably so that the
# knn imports that follow can be resolved. The temporary name is deleted
# afterwards so it does not linger in the module namespace.
path = os.path.dirname(__file__)
path = os.path.abspath(os.path.join(path, '../..'))
if path not in sys.path:
    sys.path.insert(1, path)
del path

import knn
from knn.nearest_neighbor1 import NearestNeighbor

## make_data.py
import random

import twitter_grab

SCREEN_NAMES = [
                'GineokwKoenig',
                'RealNichelle',
                'TheRealNimoy',
                'WilliamShatner',
                'GeorgeTakei',

## twitter_grab.py
import urllib2
import json
import time

class QueryBuilder(object):
    BASE_URL = 'https://api.twitter.com/1/statuses/user_timeline.json?'
    DEFAULT_MAX_COUNT = 200

    def __init__(self):
        self._query_params = dict()
	def goodness_measure_factory(self, missing_column_index, k):
	CUTOFF_PERCENT = 0.9
	def goodness_measure(some_weights):
	# Do the following num_tests times: Split the known rows into
	# a training set and a test set. Then use the metric based
	# on some_weights, train on the training set, and test on the
	# testing set; record how successful you were in classifying as
	# a percent of attempts, and average that over num_tests to
	# get your goodness measure.
	@classmethod
	def get_random_weights(cls, row_length):
	weights = []
	room_left = 1.0
	for j in range(row_length - 2):
	weight_j = random.uniform(0, room_left)
	weights.append(weight_j)
	room_left -= weight_j
	weights.append(room_left)
	random.shuffle(weights)
	@classmethod
	def transition(cls, some_weights):
		"""Return a copy of some_weights with weight moved between two entries.

		Two distinct positions are chosen at random and at most 0.05 is
		moved from the second one to the first. For weights that already lie
		in [0, 1] the amount is capped so that the receiving entry does not
		exceed 1 and the giving entry does not drop below 0; the total is
		unchanged (up to float rounding). When some_weights is a list it is
		left untouched, because the adjustment is made on a slice copy.

		Raises ValueError (from random.sample) when there are fewer than two
		weights.
		"""
		candidate = some_weights[:]
		a, b = random.sample(range(len(candidate)), 2)
		# Largest step that keeps both touched entries inside [0, 1].
		m = min(.05, 1 - candidate[a], candidate[b])
		candidate[a] += m
		candidate[b] -= m
		return candidate
	class SimpleMCMC(object):
	def __init__(self, start_state, transition, goodness_measure):
	self.present_state = start_state
	self.transition = transition
	self.goodness_measure = goodness_measure

	self.present_goodness = self.goodness_measure(self.present_state)

	def take_step(self):
	# Use the transition function to find a candidate for the new state.
	# Compute the euclidean distance between vectors v and w: the square root
	# of the summed squared coordinate differences. Coordinates are paired
	# with zip, so extra trailing entries of the longer vector are ignored.
	euclid = lambda w,v : (sum((wi - vi)**2 for wi,vi in zip(w,v)))**.5

	# Quick-sort the list l and return the result as a new list: take l[0] as
	# the pivot, recursively sort the elements smaller than it and the
	# elements not smaller than it, and join the two sorted parts around the
	# pivot. A list of length 0 or 1 is already sorted and is simply copied.
	# Warning: first-element pivot, so O(n^2) time and O(n) recursion depth on
	# input that is already sorted.
	qsort = lambda l : list(l) if len(l) < 2 else (
		qsort([x for x in l[1:] if x < l[0]])
		+ [l[0]]
		+ qsort([x for x in l[1:] if x >= l[0]]))

	# Flatten the list l. E.g. [1, [2], [[3, [4]]]] -> [1,2,3,4]. Only elements
	# that are themselves lists are expanded; anything else (tuples included)
	# is kept as a single item. Warning: O(n^2), because sum() builds a new
	# list for every concatenation.
	# The generator must be parenthesized: it is not the only argument to sum().
	flatten = lambda l : sum((flatten(x) if isinstance(x, list) else [x] for x in l), [])

	# compute the product of the elements in the list l.
	import numpy as np
	import random
	import time
	import sys

	def timeit(function, args, num_iterations):
	start = time.time()
	for i in range(num_iterations):
	function(*args)
	end = time.time()
	def main():
	all_rows = get_all_rows()


	num_trials = 20
	total_mistakes = 0
	for trial in range(num_trials):
	model = NearestNeighbor()
	random.shuffle(all_rows)
	import os
	import sys
	# Put the directory two levels above this file on sys.path (at index 1,
	# right after the first entry) unless it is already listed -- presumably so
	# that the knn imports that follow can be resolved. The temporary name is
	# deleted afterwards so it does not linger in the module namespace.
	path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../..'))
	if path not in sys.path:
		sys.path.insert(1, path)
	del path

	import knn
	from knn.nearest_neighbor1 import NearestNeighbor
	import random

	import twitter_grab

	SCREEN_NAMES = [
	'GineokwKoenig',
	'RealNichelle',
	'TheRealNimoy',
	'WilliamShatner',
	'GeorgeTakei',
	import urllib2
	import json
	import time

	class QueryBuilder(object):
	BASE_URL = 'https://api.twitter.com/1/statuses/user_timeline.json?'
	DEFAULT_MAX_COUNT = 200

	def __init__(self):
	self._query_params = dict()