This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def goodness_measure_factory(self, missing_column_index, k): | |
CUTOFF_PERCENT = 0.9 | |
def goodness_measure(some_weights): | |
# Do the following num_tests times: Split the known rows into | |
# a training set and a test set. Then use the metric based | |
# on some_weights, train on the training set, and test on the | |
# testing set; record how successful you were in classifying as | |
# a percent of attempts, and average that over num_tests to | |
# get your goodness measure. | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod | |
def get_random_weights(cls, row_length): | |
weights = [] | |
room_left = 1.0 | |
for j in range(row_length - 2): | |
weight_j = random.uniform(0, room_left) | |
weights.append(weight_j) | |
room_left -= weight_j | |
weights.append(room_left) | |
random.shuffle(weights) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@classmethod
def transition(cls, some_weights):
    """Propose a neighbouring weight vector.

    Two distinct positions are picked at random and a small amount of
    weight is moved from one (the donor) to the other (the receiver).
    The amount is at most 0.05 and is capped so that, for weights that
    already lie in [0, 1], the receiver does not exceed 1 and the donor
    does not drop below 0.  Whatever is added to one entry is removed
    from the other, so the total weight stays the same.

    some_weights is not modified; a sliced copy is changed and returned.
    random.sample raises ValueError when there are fewer than two weights.
    """
    proposal = some_weights[:]
    receiver, donor = random.sample(range(len(proposal)), 2)
    # Largest step that keeps both touched entries inside [0, 1].
    shift = min(.05, 1 - proposal[receiver], proposal[donor])
    proposal[receiver] = proposal[receiver] + shift
    proposal[donor] = proposal[donor] - shift
    return proposal
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class SimpleMCMC(object): | |
def __init__(self, start_state, transition, goodness_measure): | |
self.present_state = start_state | |
self.transition = transition | |
self.goodness_measure = goodness_measure | |
self.present_goodness = self.goodness_measure(self.present_state) | |
def take_step(self): | |
# Use the transition function to find a candidate for the new state. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compute the euclidean distance between vectors v and w.
def euclid(w, v):
    """Return the Euclidean distance between the vectors w and v.

    Components are paired up with zip, so when the two vectors differ in
    length the surplus components of the longer one are ignored.
    """
    squared_total = 0
    for wi, vi in zip(w, v):
        squared_total += (wi - vi) ** 2
    return squared_total ** .5
# Quick-Sort the list l
def qsort(l):
    """Return a new list holding the elements of l in ascending order.

    The first element is used as the pivot; the remaining elements are
    split into those smaller than the pivot and those greater than or
    equal to it, and each part is sorted recursively.  The input list is
    not modified.  An empty list yields an empty list.

    Recursion depth grows with the size of the larger partition, so an
    already-sorted input of length n recurses n levels deep.
    """
    if not l:
        # Base case: also guards the l[0] lookup below.
        return []
    pivot = l[0]
    rest = l[1:]
    smaller = [x for x in rest if x < pivot]
    not_smaller = [x for x in rest if x >= pivot]
    # Without recursing into both halves this would only partition once.
    return qsort(smaller) + [pivot] + qsort(not_smaller)
# Flatten the list l. E.g. [1, [2], [[3, [4]]]] -> [1,2,3,4].
def flatten(l):
    """Return a flat list of the non-list items found anywhere inside l.

    Only instances of list are descended into; any other value (tuples
    and strings included) is kept as a single item.  Items appear in the
    order a left-to-right, depth-first walk encounters them.  The input
    is not modified.
    """
    flat = []
    for x in l:
        if isinstance(x, list):
            # extend() appends in place, avoiding the repeated list
            # copying that summing lists with sum(..., []) would do.
            flat.extend(flatten(x))
        else:
            flat.append(x)
    return flat
# compute the product of the elements in the list l. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import random | |
import time | |
import sys | |
def timeit(function, args, num_iterations): | |
start = time.time() | |
for i in range(num_iterations): | |
function(*args) | |
end = time.time() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def main(): | |
all_rows = get_all_rows() | |
num_trials = 20 | |
total_mistakes = 0 | |
for trial in range(num_trials): | |
model = NearestNeighbor() | |
random.shuffle(all_rows) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
# Make the directory two levels above this file importable, so that the
# packages imported just below can be found when this file is run from
# inside the source tree.
path = os.path.dirname(__file__)
path = os.path.abspath(os.path.join(path, '../..'))
if path not in sys.path:
    # Insert at index 1, i.e. directly after the first sys.path entry.
    sys.path.insert(1, path)
# Do not leave the helper name behind in the module namespace.
del path
import knn | |
from knn.nearest_neighbor1 import NearestNeighbor |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import twitter_grab | |
SCREEN_NAMES = [ | |
'GineokwKoenig', | |
'RealNichelle', | |
'TheRealNimoy', | |
'WilliamShatner', | |
'GeorgeTakei', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2 | |
import json | |
import time | |
class QueryBuilder(object): | |
BASE_URL = 'https://api.twitter.com/1/statuses/user_timeline.json?' | |
DEFAULT_MAX_COUNT = 200 | |
def __init__(self): | |
self._query_params = dict() |