-
-
Save DRMacIver/48da8108956665f93c98 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from random import Random | |
fixed_random = Random(666) | |
def gen_matrix(n): | |
result = np.zeros(dtype=int, shape=(n, n)) | |
order = range(n) | |
fixed_random.shuffle(order) | |
for t, i in enumerate(order): | |
for j in order[t+1:]: | |
result[i, j] = fixed_random.randint(0, 100) | |
result[j, i] = fixed_random.randint(0, 70) | |
return result | |
def weight(matrix, order): | |
n = len(order) | |
return sum( | |
matrix[order[j], order[i]] | |
for i in xrange(n) | |
for j in xrange(i+1, n) | |
) | |
def shuffle_order(m): | |
n = m.shape[0] | |
result = range(n) | |
fixed_random.shuffle(result) | |
return result | |
def in_wrong_order(matrix, i, j): | |
return matrix[j, i] > matrix[i, j] | |
def locally_kemenize(matrix, ordered): | |
for i in xrange(1, len(ordered)): | |
j = i | |
while j > 0: | |
if in_wrong_order( | |
matrix, | |
ordered[j-1], ordered[j] | |
): | |
ordered[j], ordered[j-1] = ordered[j-1], ordered[j] | |
j -= 1 | |
else: | |
break | |
def improved_shuffle(matrix): | |
attempt = shuffle_order(matrix) | |
locally_kemenize(matrix, attempt) | |
w1 = weight(matrix, attempt) | |
backwards = list(attempt) | |
backwards.reverse() | |
locally_kemenize(matrix, backwards) | |
w2 = weight(matrix, backwards) | |
if w1 <= w2: | |
return attempt | |
else: | |
return backwards | |
def score_of_value_at_index(matrix, order, x, i): | |
return sum( | |
matrix[x, y] for y in order[:i] | |
) + sum( | |
matrix[y, x] for y in order[i:] | |
) | |
def insertion_order(matrix): | |
remaining = range(matrix.shape[0]) | |
fixed_random.shuffle(remaining) | |
order = [remaining.pop()] | |
for x in remaining: | |
best_index = -1 | |
best_score = float('inf') | |
for i in xrange(len(remaining) + 1): | |
score_at_i = score_of_value_at_index(matrix, order, x, i) | |
if score_at_i < best_score: | |
best_score = score_at_i | |
best_index = i | |
order.insert(best_index, x) | |
return order | |
def kwik_sort(matrix, elements=None): | |
if elements is None: | |
elements = range(matrix.shape[0]) | |
elements = list(elements) | |
if len(elements) <= 1: | |
return elements | |
pivot = fixed_random.choice(elements) | |
left = [] | |
right = [] | |
for x in elements: | |
if x != pivot: | |
if in_wrong_order(matrix, x, pivot): | |
right.append(x) | |
else: | |
left.append(x) | |
return kwik_sort(matrix, left) + [pivot] + kwik_sort(matrix, right) | |
def item_ordering_from_pair_ordering(n, pairs): | |
order = np.zeros(dtype=bool, shape=(n, n)) | |
for i, j in pairs: | |
if order[i, j] or order[j, i]: | |
continue | |
order[i, j] = True | |
for k in xrange(n): | |
if order[k, i]: | |
order[k, j] = True | |
elif order[j, k]: | |
order[i, k] = True | |
result = range(n) | |
def compare(x, y): | |
if order[x, y]: | |
return -1 | |
if order[y, x]: | |
return 1 | |
if x < y: | |
return -1 | |
if y > x: | |
return 1 | |
return 0 | |
result.sort(compare) | |
return result | |
def ranked_pairs(matrix): | |
n = matrix.shape[0] | |
pairs = [ | |
(i, j) | |
for i in xrange(n) | |
for j in xrange(n) | |
if i != j | |
] | |
pairs.sort(key=matrix.__getitem__, reverse=True) | |
return item_ordering_from_pair_ordering(n, pairs) | |
def smoothed_indegree(matrix): | |
n = matrix.shape[0] | |
results = range(n) | |
results.sort( | |
key=lambda i: sum( | |
matrix[j, i] for j in xrange(n) if i != j | |
) | |
) | |
locally_kemenize(matrix, results) | |
return results | |
def print_metrics(name, metrics): | |
metrics = np.array(metrics) | |
print "%s: %.2f +/- %.2f. 1%%=%.2f, 50%%=%.2f, 99%%=%.2f" % ( | |
name, metrics.mean(), metrics.std(), | |
np.percentile(metrics, 1), | |
np.percentile(metrics, 50), | |
np.percentile(metrics, 99), | |
) | |
scores_by_name = {} | |
def evaluate(matrices, f, name): | |
scores = [] | |
for m in matrices: | |
n = m.shape[0] | |
ordering = f(m) | |
assert len(ordering) == n | |
assert sorted(ordering) == range(n), set(range(n)) - set(ordering) | |
scores.append(weight(m, ordering)) | |
scores_by_name[name] = np.array(scores) | |
print_metrics(name, scores) | |
def p_value_left_is_better(left, right, repetitions=500): | |
threshold = left.mean() - right.mean() | |
n_left = len(left) | |
pooled = np.array(list(left) + list(right)) | |
under = 0 | |
for _ in xrange(repetitions): | |
fixed_random.shuffle(pooled) | |
score = pooled[:n_left].mean() - pooled[n_left:].mean() | |
if score <= threshold: | |
under += 1 | |
return float(under + 1) / (repetitions + 1) | |
if __name__ == '__main__': | |
matrices = [ | |
gen_matrix(100) | |
for _ in xrange(100) | |
] | |
evaluate(matrices, shuffle_order, "Simple shuffle") | |
evaluate(matrices, improved_shuffle, "Improved shuffle") | |
evaluate(matrices, smoothed_indegree, "Smoothed indegree") | |
evaluate(matrices, insertion_order, "Insertion order") | |
evaluate(matrices, kwik_sort, "Kwik Sort") | |
evaluate(matrices, ranked_pairs, "Ranked pairs") | |
comparisons = [] | |
false_discovery_rate = 0.01 | |
for left_name, left_vals in scores_by_name.items(): | |
for right_name, right_vals in scores_by_name.items(): | |
if left_name != right_name: | |
comparisons.append(( | |
p_value_left_is_better(left_vals, right_vals), | |
left_name, right_name | |
)) | |
comparisons.sort() | |
m = len(comparisons) | |
threshold = 0 | |
for i, (p, l, r) in enumerate(comparisons, 1): | |
if p <= i * false_discovery_rate / m: | |
threshold = p | |
messages = [] | |
for p, l, r in comparisons: | |
if p <= threshold: | |
messages.append("%s < %s (p=%.4f)" % (l, r, p)) | |
messages.sort() | |
print "Pairwise comparisons:" | |
for m in messages: | |
print m |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment