Skip to content

Instantly share code, notes, and snippets.

@DRMacIver
Last active August 29, 2015 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DRMacIver/48da8108956665f93c98 to your computer and use it in GitHub Desktop.
Save DRMacIver/48da8108956665f93c98 to your computer and use it in GitHub Desktop.
import numpy as np
from random import Random
fixed_random = Random(666)
def gen_matrix(n):
result = np.zeros(dtype=int, shape=(n, n))
order = range(n)
fixed_random.shuffle(order)
for t, i in enumerate(order):
for j in order[t+1:]:
result[i, j] = fixed_random.randint(0, 100)
result[j, i] = fixed_random.randint(0, 70)
return result
def weight(matrix, order):
n = len(order)
return sum(
matrix[order[j], order[i]]
for i in xrange(n)
for j in xrange(i+1, n)
)
def shuffle_order(m):
n = m.shape[0]
result = range(n)
fixed_random.shuffle(result)
return result
def in_wrong_order(matrix, i, j):
return matrix[j, i] > matrix[i, j]
def locally_kemenize(matrix, ordered):
for i in xrange(1, len(ordered)):
j = i
while j > 0:
if in_wrong_order(
matrix,
ordered[j-1], ordered[j]
):
ordered[j], ordered[j-1] = ordered[j-1], ordered[j]
j -= 1
else:
break
def improved_shuffle(matrix):
attempt = shuffle_order(matrix)
locally_kemenize(matrix, attempt)
w1 = weight(matrix, attempt)
backwards = list(attempt)
backwards.reverse()
locally_kemenize(matrix, backwards)
w2 = weight(matrix, backwards)
if w1 <= w2:
return attempt
else:
return backwards
def score_of_value_at_index(matrix, order, x, i):
return sum(
matrix[x, y] for y in order[:i]
) + sum(
matrix[y, x] for y in order[i:]
)
def insertion_order(matrix):
remaining = range(matrix.shape[0])
fixed_random.shuffle(remaining)
order = [remaining.pop()]
for x in remaining:
best_index = -1
best_score = float('inf')
for i in xrange(len(remaining) + 1):
score_at_i = score_of_value_at_index(matrix, order, x, i)
if score_at_i < best_score:
best_score = score_at_i
best_index = i
order.insert(best_index, x)
return order
def kwik_sort(matrix, elements=None):
if elements is None:
elements = range(matrix.shape[0])
elements = list(elements)
if len(elements) <= 1:
return elements
pivot = fixed_random.choice(elements)
left = []
right = []
for x in elements:
if x != pivot:
if in_wrong_order(matrix, x, pivot):
right.append(x)
else:
left.append(x)
return kwik_sort(matrix, left) + [pivot] + kwik_sort(matrix, right)
def item_ordering_from_pair_ordering(n, pairs):
order = np.zeros(dtype=bool, shape=(n, n))
for i, j in pairs:
if order[i, j] or order[j, i]:
continue
order[i, j] = True
for k in xrange(n):
if order[k, i]:
order[k, j] = True
elif order[j, k]:
order[i, k] = True
result = range(n)
def compare(x, y):
if order[x, y]:
return -1
if order[y, x]:
return 1
if x < y:
return -1
if y > x:
return 1
return 0
result.sort(compare)
return result
def ranked_pairs(matrix):
n = matrix.shape[0]
pairs = [
(i, j)
for i in xrange(n)
for j in xrange(n)
if i != j
]
pairs.sort(key=matrix.__getitem__, reverse=True)
return item_ordering_from_pair_ordering(n, pairs)
def smoothed_indegree(matrix):
n = matrix.shape[0]
results = range(n)
results.sort(
key=lambda i: sum(
matrix[j, i] for j in xrange(n) if i != j
)
)
locally_kemenize(matrix, results)
return results
def print_metrics(name, metrics):
metrics = np.array(metrics)
print "%s: %.2f +/- %.2f. 1%%=%.2f, 50%%=%.2f, 99%%=%.2f" % (
name, metrics.mean(), metrics.std(),
np.percentile(metrics, 1),
np.percentile(metrics, 50),
np.percentile(metrics, 99),
)
scores_by_name = {}
def evaluate(matrices, f, name):
scores = []
for m in matrices:
n = m.shape[0]
ordering = f(m)
assert len(ordering) == n
assert sorted(ordering) == range(n), set(range(n)) - set(ordering)
scores.append(weight(m, ordering))
scores_by_name[name] = np.array(scores)
print_metrics(name, scores)
def p_value_left_is_better(left, right, repetitions=500):
threshold = left.mean() - right.mean()
n_left = len(left)
pooled = np.array(list(left) + list(right))
under = 0
for _ in xrange(repetitions):
fixed_random.shuffle(pooled)
score = pooled[:n_left].mean() - pooled[n_left:].mean()
if score <= threshold:
under += 1
return float(under + 1) / (repetitions + 1)
if __name__ == '__main__':
matrices = [
gen_matrix(100)
for _ in xrange(100)
]
evaluate(matrices, shuffle_order, "Simple shuffle")
evaluate(matrices, improved_shuffle, "Improved shuffle")
evaluate(matrices, smoothed_indegree, "Smoothed indegree")
evaluate(matrices, insertion_order, "Insertion order")
evaluate(matrices, kwik_sort, "Kwik Sort")
evaluate(matrices, ranked_pairs, "Ranked pairs")
comparisons = []
false_discovery_rate = 0.01
for left_name, left_vals in scores_by_name.items():
for right_name, right_vals in scores_by_name.items():
if left_name != right_name:
comparisons.append((
p_value_left_is_better(left_vals, right_vals),
left_name, right_name
))
comparisons.sort()
m = len(comparisons)
threshold = 0
for i, (p, l, r) in enumerate(comparisons, 1):
if p <= i * false_discovery_rate / m:
threshold = p
messages = []
for p, l, r in comparisons:
if p <= threshold:
messages.append("%s < %s (p=%.4f)" % (l, r, p))
messages.sort()
print
print "Pairwise comparisons:"
for m in messages:
print m
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment