Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@ajschumacher
Last active May 30, 2018 12:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ajschumacher/bdfecc8bdd48cad1b701b6cf4d647f4c to your computer and use it in GitHub Desktop.
Save ajschumacher/bdfecc8bdd48cad1b701b6cf4d647f4c to your computer and use it in GitHub Desktop.
import random
import math
def mean(elements):
return sum(elements) / len(elements)
def euclidean_dist(first, second):
assert len(first) == len(second)
return sum((f - s)**2 for f, s in zip(first, second))**0.5
def nonzero_distances(element, others, distance=euclidean_dist):
# nonzero so that we don't ever take log(0) later
distances = [distance(element, other) for other in others]
return [distance for distance in distances if distance != 0]
def lid_mle(element, others, k, adj=0): # adj=-1 is correct; adj=-2 is unbiased
dists = sorted(nonzero_distances(element, others))[:k]
assert len(dists) == k
max_dist = dists.pop() # relies on sorting
return (k + adj) / sum(math.log(max_dist) - math.log(dist) for dist in dists)
def random_space(n, d, bounds=(-1, 1)):
return [[random.uniform(*bounds) for _ in range(d)] for _ in range(n)]
mean([lid_mle([0, 0], random_space(100, 2), k=20, adj=0) for _ in range(1000)])
## 2.21897037721
mean([lid_mle([0, 0], random_space(100, 2), k=20, adj=-1) for _ in range(1000)])
## 2.110546885273464
mean([lid_mle([0, 0], random_space(100, 2), k=20, adj=-2) for _ in range(1000)])
## 2.0037728978735405
# UPDATE 2018-05-30
# The arithmetic mean is the wrong mean for these estimates.
# As the derivation of the MLE shows, the harmonic mean is appropriate.
# The derivation and experiment both show that the "adjustment" by -1,
# which really just ensures that a real harmonic mean is taken, is correct.
# I don't see evidence for the adjustment by -2 being unbiased.
def harmonic_mean(elements):
return 1. / mean([1. / element for element in elements])
harmonic_mean([lid_mle([0, 0], random_space(100, 2), k=20, adj=0) for _ in range(1000)])
## 2.12435934448712
harmonic_mean([lid_mle([0, 0], random_space(100, 2), k=20, adj=-1) for _ in range(1000)])
## 1.9940765878401865
harmonic_mean([lid_mle([0, 0], random_space(100, 2), k=20, adj=-1) for _ in range(1000)])
## 2.0045411415543963
harmonic_mean([lid_mle([0, 0], random_space(100, 2), k=20, adj=-2) for _ in range(1000)])
## 1.897686738399365
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment