Skip to content

Instantly share code, notes, and snippets.

@alexklibisz
Created May 24, 2018 05:02
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save alexklibisz/cae40875b73583ea6f78d9ebdc7e328a to your computer and use it in GitHub Desktop.
Save alexklibisz/cae40875b73583ea6f78d9ebdc7e328a to your computer and use it in GitHub Desktop.
ElastiK-Nearest-Neighbors LSH Example
import numpy as np
def make_lsh_model(nb_tables, nb_bits, nb_dimensions, vector_sample):
# vector_sample: np arr w/ shape (2 * nb_tables * nb_tables, nb_dimensions).
# normals, midpoints: np arrs w/ shape (nb_bits, nb_dimensions)
# thresholds: np arrs w/ shape (nb_bits)
# all_normals, all_thresholds: lists w/ one normal, one threshold per table.
all_normals, all_thresholds = [], []
for i in range(0, len(vector_sample), 2 * nb_bits):
vector_sample_a = vector_sample[i:i + nb_bits]
vector_sample_b = vector_sample[i + nb_bits: i + 2 * nb_bits]
midpoints = (vector_sample_a + vector_sample_b) / 2
normals = vector_sample_a - midpoints
thresholds = np.zeros(nb_bits)
for j in range(nb_bits):
thresholds[j] = normals[j].dot(midpoints[j])
all_normals.append(normals)
all_thresholds.append(thresholds)
return all_normals, all_thresholds
def get_lsh_hashes(vec, all_normals, all_thresholds):
# vec: np arr w/ shape (nb_dimensions, )
# hashes: one hash per table.
hashes = dict()
for normal, thresholds in zip(all_normals, all_thresholds):
hsh = 0
dot = vec.dot(normal.T) # shape (nb_bits,)
for i, (d, t) in enumerate(zip(dot, thresholds)):
if d > t:
hsh += i ** 2
hashes[len(hashes)] = hsh
return hashes
if __name__ == "__main__":
nb_tabs = 10
nb_bits = 8
nb_dims = 20
vector_sample = np.random.normal(0, 3, (2 * nb_tabs * nb_bits, nb_dims))
all_normals, all_thresholds = make_lsh_model(
nb_tabs, nb_bits, nb_dims, vector_sample)
vec = np.random.normal(0, 3, (nb_dims,))
hashes = get_lsh_hashes(vec, all_normals, all_thresholds)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment