Skip to content

Instantly share code, notes, and snippets.

@asw456
Forked from maheshakya/compare_ANN.py
Last active August 29, 2015 14:10
Show Gist options
  • Save asw456/4fd430f4b93299b5622d to your computer and use it in GitHub Desktop.
Save asw456/4fd430f4b93299b5622d to your computer and use it in GitHub Desktop.
import time
import numpy as np
from sklearn.datasets.samples_generator import make_blobs
from sklearn.neighbors import LSHForest
from sklearn.neighbors import NearestNeighbors
from annoy import AnnoyIndex
from pyflann import FLANN
# Benchmark configuration: number of random queries per dataset and number of
# neighbours requested from every index.
n_iter, n_neighbors = 50, 100

# Seeded generator so the sequence of sampled query points is reproducible.
rng = np.random.RandomState(42)

# Dataset shapes to benchmark, as (n_samples, n_features).
n_samples_n_features_pairs = [
    (1000, 100),
    (1000, 500),
    (10000, 100),
    (10000, 500),
    (10000, 1000),
]

# Result series; one value is appended per dataset shape.
(average_query_times_lshf,
 average_query_times_flann,
 average_query_times_annoy) = [], [], []
(accuracies_lshf,
 accuracies_annoy,
 accuracies_flann) = [], [], []

# Index build times; overwritten for each dataset shape.
build_time_lshf = build_time_flann = build_time_annoy = 0
# For every dataset shape: build each index once, then measure the average
# query time and the average accuracy (overlap with the exact neighbours)
# over n_iter randomly chosen query points.


def _recall(approx_indices, exact_indices, k):
    """Return the fraction of the ``k`` exact neighbours found approximately.

    ``np.intersect1d`` flattens its inputs, so 1-D index lists and
    ``(1, k)``-shaped index arrays can be mixed.
    """
    return np.intersect1d(approx_indices, exact_indices).shape[0] / float(k)


for n_samples, n_features in n_samples_n_features_pairs:
    # Single-argument print calls with %-formatting produce the same text on
    # Python 2 and Python 3.
    print("----------------------------------------------------------------------------------------")
    print("n_samples:  %s n_features:  %s" % (n_samples, n_features))
    X, _ = make_blobs(n_samples=n_samples, n_features=n_features,
                      centers=10, cluster_std=5,
                      random_state=0)

    # Exact search; its results are the ground truth for the accuracies.
    nbrs = NearestNeighbors(n_neighbors=n_neighbors)
    nbrs.fit(X)

    # Build the LSHForest index.
    lshf = LSHForest(n_candidates=50, n_neighbors=n_neighbors)
    t0 = time.time()
    lshf.fit(X)
    build_time_lshf = time.time() - t0
    print("LSHF index build time:  %s" % build_time_lshf)

    # Build the ANNOY index: add every sample, then build once.
    annoy = AnnoyIndex(n_features, metric='euclidean')
    t0 = time.time()
    for i in range(n_samples):
        annoy.add_item(i, X[i].tolist())
    # NOTE(review): the number of trees is set to n_features, as in the
    # original script -- confirm this coupling is intended.
    annoy.build(n_features)
    build_time_annoy = time.time() - t0
    print("ANNOY index build time:  %s" % build_time_annoy)

    # Build the FLANN index.
    flann = FLANN()
    t0 = time.time()
    flann.build_index(X)
    build_time_flann = time.time() - t0
    print("FLANN index build time:  %s" % build_time_flann)

    # Running sums over the n_iter queries; divided by n_iter afterwards.
    total_time_lshf = total_time_annoy = total_time_flann = 0
    total_recall_lshf = total_recall_annoy = total_recall_flann = 0

    for i in range(n_iter):
        query = X[rng.randint(0, n_samples)]
        # scikit-learn estimators expect (n_queries, n_features) input;
        # the reshape is done outside the timed regions.
        query_2d = query.reshape(1, -1)

        # LSHF query
        t0 = time.time()
        approx_neighbors_lshf = lshf.kneighbors(query_2d,
                                                return_distance=False)[0]
        total_time_lshf += time.time() - t0

        # ANNOY query
        t0 = time.time()
        approx_neighbors_annoy = annoy.get_nns_by_vector(query.tolist(),
                                                         n_neighbors)
        total_time_annoy += time.time() - t0

        # FLANN query (returns indices and distances; only indices are used)
        t0 = time.time()
        approx_neighbors_flann, _ = flann.nn_index(query, n_neighbors)
        total_time_flann += time.time() - t0

        # Exact neighbours for this query (not timed).
        neighbors_exact = nbrs.kneighbors(query_2d, return_distance=False)

        total_recall_lshf += _recall(approx_neighbors_lshf,
                                     neighbors_exact, n_neighbors)
        total_recall_annoy += _recall(approx_neighbors_annoy,
                                      neighbors_exact, n_neighbors)
        total_recall_flann += _recall(approx_neighbors_flann[0],
                                      neighbors_exact, n_neighbors)

    # Record the per-query averages for this dataset shape.
    average_query_times_lshf.append(total_time_lshf / float(n_iter))
    accuracies_lshf.append(total_recall_lshf / float(n_iter))
    average_query_times_annoy.append(total_time_annoy / float(n_iter))
    accuracies_annoy.append(total_recall_annoy / float(n_iter))
    average_query_times_flann.append(total_time_flann / float(n_iter))
    accuracies_flann.append(total_recall_flann / float(n_iter))

    # Report the values just appended, in the original output format.
    for label, times, accuracies in (
            ("LSHF", average_query_times_lshf, accuracies_lshf),
            ("ANNOY", average_query_times_annoy, accuracies_annoy),
            ("FLANN", average_query_times_flann, accuracies_flann)):
        print("%s average query time:  %s , Average accuracy:  %s"
              % (label, times[-1], accuracies[-1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment