Skip to content

Instantly share code, notes, and snippets.

@hotchpotch
Created November 18, 2023 08:44
Show Gist options
  • Save hotchpotch/57d508beb09bbf1e18bee79293ac2ee5 to your computer and use it in GitHub Desktop.
Save hotchpotch/57d508beb09bbf1e18bee79293ac2ee5 to your computer and use it in GitHub Desktop.
IVF, HNSW, PQ benchmark
# base: https://github.com/facebookresearch/faiss/blob/main/benchs/bench_gpu_sift1m.py
# base code License: MIT License
#
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import time
import numpy as np
import pandas as pd
import pdb
import faiss
from datasets import load_sift1M
def evaluate(index, xq, gt, k):
    """Time one batched search and score its results against the ground truth.

    Args:
        index: Object exposing ``search(xq, k)`` that returns a
            ``(distances, ids)`` pair of 2-D arrays.
        xq: 2-D array of queries, one row per query. Must contain at least
            one row, since the results are averaged over the row count.
        gt: 2-D array of ground-truth ids; only its first column is used.
        k: Number of neighbours requested from ``index.search``.

    Returns:
        A ``(ms_per_query, recalls)`` tuple. ``ms_per_query`` is the elapsed
        search time in milliseconds divided by the number of queries.
        ``recalls`` maps each cutoff ``n`` in ``1, 2, 3, 5, 10, 100`` to the
        number of matches between the first ``n`` returned ids and the first
        ground-truth id, divided by the number of queries. Cutoffs larger
        than ``k`` are scored on the ``k`` ids that were actually returned.
    """
    nq = xq.shape[0]
    # perf_counter() is monotonic and high resolution, unlike time.time(),
    # which can jump when the system clock is adjusted.
    t0 = time.perf_counter()
    _, I = index.search(xq, k)  # noqa: E741
    t1 = time.perf_counter()
    top_n = [1, 2, 3, 5, 10, 100]
    # gt[:, :1] keeps a column shape so it broadcasts against I[:, :n].
    recalls = {n: (I[:, :n] == gt[:, :1]).sum() / float(nq) for n in top_n}
    return (t1 - t0) * 1000.0 / nq, recalls
# Empty accumulator for the benchmark rows: the configuration columns come
# first, followed by one recall column per cutoff.
df = pd.DataFrame(
    columns=[
        "name",
        "expantion_type",
        "expantion",
        "time(ms)",
        "memory(MB)",
        *(f"R@{cutoff}" for cutoff in (1, 2, 3, 5, 10, 100)),
    ]
)
# Further down, xt is passed to index.train(), xb to index.add(), xq is the
# query batch and gt is what the search results are scored against.
xb, xq, xt, gt = load_sift1M()
print("load data")
print(*(split.shape for split in (xb, xq, xt, gt)))
# debug
# xb = xb[:100000]
(nq, d) = xq.shape  # d is the dimensionality handed to faiss.index_factory
# we need only a StandardGpuResources per GPU
res = faiss.StandardGpuResources()
# faiss index_factory strings to benchmark, grouped by index family.
target_indexes = [
    "IVF16384,Flat",
    # IVF + 32-byte PQ
    *(f"IVF{nlist},PQ32" for nlist in (4096, 8192, 16384)),
    # IVF + 64-byte PQ
    *(f"IVF{nlist},PQ64" for nlist in (1024, 2048, 4096, 8192, 16384)),
    "HNSW",
    # HNSW graph + PQ codes
    *(
        f"HNSW{links},PQ{code_size}"
        for links, code_size in ((32, 32), (32, 64), (64, 32), (64, 64), (64, 128))
    ),
]
# Benchmark every index type: build it, measure its serialized size, then
# sweep the search-time expansion parameter and record latency and recall.
for index_type in target_indexes:
    print("# ", index_type)
    is_hnsw = "HNSW" in index_type
    index = faiss.index_factory(d, index_type)

    # Build on the GPU only when that can work: faiss has no GPU
    # implementation of HNSW, and there may be no GPU at all.
    use_gpu = not is_hnsw and faiss.get_num_gpus() > 0
    if use_gpu:
        co = faiss.GpuClonerOptions()
        # some of the PQ variants use 64-byte codes, so we must set the
        # lookup tables to 16 bit float (this is due to the limited
        # temporary memory).
        co.useFloat16 = True
        index = faiss.index_cpu_to_gpu(res, 0, index, co)
    index.train(xt)
    index.add(xb)

    # All measurements below run on the CPU version of the index.
    index_cpu = faiss.index_gpu_to_cpu(index) if use_gpu else index
    size_in_bytes = len(faiss.serialize_index(index_cpu))
    size_in_mbytes = float(size_in_bytes) / 1024 / 1024
    # use index on CPU
    index = index_cpu

    # warmup: run it on the index that is actually timed, so the first
    # measured configuration does not pay the cold-start cost.
    index.search(xq, 123)

    results = []
    for shift in range(6):
        expansion = 16 << shift  # 16, 32, 64, 128, 256, 512
        if is_hnsw:
            index.hnsw.efSearch = expansion
        else:
            index.nprobe = expansion
        t, r = evaluate(index, xq, gt, 100)
        results.append(
            {
                "name": index_type,
                "expantion_type": "efSearch" if is_hnsw else "nprobe",
                "expantion": expansion,
                "time(ms)": f"{t:.3f}",  # %.3f ms
                "memory(MB)": f"{size_in_mbytes:.2f}",
                "R@1": r[1],
                "R@2": r[2],
                "R@3": r[3],
                "R@5": r[5],
                "R@10": r[10],
                "R@100": r[100],
            }
        )
    target_df = pd.DataFrame(results)
    print(target_df.to_markdown(index=False))
    # Leave out empty frames: concatenating with the still-empty accumulator
    # is deprecated in recent pandas and would affect the result dtypes.
    df = pd.concat(
        [frame for frame in (df, target_df) if not frame.empty], axis=0
    )
# Print the accumulated table as markdown, then write it out as CSV.
summary_markdown = df.to_markdown(index=False)
print(summary_markdown)
df.to_csv("result.csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment