Skip to content

Instantly share code, notes, and snippets.

@hotchpotch
Created November 18, 2023 08:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hotchpotch/57d508beb09bbf1e18bee79293ac2ee5 to your computer and use it in GitHub Desktop.
IVF, HNSW, PQ benchmark
# base: https://github.com/facebookresearch/faiss/blob/main/benchs/bench_gpu_sift1m.py
# base code License: MIT License
#
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import time
import numpy as np
import pandas as pd
import pdb
import faiss
from datasets import load_sift1M
def evaluate(index, xq, gt, k):
    """Search all queries on *index* and report speed plus recall.

    Parameters:
        index: faiss-style index exposing ``search(xq, k) -> (D, I)``.
        xq: query vectors, shape (nq, d).
        gt: ground-truth neighbor ids; only column 0 (the true nearest
            neighbor of each query) is consulted.
        k: number of neighbors retrieved per query.

    Returns:
        ``(ms_per_query, recalls)`` where ``recalls[n]`` is the fraction
        of queries whose true nearest neighbor appears in the top-n ids.
    """
    num_queries = xq.shape[0]
    start = time.time()
    _, labels = index.search(xq, k)  # noqa: E741
    elapsed = time.time() - start
    # Recall@n: does gt's column 0 appear anywhere in the first n results?
    recalls = {
        n: (labels[:, :n] == gt[:, :1]).sum() / float(num_queries)
        for n in (1, 2, 3, 5, 10, 100)
    }
    return elapsed * 1000.0 / num_queries, recalls
# Empty accumulator: one row per (index type, expansion value) measurement.
# NOTE(review): "expantion" is a misspelling of "expansion", kept on purpose
# because the same keys are used for the result rows appended further down.
_recall_cols = [f"R@{n}" for n in (1, 2, 3, 5, 10, 100)]
df = pd.DataFrame(
    columns=["name", "expantion_type", "expantion", "time(ms)", "memory(MB)"]
    + _recall_cols
)
# Load the SIFT1M benchmark: database vectors (xb), query vectors (xq),
# training vectors (xt) and ground-truth neighbor ids (gt).
xb, xq, xt, gt = load_sift1M()
print("load data")
print(xb.shape, xq.shape, xt.shape, gt.shape)
# debug
# xb = xb[:100000]
# number of queries and vector dimensionality, taken from the query matrix
nq, d = xq.shape
# we need only a StandardGpuResources per GPU
res = faiss.StandardGpuResources()
# Index factory strings to benchmark: one plain IVF, two IVF+PQ sweeps
# (varying the number of coarse clusters), and a set of HNSW variants.
target_indexes = (
    ["IVF16384,Flat"]
    + [f"IVF{nlist},PQ32" for nlist in (4096, 8192, 16384)]
    + [f"IVF{nlist},PQ64" for nlist in (1024, 2048, 4096, 8192, 16384)]
    + [
        "HNSW",
        "HNSW32,PQ32",
        "HNSW32,PQ64",
        "HNSW64,PQ32",
        "HNSW64,PQ64",
        "HNSW64,PQ128",
    ]
)
# For each index type: build (train + add), measure serialized size, then
# sweep the search-time expansion parameter (efSearch for HNSW, nprobe for
# IVF) and record per-query latency, memory footprint and recall@n.
# NOTE(review): the source was extracted with all indentation lost; the loop
# structure below is a reconstruction — confirm against the original gist.
for index_type in target_indexes:
    print("# ", index_type)
    is_hnsw = "HNSW" in index_type
    index = faiss.index_factory(d, index_type)
    # Fix: clone to the GPU only when a GPU is present.  The original called
    # index_cpu_to_gpu unconditionally, which fails on CPU-only machines even
    # though an explicit no-GPU branch exists further down.
    if faiss.get_num_gpus() > 0:
        co = faiss.GpuClonerOptions()
        # here we are using a 64-byte PQ, so we must set the lookup tables to
        # 16 bit float (this is due to the limited temporary memory).
        co.useFloat16 = True
        index = faiss.index_cpu_to_gpu(res, 0, index, co)
    index.train(xt)
    index.add(xb)
    # warmup
    index.search(xq, 123)
    # Serialize the CPU form of the index to estimate its memory footprint.
    if faiss.get_num_gpus() > 0:
        index_cpu = faiss.index_gpu_to_cpu(index)
    else:
        index_cpu = index
    index_serialized = faiss.serialize_index(index_cpu)
    size_in_bytes = len(index_serialized)
    size_in_mbytes = float(size_in_bytes) / 1024 / 1024
    # use index on CPU for the timed search sweep
    index = index_cpu
    results = []
    for lexpantion in range(6):
        expantion = 16 << lexpantion  # 16, 32, 64, 128, 256, 512
        if is_hnsw:
            index.hnsw.efSearch = expantion
        else:
            index.nprobe = expantion
        t, r = evaluate(index, xq, gt, 100)
        # The misspelled "expantion" keys intentionally match the DataFrame
        # columns declared at the top of the script.
        results.append(
            {
                "name": index_type,
                "expantion_type": "efSearch" if is_hnsw else "nprobe",
                "expantion": expantion,
                "time(ms)": f"{t:.3f}",  # %.3f ms
                "memory(MB)": f"{size_in_mbytes:.2f}",
                "R@1": r[1],
                "R@2": r[2],
                "R@3": r[3],
                "R@5": r[5],
                "R@10": r[10],
                "R@100": r[100],
            }
        )
    target_df = pd.DataFrame(results)
    print(target_df.to_markdown(index=False))
    df = pd.concat([df, target_df], axis=0)
# NOTE(review): final summary print and CSV dump placed after the loop; the
# lost indentation makes their original placement ambiguous — the saved CSV
# is identical either way since to_csv overwrites the file.
print(df.to_markdown(index=False))
df.to_csv("result.csv", index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment