# Benchmarking NumpyIndexer
# requires: pip install jina memory_profiler
import os | |
import shutil | |
import sys | |
import numpy as np | |
from memory_profiler import profile | |
from jina.executors.indexers.vector import NumpyIndexer | |
from jina.logging.profile import TimeContext | |
# File-name templates; ``%s`` is filled with the command-line value sys.argv[2].
filename = 'a%s.gz'    # passed to NumpyIndexer as ``index_filename``
binname = 'a%s.bin'    # path the indexer is saved to and loaded from

# Size of the randomly generated index data: num_data rows, num_dim columns.
num_data = 10_000
num_dim = 10_000

# Rows per random query batch, and the ``top_k`` value passed to each query.
num_query = 100
top_k = 10
def rm_files(file_paths):
    """Delete every path in *file_paths* that is present on disk.

    Regular files and symbolic links are unlinked; real directories are
    removed recursively. Paths that do not exist, or that are neither a
    file, a link nor a directory, are skipped.

    :param file_paths: iterable of filesystem paths to delete
    """
    for file_path in file_paths:
        # Links are checked first and unlinked rather than followed:
        # ``shutil.rmtree`` raises on a symlink, and ``os.path.exists``
        # reports False for a dangling one, which would leave it behind.
        if os.path.islink(file_path) or os.path.isfile(file_path):
            os.remove(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
@profile
def index():
    """Benchmark filling and saving a ``NumpyIndexer``.

    Removes the ``.bin`` and ``.gz`` files left by an earlier run with the
    same ``sys.argv[2]``, generates a ``num_data`` x ``num_dim`` array of
    random values with one random integer key per row, then adds them to a
    new indexer and saves it. The add and save calls run inside a
    ``TimeContext('index')`` block; ``sys.argv[2]`` is also parsed as the
    indexer's ``compress_level``.
    """
    stale_paths = [f'a{sys.argv[2]}.bin', f'a{sys.argv[2]}.gz']
    rm_files(stale_paths)

    vectors = np.random.random([num_data, num_dim])
    # Keys are drawn independently from [0, num_data), so repeats can occur.
    vector_keys = np.random.randint(0, high=num_data, size=[num_data])

    with TimeContext('index'), NumpyIndexer(
        compress_level=int(sys.argv[2]),
        index_filename=filename % sys.argv[2],
    ) as indexer:
        indexer.add(vector_keys, vectors)
        indexer.save(binname % sys.argv[2])
@profile
def query():
    """Benchmark querying a previously saved ``NumpyIndexer``.

    Generates five random batches of shape ``(num_query, num_dim)``, loads
    the indexer from the ``.bin`` file named after ``sys.argv[2]`` and calls
    ``query`` with ``top_k`` on each batch inside a ``TimeContext('query')``
    block. The query results are discarded.
    """
    batch_shape = [num_query, num_dim]
    batches = [np.random.random(batch_shape) for _ in range(5)]

    saved_path = binname % sys.argv[2]
    with NumpyIndexer.load(saved_path) as indexer, TimeContext('query'):
        for batch in batches:
            indexer.query(batch, top_k=top_k)
if __name__ == '__main__':
    # Usage: python <script> index|query <compress_level>
    # Both positional arguments are required: index() and query() read
    # sys.argv[2] to build their file names, and index() also parses it as
    # the compression level. Fail with a usage message instead of a bare
    # IndexError when either one is missing.
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} index|query <compress_level>')
    if sys.argv[1] == 'index':
        index()
    else:
        # Any mode other than 'index' runs the query benchmark.
        query()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment