# Created September 21, 2020 16:46
# Benchmarking NumpyIndexer
# requires: pip install jina
import os
import shutil
import sys
import numpy as np
from memory_profiler import profile
from jina.executors.indexers.vector import NumpyIndexer
from jina.logging.profile import TimeContext
# File-name templates; the `%s` placeholder is filled with sys.argv[2],
# which index() also passes (as an int) as the indexer's compress_level.
filename = 'a%s.gz'
binname = 'a%s.bin'

# Size of the randomly generated index: number of rows and their dimensionality.
num_data = 10_000
num_dim = 10_000

# Rows per random query batch, and the top_k value passed to each query call.
num_query = 100
top_k = 10
def rm_files(file_paths):
    """Delete every existing path listed in ``file_paths``.

    Regular files are removed with ``os.remove``; directories are removed
    together with their contents via ``shutil.rmtree``. Paths that do not
    exist are skipped silently, so the function can be used as a
    "clean slate" step before a benchmark run.

    :param file_paths: iterable of filesystem paths to delete
    :raises OSError: if an existing file or directory cannot be removed
    """
    for file_path in file_paths:
        # isfile/isdir are both False for a missing path, so no separate
        # existence check is needed.
        if os.path.isfile(file_path):
            os.remove(file_path)
        elif os.path.isdir(file_path):
            # Errors are deliberately not ignored: a stale directory would
            # silently contaminate the next run.
            shutil.rmtree(file_path, ignore_errors=False)
def index():
    """Index random vectors with ``NumpyIndexer`` and time the operation.

    Reads the compression level from ``sys.argv[2]``. Any output left over
    from a previous run with the same level is deleted first, then
    ``num_data`` random vectors of dimension ``num_dim`` are added to a
    fresh indexer inside a ``TimeContext('index')`` block.
    """
    level = sys.argv[2]
    index_path = filename % level
    bin_path = binname % level

    # Start from a clean slate so earlier runs cannot affect this one.
    rm_files([bin_path, index_path])

    data = np.random.random([num_data, num_dim])
    # Keys are drawn at random from [0, num_data), so duplicates are possible.
    keys = np.random.randint(0, high=num_data, size=[num_data])

    with TimeContext('index'):
        with NumpyIndexer(compress_level=int(level), index_filename=index_path) as ni:
            ni.add(keys, data)
            # NOTE(review): the original line was garbled
            # ("ni.add(keys, data) % sys.argv[2])"). It is reconstructed here as
            # an explicit save to the binary path that query() loads from;
            # confirm against the installed jina version.
            ni.save(bin_path)
def query():
    """Run random query batches against the saved indexer and time them.

    Five batches of ``num_query`` random vectors (dimension ``num_dim``) are
    generated up front, outside the timed section. The indexer is loaded
    from ``binname % sys.argv[2]`` and each batch is queried with
    ``top_k`` inside a ``TimeContext('query')`` block.
    """
    batches = []
    for _ in range(5):
        batches.append(np.random.random([num_query, num_dim]))

    bin_path = binname % sys.argv[2]
    # The indexer is entered first and the timer second, so loading is not
    # part of the timed 'query' section.
    with NumpyIndexer.load(bin_path) as ni, TimeContext('query'):
        for batch in batches:
            ni.query(batch, top_k=top_k)
if __name__ == '__main__':
if sys.argv[1] == 'index':
