Skip to content

Instantly share code, notes, and snippets.

@hanxiao

hanxiao/app.py Secret

Created September 21, 2020 16:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hanxiao/43cad33b60cadd34f45236993689e5d9 to your computer and use it in GitHub Desktop.
Save hanxiao/43cad33b60cadd34f45236993689e5d9 to your computer and use it in GitHub Desktop.
benchmarking numpyindexer
# requires: pip install jina memory_profiler
import os
import shutil
import sys
import numpy as np
from memory_profiler import profile
from jina.executors.indexers.vector import NumpyIndexer
from jina.logging.profile import TimeContext
# File-name templates; ``%s`` is filled with the compression level taken
# from ``sys.argv[2]``, so each level gets its own pair of files.
filename = 'a%s.gz'   # passed to NumpyIndexer(index_filename=...)
binname = 'a%s.bin'   # passed to ni.save(...) and NumpyIndexer.load(...)

# Benchmark sizes.
num_data = 10_000     # rows of the random matrix that gets indexed
num_dim = 10_000      # columns of both the indexed and the query matrices
num_query = 100       # rows in each random query batch
top_k = 10            # forwarded as ``top_k`` to ni.query(...)
def rm_files(file_paths):
    """Delete every existing path in *file_paths*.

    Regular files and symbolic links are unlinked; directories are removed
    recursively. Paths that do not exist are skipped silently.

    :param file_paths: iterable of filesystem paths to delete
    """
    for file_path in file_paths:
        # Test for links first: a link is unlinked itself and never followed.
        # ``shutil.rmtree`` refuses to run on a symlink, and a dangling link
        # is reported as missing by ``os.path.exists``.
        if os.path.islink(file_path) or os.path.isfile(file_path):
            os.remove(file_path)
        elif os.path.isdir(file_path):
            shutil.rmtree(file_path)
@profile
def index():
    """Index a random matrix with NumpyIndexer and save the indexer.

    The compression level is read from ``sys.argv[2]``. It is passed to the
    indexer as ``compress_level`` and is also substituted into the
    module-level ``filename`` / ``binname`` templates to pick the output
    paths. The add-and-save step runs inside ``TimeContext('index')``.
    """
    level = sys.argv[2]
    index_path = filename % level
    bin_path = binname % level

    # Clear leftovers from an earlier run at this level. The paths are built
    # from the same templates used for writing below, so cleanup and output
    # cannot drift apart if the templates change.
    rm_files([bin_path, index_path])

    data = np.random.random([num_data, num_dim])
    # NOTE(review): randint draws with replacement, so keys may repeat --
    # confirm duplicate keys are acceptable for this benchmark.
    keys = np.random.randint(0, high=num_data, size=[num_data])

    with TimeContext('index'):
        with NumpyIndexer(compress_level=int(level), index_filename=index_path) as ni:
            ni.add(keys, data)
            ni.save(bin_path)
@profile
def query():
    """Load the saved indexer and run five random query batches against it.

    Each batch is a ``num_query`` x ``num_dim`` random matrix. The indexer is
    loaded from ``binname`` filled in with ``sys.argv[2]``; only the query
    loop runs inside ``TimeContext('query')``, the load does not.
    """
    batches = [np.random.random([num_query, num_dim]) for _ in range(5)]
    saved_path = binname % sys.argv[2]
    with NumpyIndexer.load(saved_path) as ni, TimeContext('query'):
        for batch in batches:
            ni.query(batch, top_k=top_k)
if __name__ == '__main__':
    # Usage: <script> <mode> <compress_level>
    # Both benchmarks read the level from sys.argv[2], so reject a short
    # command line here with a readable message instead of letting an
    # IndexError surface from inside the profiled function.
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} index|query <compress_level>')

    if sys.argv[1] == 'index':
        index()
    else:
        # Any mode other than 'index' runs the query benchmark.
        query()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment