# Bulk insertion of 1 million sortable float values using the RediSearch module
# (C) By Athanassios I. Hatzis
# 14 Sept 2018
# Written to explain issue: https://github.com/RedisLabsModules/RediSearch/issues/493
#
from redisearch import Client, NumericField, NumericFilter, Query
from redis import Redis
import numpy as np
import sys
import time
rconn = Redis(host='localhost', port=6379, db=0)
rconn.flushdb()
print(f'DB size after flush: {rconn.dbsize()}')
# Creating a client with a given index name - DVS (Data Value Store)
dvs = Client('DVS', conn=rconn)
# Creating the index definition and schema
dvs.create_index([NumericField('flt', sortable=True)])
batchndx = dvs.batch_indexer(chunk_size=10000)
size = 1000000
interval = (-5000, 5000)
fltarr = (interval[1]-interval[0])*np.random.sample(size)+interval[0]
print(f'Size: {fltarr.size}')
print(f'Data Type: {fltarr.dtype}')
print(f'Memory Size: {fltarr.nbytes}')
print(f'Object Size: {sys.getsizeof(fltarr)}')
################################ Start Benchmark Test ####################################
t1_start = time.perf_counter()
t2_start = time.process_time()
for n in range(size):
    batchndx.add_document(doc_id=n, flt=fltarr[n])
batchndx.commit()
t1_stop = time.perf_counter()
t2_stop = time.process_time()
print(f"Elapsed time for {size} floats : {int(round((t1_stop-t1_start)))} [sec]")
print(f"CPU process time for {size} floats: {int(round((t2_stop-t2_start)))} [sec]")
print(f"Fields : {dvs.info()['fields']}")
print(f"Total values : {dvs.info()['num_docs']}")
print(f"Total size mb : {dvs.info()['doc_table_size_mb']}")
print(f"Sortable values size mb : {dvs.info()['sortable_values_size_mb']}")
################################ End Benchmark Test ####################################
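# A minimal sketch of querying the indexed data, assuming the schema above;
# it exercises the NumericFilter and Query classes imported earlier.
# The range bounds and paging limits here are illustrative only.
q = Query('*').add_filter(NumericFilter('flt', 0, 100)).sort_by('flt', asc=True).paging(0, 10)
res = dvs.search(q)
print(f"Values in [0, 100]: {res.total}")
for doc in res.docs[:3]:
    print(doc.id, doc.flt)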
# Elapsed time for 1000000 floats : 130 [sec]
# CPU process time for 1000000 floats: 73 [sec]
# Fields : [[b'flt', b'type', b'NUMERIC', b'SORTABLE']]
# Total values : 1000000
# Total size mb : 83.817377090454102
# Sortable values size mb : 22.88818359375
# ----------------------- Other Info -------------------------------
# From redis-cli, info memory
# used_memory_human : 256 MB
# used_memory_dataset: 207 MB
# Save RDB policy: after 600 sec (10 min) if at least 10 keys changed
# OS Environment: Linux Ubuntu x64
# Processor: Intel(R) Core(TM) i3 CPU 540 @ 3.07GHz
# Memory 16GB
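# The memory stats above can also be read from Python instead of redis-cli;
# a hedged sketch using redis-py's INFO command (the 'memory' section
# argument is assumed to be supported by the installed redis-py version):
mem = rconn.info('memory')
print(f"used_memory_human: {mem['used_memory_human']}")
print(f"used_memory_dataset: {mem['used_memory_dataset']}")
# The RDB save policy above corresponds to the redis.conf directive: save 600 10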