global connection num_workers: int = cpu_count() - 1 #number of worker processes for data loading keys: list[int] = [self.num_keys // num_workers for i in range(num_workers)] #number of keys each worker will generate keys[0] += self.num_keys % num_workers connection.flushdb() sleep(5) #wait for counters to reset base_ram: float = round(connection.info('memory')['used_memory']/1048576, 2) # 'empty' Redis db memory usage vec_params: dict = { "TYPE": self.float_type.value, "DIM": self.vec_dim, "DISTANCE_METRIC": self.metric_type.value, } if self.index_type is INDEX_TYPE.HNSW: vec_params['M'] = self.vec_m match self.object_type: case OBJECT_TYPE.JSON: schema = [ VectorField('$.vector', self.index_type.value, vec_params, as_name='vector')] idx_def: IndexDefinition = IndexDefinition(index_type=IndexType.JSON, prefix=['key:']) case OBJECT_TYPE.HASH: schema = [ VectorField('vector', self.index_type.value, vec_params)] idx_def: IndexDefinition = IndexDefinition(index_type=IndexType.HASH, prefix=['key:']) connection.ft('idx').create_index(schema, definition=idx_def) pool_params = zip(keys, repeat(self.object_type), repeat(self.vec_dim), repeat(self.float_type)) t1_start: float = perf_counter() with Pool(cpu_count()) as pool: pool.starmap(load_db, pool_params) # load a Redis instance via a pool of worker processes t1_stop:float = perf_counter()