Benchmark read and write speed of TileDB
#%% | |
import glob | |
import os | |
import datetime | |
import tiledb | |
import pandas as pd | |
from tiledb.dataframe_ import _tiledb_result_as_dataframe | |
from tqdm import tqdm | |
import time | |
import numpy as np | |
# Start from a fresh context's configuration, raise 'py.init_buffer_bytes',
# and hand the result to tiledb.default_ctx so the default context uses it.
cfg = tiledb.Ctx().config()
cfg.update({"py.init_buffer_bytes": 50 * 1024**2})  # 50MB per attribute
tiledb.default_ctx(cfg)
#%% | |
def load_data(data_path="eodprices.parquet"):
    """Load the benchmark dataset from a parquet file and report the load time.

    Args:
        data_path: Path of the parquet file handed to ``pd.read_parquet``.

    Returns:
        The ``pandas.DataFrame`` read from ``data_path``.
    """
    ts = time.time()
    df = pd.read_parquet(data_path)
    te = time.time()
    # Print before returning: returning the read result directly would leave
    # the timing report unreachable.
    print(f"loading data takes {te-ts}s")
    return df
#%% | |
def write_tiledb(df, path, tile=1024**2):
    """Write ``df`` to a dense TileDB array at ``path`` and print the elapsed time.

    Args:
        df: DataFrame forwarded to ``tiledb.from_pandas``.
        path: Destination URI of the array.
        tile: Value forwarded as the ``tile`` keyword of ``tiledb.from_pandas``.
    """
    started = time.time()
    tiledb.from_pandas(path, df, sparse=False, tile=tile)
    elapsed = time.time() - started
    print(f"Saving to tiledb takes {elapsed}")
def read_tiledb(path):
    """Open the TileDB array at ``path`` as a DataFrame and print the elapsed time.

    Args:
        path: URI of the array passed to ``tiledb.open_dataframe``.

    Returns:
        The DataFrame returned by ``tiledb.open_dataframe``.
    """
    started = time.time()
    frame = tiledb.open_dataframe(path)
    elapsed = time.time() - started
    print(f"reading from tiledb takes {elapsed}")
    return frame
#%% | |
def read_single_column(attrs, path, ctx=None):
    """Read only the attributes in ``attrs`` from the array at ``path``.

    The query covers the array's non-empty domain, and the elapsed time is
    printed before the result is returned.

    Args:
        attrs: Attribute selection passed to ``A.query``.
        path: URI of the TileDB array to open.
        ctx: TileDB context to open the array with; when ``None`` the
            context from ``tiledb.default_ctx()`` is used.

    Returns:
        The DataFrame built by ``_tiledb_result_as_dataframe`` from the
        query result.
    """
    started = time.time()
    context = tiledb.default_ctx() if ctx is None else ctx
    # TODO support `distributed=True` option?
    with tiledb.open(path, ctx=context) as A:
        # One slice per (start, end) pair reported by nonempty_domain().
        full_extent = tuple(slice(lo, hi) for lo, hi in A.nonempty_domain())
        data = A.query(attrs).multi_index[full_extent]
        new_df = _tiledb_result_as_dataframe(A, data)
    elapsed = time.time() - started
    print(f"reading single column form tiledb takes {elapsed}s")
    return new_df
#%% | |
def read_slicing(uri="benchmark1.tdb"):
    """Read a fixed date range from the array at ``uri`` as a DataFrame.

    The first dimension is sliced from 2005-02-25 to 2010-11-03 and the
    second dimension is taken in full.

    Args:
        uri: URI of the TileDB array to open. The signature previously had
            an empty default (``uri=``), which does not parse; the default
            is now the array path used by this script's benchmark section.

    Returns:
        The DataFrame built by ``_tiledb_result_as_dataframe`` from the
        sliced result.
    """
    with tiledb.open(uri) as A:
        # To retrieve only selected attribute(s), build the query with
        # e.g. ``A.query(attrs=('S_FA_ROA',))``; indexing that Query object
        # then returns just those attributes.
        q = A.query()
        data = q[np.datetime64('2005-02-25'):np.datetime64('2010-11-03'), :]
        new_df = _tiledb_result_as_dataframe(A, data)
    return new_df
#%% | |
# benchmark 1: load the parquet data, write it to a TileDB array, then time
# reading it back in full and reading one attribute.
benchmark_uri = "benchmark1.tdb"
df = load_data()
write_tiledb(df, benchmark_uri)
read_tiledb(benchmark_uri)
read_single_column(["S_DQ_AVGPRICE"], benchmark_uri)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment