Skip to content

Instantly share code, notes, and snippets.

@hnykda
Created June 6, 2017 07:21
Show Gist options
  • Save hnykda/559dbbc63fa26bc67684afd9c6974cea to your computer and use it in GitHub Desktop.
Save hnykda/559dbbc63fa26bc67684afd9c6974cea to your computer and use it in GitHub Desktop.
Test for comparing HDF compression libs through pandas
import os
from time import time
import pandas as pd
from memory_profiler import memory_usage
# Common stem for every output file; benchmark() appends the compression
# library name and the ``.hdf`` extension to it.
FILENAME = 'compressed_df'
def get_size(flnm):
    """Return the on-disk size of ``flnm`` in MiB, rounded to two decimals.

    ``flnm`` is handed straight to ``os.path.getsize``, so whatever that
    call raises for an unreadable or missing path propagates unchanged.
    """
    bytes_per_mib = 1024 * 1024
    size_in_bytes = os.path.getsize(flnm)
    return round(size_in_bytes / bytes_per_mib, 2)
def store_df(original_df: pd.DataFrame, flnm: str, clib: str):
    """Write ``original_df`` to the HDF file ``flnm`` compressed with ``clib``.

    The frame is stored under the key ``'df'`` at compression level 9.

    Args:
        original_df: Frame to persist.
        flnm: Path of the HDF file to write to.
        clib: Compression library name, passed to pandas as ``complib``.
    """
    hdf_options = {'key': 'df', 'complib': clib, 'complevel': 9}
    original_df.to_hdf(flnm, **hdf_options)
def benchmark(original_df: pd.DataFrame, complibs=None):
    """Store ``original_df`` once per compression library and record the cost.

    For every library the frame is written through ``store_df`` to
    ``f'{FILENAME}_{clib}.hdf'`` while ``memory_usage`` samples the process,
    and the elapsed time and resulting file size are collected.

    Args:
        original_df: Frame to write for every compression library.
        complibs: Optional iterable of compression library names to try.
            When ``None`` (the default) the full built-in list is used, so
            existing one-argument callers behave as before.

    Returns:
        Dict keyed by compression library name. Each value is a dict with
        ``'time [s]'`` (elapsed seconds around the ``memory_usage`` call),
        ``'size [MB]'`` (result of ``get_size`` on the written file) and
        ``'memory_usage'`` (whatever ``memory_usage`` returned; presumably
        a list of memory samples -- confirm against memory_profiler).
    """
    # Local import: the file only imports ``time`` from the time module, and
    # a monotonic clock is the right tool for measuring intervals because
    # the wall clock can be adjusted while the benchmark is running.
    from time import perf_counter

    if complibs is None:
        complibs = ['zlib', 'lzo', 'bzip2', 'blosc', 'blosc:blosclz', 'blosc:lz4',
                    'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd']

    res = {}
    for clib in complibs:
        # NOTE(review): names such as 'blosc:lz4' put a colon into the file
        # name, which Windows file systems reject; kept as-is so output
        # paths stay unchanged.
        flnm = f'{FILENAME}_{clib}.hdf'

        # store_df() calls to_hdf without an explicit mode, so a file left
        # over from an earlier run would be appended to instead of replaced,
        # and HDF5 does not give freed space back. Start from a clean slate
        # so 'size [MB]' reflects this run only.
        try:
            os.remove(flnm)
        except FileNotFoundError:
            pass

        started = perf_counter()
        # memory_usage accepts a (callable, args, kwargs) tuple, which
        # avoids defining a throwaway closure on every iteration.
        memus = memory_usage((store_df, (original_df, flnm, clib), {}), interval=1)
        elapsed = perf_counter() - started

        res[clib] = {'time [s]': elapsed, 'size [MB]': get_size(flnm), 'memory_usage': memus}
    return res
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment