This decorator caches a pandas.DataFrame returning function. It saves the pandas.DataFrame in a parquet file in the cache_dir.
import pandas as pd | |
from pathlib import Path | |
from functools import wraps | |
def cache_pandas_result(cache_dir, hard_reset: bool):
    '''
    This decorator caches a pandas.DataFrame returning function.
    It saves the pandas.DataFrame in a parquet file in the cache_dir.
    It uses the following naming scheme for the caching files:
        cache_dir / function_name + '.trc.pqt'

    The cache file name depends only on the function name: the arguments
    passed to the decorated function are NOT part of the cache key, so a
    cached result is returned regardless of the arguments of later calls.

    Parameters:
        cache_dir: a pathlib.Path object. It is created (including missing
            parents) the first time a result has to be written.
        hard_reset: bool. When true, the function is always recomputed and
            the cache file is overwritten.

    Returns:
        A decorator. The decorated function returns the freshly computed
        DataFrame on a cache miss (or hard reset), and the DataFrame read
        back from the parquet file on a cache hit.

    Raises (when the decorated function is called):
        TypeError: if cache_dir is not a pathlib.Path object, or if the
            decorated function does not return a pandas.DataFrame.
    '''
    def build_caching_function(func):
        @wraps(func)
        def cache_function(*args, **kwargs):
            if not isinstance(cache_dir, Path):
                raise TypeError('cache_dir should be a pathlib.Path object')
            cache_file = cache_dir / (func.__name__ + '.trc.pqt')

            # Cache hit: skip the computation entirely.
            if not hard_reset and cache_file.exists():
                return pd.read_parquet(cache_file)

            result = func(*args, **kwargs)
            if not isinstance(result, pd.DataFrame):
                raise TypeError(f"The result of computing {func.__name__} is not a DataFrame")

            # to_parquet does not create missing directories, so make sure
            # the cache directory exists before writing. Done only after the
            # result is validated so a failed call leaves no empty directory.
            cache_dir.mkdir(parents=True, exist_ok=True)
            result.to_parquet(cache_file)
            return result
        return cache_function
    return build_caching_function
This comment has been minimized.
This comment has been minimized.
Thanks for letting me know! |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Really good !
Just need to import wraps:
from functools import wraps