Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
This decorator caches the result of a function that returns a pandas.DataFrame. It saves the DataFrame as a parquet file in cache_dir.
import pandas as pd
from pathlib import Path
from functools import wraps
def cache_pandas_result(cache_dir: Path, hard_reset: bool = False):
    """
    Cache the result of a pandas.DataFrame returning function on disk.

    The DataFrame is saved as a parquet file in cache_dir, using the
    following naming scheme for the caching files:
        cache_dir / function_name + '.trc.pqt'

    The file name depends only on the function name, so the arguments of a
    call are NOT part of the cache key: once a cache file exists, every call
    returns its content, whatever arguments are passed.

    Parameters:
        cache_dir: a pathlib.Path object; created on the first write if it
            does not exist yet.
        hard_reset: bool, defaults to False. When True the wrapped function
            is always executed and the cache file is overwritten.

    Returns:
        A decorator to apply to the DataFrame returning function.

    Raises (when the decorated function is called):
        TypeError: if cache_dir is not a pathlib.Path, or if the wrapped
            function returns something that is not a pandas.DataFrame.
    """
    def build_caching_function(func):
        @wraps(func)
        def cache_function(*args, **kwargs):
            if not isinstance(cache_dir, Path):
                raise TypeError('cache_dir should be a pathlib.Path object')
            cache_file = cache_dir / (func.__name__ + '.trc.pqt')

            # Cache hit: skip the computation and load the stored frame.
            if not hard_reset and cache_file.exists():
                return pd.read_parquet(cache_file)

            result = func(*args, **kwargs)
            if not isinstance(result, pd.DataFrame):
                raise TypeError(f"The result of computing {func.__name__} is not a DataFrame")

            # Make sure the target directory exists; otherwise the write
            # fails after the (possibly expensive) computation already ran.
            cache_dir.mkdir(parents=True, exist_ok=True)
            result.to_parquet(cache_file)
            return result
        return cache_function
    return build_caching_function
@slamer59

This comment has been minimized.

Copy link

@slamer59 slamer59 commented Oct 31, 2020

Really good !

Just need to import wraps:
from functools import wraps

@GuiMarthe

This comment has been minimized.

Copy link
Owner Author

@GuiMarthe GuiMarthe commented Nov 24, 2020

Thanks for letting me know!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment