Last active
April 16, 2021 14:48
-
-
Save aglove2189/13587005648a01b4b75e84ddf70b0b55 to your computer and use it in GitHub Desktop.
pd.read_sql but with a time to live (ttl) cache.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import os | |
import hashlib | |
import pandas as pd | |
def read_sql_cache(sql, con, ttl=None, dirname="_cache", **kwargs): | |
""" | |
pd.read_sql but with a time to live (ttl) cache. | |
Args: | |
sql (str): SQL query to be executed or a table name. | |
con (SQLAlchemy connectable, str, or sqlite3 connection) | |
ttl (str, optional): Used for determining time to live, e.g. set to today's date to stay | |
alive until tomorrow. Defaults to None, no time to live. | |
dirname (str, optional): directory name and/or path to persist cached queries to. | |
Defaults to '_cache'. | |
**kwargs: Will be passed to pd.read_sql | |
Returns: | |
pd.DataFrame | |
""" | |
if ttl is None: | |
return pd.read_sql(sql, con, **kwargs) | |
h = hashlib.md5((sql + ttl).encode()).hexdigest() | |
os.makedirs(dirname, exist_ok=True) | |
fn = f'{h}.parquet' | |
if fn in os.listdir(dirname): | |
return pd.read_parquet(os.path.join(dirname, fn)) | |
else: | |
df = pd.read_sql(sql, con, **kwargs) | |
df.to_parquet(os.path.join(dirname, fn)) | |
return df | |
def extend_pandas(): | |
pd.read_sql_cache = read_sql_cache |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment