@aglove2189
Last active April 16, 2021 14:48
pd.read_sql but with a time to live (ttl) cache.
# -*- coding: utf-8 -*-
import os
import hashlib

import pandas as pd


def read_sql_cache(sql, con, ttl=None, dirname="_cache", **kwargs):
    """pd.read_sql but with a time to live (ttl) cache.

    Args:
        sql (str): SQL query to be executed or a table name.
        con (SQLAlchemy connectable, str, or sqlite3 connection)
        ttl (str, optional): Used for determining time to live, e.g. set to today's
            date to stay alive until tomorrow. Defaults to None, meaning no caching.
        dirname (str, optional): Directory name and/or path to persist cached
            queries to. Defaults to '_cache'.
        **kwargs: Passed through to pd.read_sql.

    Returns:
        pd.DataFrame
    """
    # No ttl means no caching: fall straight through to pd.read_sql.
    if ttl is None:
        return pd.read_sql(sql, con, **kwargs)

    # The cache key hashes the query plus the ttl string, so changing either
    # one produces a new cache file.
    h = hashlib.md5((sql + ttl).encode()).hexdigest()
    os.makedirs(dirname, exist_ok=True)

    fn = f'{h}.parquet'
    if fn in os.listdir(dirname):
        # Cache hit: read the previously persisted result.
        return pd.read_parquet(os.path.join(dirname, fn))
    else:
        # Cache miss: run the query and persist the result for next time.
        df = pd.read_sql(sql, con, **kwargs)
        df.to_parquet(os.path.join(dirname, fn))
        return df


def extend_pandas():
    pd.read_sql_cache = read_sql_cache
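
A minimal usage sketch: the database file "example.db" and the "users" table are illustrative assumptions, and writing the parquet cache requires a parquet engine such as pyarrow or fastparquet to be installed. The functions come from the snippet above.

import datetime
import sqlite3

import pandas as pd

con = sqlite3.connect("example.db")

# Using today's date as the ttl makes the cache key change each day, so the
# cached result is reused within a day and refreshed the next.
ttl = str(datetime.date.today())
df = read_sql_cache("SELECT * FROM users", con, ttl=ttl)

# After extend_pandas(), the same function is also available as pd.read_sql_cache.
extend_pandas()
df = pd.read_sql_cache("SELECT * FROM users", con, ttl=ttl)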