@aglove2189
Last active April 16, 2021 14:48
pd.read_sql but with a time to live (ttl) cache.
# -*- coding: utf-8 -*-
import os
import hashlib

import pandas as pd


def read_sql_cache(sql, con, ttl=None, dirname="_cache", **kwargs):
    """pd.read_sql but with a time to live (ttl) cache.

    Args:
        sql (str): SQL query to be executed or a table name.
        con (SQLAlchemy connectable, str, or sqlite3 connection)
        ttl (str, optional): Used for determining time to live, e.g. set to today's
            date to stay alive until tomorrow. Defaults to None, meaning no caching.
        dirname (str, optional): Directory name and/or path to persist cached
            queries to. Defaults to '_cache'.
        **kwargs: Passed through to pd.read_sql.

    Returns:
        pd.DataFrame
    """
    # No ttl means no caching: fall straight through to pd.read_sql.
    if ttl is None:
        return pd.read_sql(sql, con, **kwargs)

    # The cache key hashes the query plus the ttl string, so changing either
    # one produces a new cache file.
    h = hashlib.md5((sql + ttl).encode()).hexdigest()
    os.makedirs(dirname, exist_ok=True)

    fn = f'{h}.parquet'
    if fn in os.listdir(dirname):
        # Cache hit: read the previously persisted result.
        return pd.read_parquet(os.path.join(dirname, fn))
    else:
        # Cache miss: run the query and persist the result for next time.
        df = pd.read_sql(sql, con, **kwargs)
        df.to_parquet(os.path.join(dirname, fn))
        return df


def extend_pandas():
    pd.read_sql_cache = read_sql_cache
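
A minimal usage sketch: the database file "example.db" and the "users" table are illustrative assumptions, and writing the parquet cache requires a parquet engine such as pyarrow or fastparquet to be installed. The functions come from the snippet above.

import datetime
import sqlite3

import pandas as pd

con = sqlite3.connect("example.db")

# Using today's date as the ttl makes the cache key change each day, so the
# cached result is reused within a day and refreshed the next.
ttl = str(datetime.date.today())
df = read_sql_cache("SELECT * FROM users", con, ttl=ttl)

# After extend_pandas(), the same function is also available as pd.read_sql_cache.
extend_pandas()
df = pd.read_sql_cache("SELECT * FROM users", con, ttl=ttl)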