Skip to content

Instantly share code, notes, and snippets.

@haynesgt
Created January 8, 2024 23:00
Show Gist options
  • Save haynesgt/940c354ada40d4b1cad126214a3901db to your computer and use it in GitHub Desktop.
Save haynesgt/940c354ada40d4b1cad126214a3901db to your computer and use it in GitHub Desktop.
File-based cacher for Python scripting
import asyncio
import hashlib
import json
import os
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from functools import wraps
from typing import Any, Union
from pydantic import BaseModel
# Directory holding one file per cached job. The read/write helpers open
# "./{CACHE_DIR}/<job_id>", so it is resolved relative to the current working
# directory of the running script.
CACHE_DIR = ".cache"
class JobResult(BaseModel):
    """Record of one completed function call, as stored in a cache file."""

    # Cache key of the call; also used as the file name inside the cache directory.
    job_id: str
    # Timestamp taken when the record was created.
    date: datetime
    # Name of the function that was called.
    name: str
    # Positional arguments the function was called with.
    args: tuple
    # Keyword arguments the function was called with.
    kwargs: dict
    # Value the function returned.
    value: Any
class JobStatus(BaseModel):
    """Outcome of a cache lookup for a single job id."""

    # True when a cache file for the job id was found.
    exists: bool
    # The stored record when `exists` is True, otherwise None.
    result: Union[JobResult, None]
def get_job_id(name: str, args: tuple, kwargs: dict):
    """Build a deterministic cache key for one function call.

    The key has the form ``"<name>-<md5 hex digest>"`` where the digest is
    computed over the JSON encoding of the arguments.

    Keyword arguments are sorted by name before hashing so that
    ``f(a=1, b=2)`` and ``f(b=2, a=1)`` -- the same call as far as Python is
    concerned -- share a single cache entry instead of producing two.

    Args:
        name: Name of the function being cached; used as the key prefix.
        args: Positional arguments of the call (a list hashes the same as a
            tuple because both encode to a JSON array).
        kwargs: Keyword arguments of the call.

    Returns:
        The cache key as a string.

    Raises:
        TypeError: If an argument cannot be encoded as JSON.
    """
    # Only the top-level kwargs are reordered; nested values are encoded as-is.
    ordered_kwargs = dict(sorted(kwargs.items()))
    payload = json.dumps({"args": args, "kwargs": ordered_kwargs})
    # MD5 is used purely as a short fingerprint here, not for security.
    digest = hashlib.md5(payload.encode("utf-8")).hexdigest()
    return f"{name}-{digest}"
def ensure_cache_dir():
    """Create the cache directory if it does not exist yet.

    Uses ``exist_ok=True`` instead of a separate existence check so that two
    processes (or threads) starting at the same time cannot race between the
    check and the creation.

    Raises:
        FileExistsError: If the cache path exists but is not a directory.
    """
    os.makedirs(CACHE_DIR, exist_ok=True)
def _cache_path(job_id: str) -> str:
    """Return the path of the cache file that belongs to *job_id*."""
    return os.path.join(".", CACHE_DIR, job_id)


def read_cached_job_status(job_id: str):
    """Look up the cached record for *job_id*.

    Args:
        job_id: Cache key, as produced by ``get_job_id``.

    Returns:
        A ``JobStatus`` with ``exists=True`` and the parsed ``JobResult`` when
        a cache file is present, or ``exists=False`` and ``result=None`` when
        there is no file for this id.

    Raises:
        pydantic.ValidationError: If the file exists but does not contain a
            valid ``JobResult`` JSON document.
    """
    ensure_cache_dir()
    try:
        # Explicit UTF-8 so reading does not depend on the platform's locale
        # encoding; it matches what write_cached_job_status produces.
        with open(_cache_path(job_id), encoding="utf-8") as file:
            raw = file.read()
    except FileNotFoundError:
        return JobStatus(exists=False, result=None)
    # Parsing stays outside the try so only a missing file counts as a miss.
    return JobStatus(exists=True, result=JobResult.model_validate_json(raw))


def write_cached_job_status(job_id: str, result: JobResult):
    """Store *result* in the cache file for *job_id*, replacing any old entry.

    Args:
        job_id: Cache key, as produced by ``get_job_id``.
        result: Record to serialise as JSON.
    """
    ensure_cache_dir()
    # Explicit UTF-8: the JSON text may contain non-ASCII characters, which a
    # non-UTF-8 locale default encoding could fail to write.
    with open(_cache_path(job_id), "w", encoding="utf-8") as file:
        # model_dump_json() is the pydantic v2 counterpart of the
        # model_validate_json() call used when reading; .json() is deprecated.
        file.write(result.model_dump_json())
def caches_to_file(f):
    """Decorator that stores the results of *f* in the file cache.

    Each call is identified by the function's ``__name__`` together with its
    positional and keyword arguments. If a cache file for that call already
    exists, the value loaded from the file is returned and *f* is not run.
    Otherwise *f* is executed, its result is written to the cache, and the
    freshly computed value is returned.
    """

    @wraps(f)
    def wrapper(*args, **kwargs):
        key = get_job_id(f.__name__, args, kwargs)
        cached = read_cached_job_status(key)

        # Cache hit: hand back the stored value without calling f.
        if cached.exists:
            return cached.result.value

        # Cache miss: compute, record the call, then return the result.
        computed = f(*args, **kwargs)
        record = JobResult(
            job_id=key,
            date=datetime.now(),
            name=f.__name__,
            args=args,
            kwargs=kwargs,
            value=computed,
        )
        write_cached_job_status(key, record)
        return computed

    return wrapper
import os
import pytest
from cacher import (
JobResult,
caches_to_file,
ensure_cache_dir,
get_job_id,
read_cached_job_status,
write_cached_job_status,
)
def test_get_job_id():
    """A call with no arguments maps to a fixed, known job id."""
    expected = "test-fca6eb91156619d0d15face3a31ee677"
    actual = get_job_id("test", [], {})
    assert actual == expected
def test_ensure_cache_dir(tmp_path, monkeypatch):
    """ensure_cache_dir creates the directory and tolerates repeated calls."""
    # The cache directory is a relative path, so running inside a fresh
    # temporary directory guarantees it does not exist before the call and
    # keeps the test from touching the developer's real cache.
    monkeypatch.chdir(tmp_path)
    assert not os.path.exists(".cache")

    ensure_cache_dir()
    assert os.path.isdir(".cache")

    # A second call must leave the existing directory alone, not fail.
    ensure_cache_dir()
    assert os.path.isdir(".cache")
def test_write_read_job_status(tmp_path, monkeypatch):
    """A written record is read back unchanged; unknown ids report a miss."""
    # Run in an empty temporary directory so the relative cache directory is
    # private to this test and starts out without any entries.
    monkeypatch.chdir(tmp_path)

    job_id = "test"
    # Nothing has been written yet, so the lookup must be a miss.
    missing = read_cached_job_status(job_id)
    assert not missing.exists
    assert missing.result is None

    result = JobResult(
        job_id=job_id,
        date="2023-10-28T05:00:00",
        name="test",
        args=[],
        kwargs={},
        value=None,
    )
    write_cached_job_status(job_id, result)

    job_status = read_cached_job_status(job_id)
    assert job_status.exists
    assert job_status.result == result
def test_cache_to_file(tmp_path, monkeypatch):
    """The decorator computes once, stores the record, then serves from cache."""
    # Run in an empty temporary directory: otherwise an entry left behind by a
    # previous run would satisfy the first call and the compute-and-store path
    # would never be exercised.
    monkeypatch.chdir(tmp_path)

    calls = []

    @caches_to_file
    def func(a, b):
        calls.append((a, b))
        return a + b

    # First call: cache miss, so the wrapped function runs.
    assert func(1, 2) == 3
    assert calls == [(1, 2)]

    job_status = read_cached_job_status(get_job_id(func.__name__, [1, 2], {}))
    assert job_status.exists
    assert job_status.result.name == "func"
    assert job_status.result.args == (1, 2)
    assert job_status.result.value == 3

    # Second call: cache hit, so the wrapped function must not run again.
    assert func(1, 2) == 3
    assert calls == [(1, 2)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment