-
-
Save akorn/51ee2fe7d36fa139723c851d87e56096 to your computer and use it in GitHub Desktop.
#!/bin/zsh | |
# | |
# Purpose: run specified command with specified arguments and cache result. If cache is fresh enough, don't run command again but return cached output. | |
# Also cache exit status and stderr. | |
# Copyright (c) 2019-2023 András Korn; License: GPLv3 | |
# Use silly long variable names to avoid clashing with whatever the invoked program might use | |
# Configuration defaults. Each setting may be preset in the environment; a
# value that is unset or empty is replaced by the default given here.
: "${RUNCACHED_MAX_AGE:=300}"    # maximum cache age, in seconds
: "${RUNCACHED_IGNORE_ENV:=0}"   # 1: leave the environment out of the cache key
: "${RUNCACHED_IGNORE_PWD:=0}"   # 1: leave the working directory out of the cache key
# Cache directory: an explicit setting wins; otherwise use a per-user directory
# below $HOME, or a system-wide directory when HOME is empty.
if [[ -z "$RUNCACHED_CACHE_DIR" ]]; then
  if [[ -n "$HOME" ]]; then
    RUNCACHED_CACHE_DIR=$HOME/.runcached
  else
    RUNCACHED_CACHE_DIR=/var/cache/runcached
  fi
fi
# Print the help text on stdout, then terminate the script with status 0.
# The text interpolates the current values of RUNCACHED_CACHE_DIR and
# RUNCACHED_MAX_AGE. Never returns to the caller.
function usage() {
  local -a help_lines
  local help_line
  # One array element per output line; an empty element yields a blank line.
  help_lines=(
    "Usage: runcached [--ttl <max cache age>] [--cache-dir <cache directory>]"
    " [--ignore-env] [--ignore-pwd] [--help] [--prune-cache]"
    " [--] command [arg1 [arg2 ...]]"
    ""
    "Run 'command' with the specified args and cache stdout, stderr and exit"
    "status. If you run the same command again and the cache is fresh, cached"
    "data is returned and the command is not actually run."
    ""
    "Normally, all exported environment variables as well as the current working"
    "directory are included in the cache key. The --ignore options disable this."
    "The OLDPWD variable is always ignored."
    ""
    "--prune-cache deletes all cache entries older than the maximum age. There is"
    "no other mechanism to prevent the cache growing without bounds."
    ""
    "The default cache directory is ${RUNCACHED_CACHE_DIR}."
    "Maximum cache age defaults to ${RUNCACHED_MAX_AGE}."
    ""
    "CAVEATS:"
    ""
    "Side effects of 'command' are obviously not cached."
    ""
    "There is no cache invalidation logic except cache age (specified in seconds)."
    ""
    "If the cache can't be created, the command is run uncached."
    ""
    "This script is always silent; any output comes from the invoked command. You"
    "may thus not notice errors creating the cache and such."
    ""
    "stdout and stderr streams are saved separately. When both are written to a"
    "terminal from cache, they will almost certainly be interleaved differently"
    "than originally. Ordering of messages within the two streams is preserved."
    ""
    "It's not safe to prune the cache while another instance of runcached is"
    "using it AND it's possible for jobs to run longer than"
    "$RUNCACHED_MAX_AGE seconds."
  )
  for help_line in "${help_lines[@]}"; do
    echo "$help_line"
  done
  exit 0
}
# Serve the cached result if the cache entry is complete and fresh.
#
# Must be called with a lock on $RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.lock
# held. Reads the globals RUNCACHED_CACHE_DIR, RUNCACHED_CACHE_KEY and
# RUNCACHED_MAX_AGE (seconds).
#
# The entry is fresh only when all three files (.stdout, .stderr, .exitstatus)
# exist and each one is younger than RUNCACHED_MAX_AGE.
#
# Fresh:     replays the cached stdout and stderr on the matching streams and
#            exits the script with the cached exit status (does not return).
# Not fresh: returns to the caller without producing any output.
function use_cache_if_cache_fresh() {
  local RUNCACHED_CACHE_FRESH=1
  local i mtime
  for i in "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY".{stdout,stderr,exitstatus}; do
    # A missing file, an unreadable mtime or an expired file each make the
    # entry unusable. The age check uses the mtime of the file being examined,
    # so a stale .stderr or .exitstatus cannot hide behind a fresh .stdout.
    if [[ ! -f "$i" ]] \
        || ! mtime=$(zstat +mtime "$i" 2>/dev/null) \
        || (( EPOCHSECONDS - mtime >= RUNCACHED_MAX_AGE )); then
      RUNCACHED_CACHE_FRESH=0
      break
    fi
  done
  # Since we hold a lock, no other instance is able to obtain a write
  # lock and remove or overwrite our cache files; if the files were
  # there before, they are still there (except if the cache is being
  # pruned, but that's future work)
  if ((RUNCACHED_CACHE_FRESH)); then
    # Replay both streams concurrently, then wait for both copies to finish.
    cat "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.stdout" &
    cat "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.stderr" >&2 &
    wait
    exit $(<"$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.exitstatus")
  fi
}
# Parse runcached's own options. Parsing stops at "--" (which is consumed) or
# at the first word that is not a recognised option; whatever remains in "$@"
# is the command to run. --help prints the usage text and exits.
RUNCACHED_PRUNE=${RUNCACHED_PRUNE:-0}
while [[ -n "$1" ]]; do
  case "$1" in
    --ttl|--cache-dir)
      # Both options take a value. Without this check, a trailing option makes
      # "shift 2" fail without shifting anything and the loop never terminates.
      if (( $# < 2 )); then
        echo "runcached: option $1 requires an argument" >&2
        exit 2
      fi
      case "$1" in
        --ttl) RUNCACHED_MAX_AGE="$2";;
        --cache-dir) RUNCACHED_CACHE_DIR="$2";;
      esac
      shift 2;;
    --ignore-env) RUNCACHED_IGNORE_ENV=1; shift;;
    --ignore-pwd) RUNCACHED_IGNORE_PWD=1; shift;;
    --prune-cache) RUNCACHED_PRUNE=1; shift;;
    --help) usage;;
    --) shift; break;;
    *) break;;
  esac
done
# Load the zsh modules the script relies on: zsh/datetime (EPOCHSECONDS),
# zsh/stat (zstat), zsh/system (zsystem flock) and zsh/files (built-in file
# commands such as mkdir, rm and mv).
for RUNCACHED_module in zsh/datetime zsh/stat zsh/system zsh/files; do
  zmodload $RUNCACHED_module
done
unset RUNCACHED_module
# zsh/files' mv does not fall back to copying when rename fails with EXDEV.
# The cache directory is likely on a different filesystem than the temp
# directory, so disable the builtin and let /bin/mv be used instead.
disable mv
# Best effort: failure to create the cache directory is deliberately silent.
mkdir -p "$RUNCACHED_CACHE_DIR" &>/dev/null
# TODO: pruning could be made safe for concurrent execution by not relying on
# find -delete: iterate over the cache contents instead, obtain a write lock on
# each entry, and remove it only if it is stale. That could then run as a
# background job, and roughly every nth invocation could prune automatically.
if ((RUNCACHED_PRUNE)); then
  # Delete every regular file in the cache that is not newer than "now minus
  # the maximum age"; errors are discarded.
  find "$RUNCACHED_CACHE_DIR/." -type f \! -newermt "@$((EPOCHSECONDS-RUNCACHED_MAX_AGE))" -delete 2>/dev/null
fi
# With no command to run there is nothing left to do; exit silently.
[[ -n "$@" ]] || exit 0
# Compute the cache key: the MD5 of the (optionally filtered) environment plus
# the command line. The subshell keeps the unset calls from affecting the
# environment of the command that is run later.
(
  # Almost(?) nothing uses OLDPWD, but taking it into account potentially reduces cache efficiency.
  # Thus, we ignore it for the purpose of coming up with a cache key.
  unset OLDPWD
  if ((RUNCACHED_IGNORE_ENV)); then
    # The working directory normally enters the key through env's PWD line.
    # With the environment ignored it has to be added explicitly, otherwise
    # --ignore-env would silently imply --ignore-pwd.
    ((RUNCACHED_IGNORE_PWD)) || echo -E "PWD=$PWD"
  else
    ((RUNCACHED_IGNORE_PWD)) && unset PWD
    env
  fi
  # NOTE(review): arguments are joined with spaces, so 'cmd "a b"' and
  # 'cmd a b' share a key. Left as is to keep existing cache entries valid.
  echo -E "$@"
) | md5sum | read RUNCACHED_CACHE_KEY RUNCACHED__crap__
# make the cache dir hashed unless a cache file already exists (created by a previous version that didn't use hashed dirs)
if ! [[ -f "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.exitstatus" ]]; then
  # Split the key into <first 2 chars>/<next 2 chars>/<rest>.
  RUNCACHED_CACHE_KEY=$RUNCACHED_CACHE_KEY[1,2]/$RUNCACHED_CACHE_KEY[3,4]/$RUNCACHED_CACHE_KEY[5,$]
  mkdir -p "$RUNCACHED_CACHE_DIR/${RUNCACHED_CACHE_KEY:h}" >/dev/null 2>/dev/null
fi
# If we can't obtain a lock, we want to run uncached; otherwise
# 'runcached' wouldn't be transparent because it would prevent
# parallel execution of several instances of the same command.
# Locking is necessary to avoid races between the mv(1) command
# below replacing stderr with a newer version and another instance
# of runcached using a newer stdout with the older stderr.
: >>"$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.lock" 2>/dev/null
# We don't need an exclusive lock if the cache is fresh; multiple instances
# can read it simultaneously.
if zsystem flock -t 0 -r "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.lock" 2>/dev/null; then # Get a read lock, if possible.
  use_cache_if_cache_fresh
fi
# Either we couldn't obtain a read lock (which means another instance had a write lock), or the cache wasn't fresh.
if zsystem flock -t 0 "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY.lock" 2>/dev/null; then # Get a write lock, if possible.
  # If we couldn't get a read lock before but can get a write lock
  # now, the instance that held the write lock earlier may have
  # exited, so it's possible we now have a fresh cache; if so, let's
  # use it.
  use_cache_if_cache_fresh
  # If the function returned, the cache wasn't fresh; remove any stale cache files:
  rm -f "$RUNCACHED_CACHE_DIR/$RUNCACHED_CACHE_KEY".{stdout,stderr,exitstatus} 2>/dev/null
  # run the command and update the cache:
  if [[ -d "$RUNCACHED_CACHE_DIR/." ]]; then
    RUNCACHED_tempdir=$(mktemp -d 2>/dev/null)
    # The non-empty check matters: if mktemp failed, "$RUNCACHED_tempdir/."
    # would be "/." and the directory test alone would succeed.
    if [[ -n "$RUNCACHED_tempdir" && -d "$RUNCACHED_tempdir/." ]]; then
      # Each stream is redirected twice (zsh MULTIOS): once to its original
      # destination and once to a file in the temp dir. "$@" is quoted so
      # that empty arguments reach the command instead of being dropped.
      "$@" >&1 >"$RUNCACHED_tempdir/${RUNCACHED_CACHE_KEY:t}.stdout" 2>&2 2>"$RUNCACHED_tempdir/${RUNCACHED_CACHE_KEY:t}.stderr"
      RUNCACHED_ret=$?
      echo $RUNCACHED_ret >"$RUNCACHED_tempdir/${RUNCACHED_CACHE_KEY:t}.exitstatus" 2>/dev/null
      mv "$RUNCACHED_tempdir/${RUNCACHED_CACHE_KEY:t}".{stdout,stderr,exitstatus} "$RUNCACHED_CACHE_DIR/${RUNCACHED_CACHE_KEY:h}" 2>/dev/null
      # Remove the temp dir even if mv left files behind, so a failed cache
      # update does not leak temp directories.
      rm -rf -- "$RUNCACHED_tempdir" 2>/dev/null
      exit $RUNCACHED_ret
    fi
  fi
fi
# only reached if cache not created successfully or lock couldn't be obtained
exec "$@"
Just want to say I still use and rely on this daily. It's always been rock solid. Thank you @akorn!
I use it so much I made my own fork of this that defines runcachedFunc
as a shell function in one of my rc files. I've found that this is very slightly faster because it doesn't have to start a new zsh process to run. But mainly I did this so I can use runcache
to cache the output of other shell functions.
I also added support for VERBOSE=1
, and when it encounters that env it outputs logging information to stderr which lets me know if the function is returning from cache, if a cache exists but is expired, or if no cache exists at all. It's been helpful for debugging. To make this work nicely I also had to remove VERBOSE
from the env used for calculating the cache key like you do for OLDPWD
. Unfortunately, this verbose logging gets included in the cache, which is pretty janky and confusing.
I also added support for sub-second cache TTLs just by using zstat +mtime -F "%s.%N"
. Though that's only supported in recent-ish zsh versions.
If there are others out there who would find any of these features useful, let me know and I can try to clean this up and share it. But for rock-solid quality I can't vouch enough for this as it already is. Cheers!
@varenc Could you by any chance share the runcachedFunc you mentioned? I'm thinking of integrating it in my dotfiles and would love it if I don't have to reinvent the wheel :)
@varenc Could you by any chance share the runcachedFunc you mentioned? I'm thinking of integrating it in my dotfiles and would love it if I don't have to reinvent the wheel :)
I don't immediately see a reason why just putting function runcachedFunc() {
at the top and }
at the bottom shouldn't work, possibly after minor fiddling.
@varenc: I just uploaded a version that I believe fixes these problems (pruning and parallelism). Take a look if you're still interested.