Skip to content

Instantly share code, notes, and snippets.

@ndbroadbent
Last active September 2, 2020 12:15
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ndbroadbent/d394f8a6890eddcaeafe9223e8b50be5 to your computer and use it in GitHub Desktop.
Save ndbroadbent/d394f8a6890eddcaeafe9223e8b50be5 to your computer and use it in GitHub Desktop.
A powerful CI caching tool for Google Cloud Storage
#!/bin/bash
set -e
# CI Cache Script for Google Cloud Storage
# TIP: Set CI_CACHE_VERBOSE=true while testing the script
# to show a list of all files that are compressed/extracted.
# Note that you might see "tar: write error" if there are too many
# files in the verbose output.
# Examples
# ----------------------------------------------------------------
#
# - A simple CI job that always downloads the cache (if present),
# runs bundle install, and then uploads the new cache.
# CI_COMMIT_REF_NAME is the current git branch.
# (This is the same behavior as the GitLab CI cache.)
#
# CACHE_KEY="gems-$CI_COMMIT_REF_NAME"
# ci_cache download $CACHE_KEY
# bundle install --path vendor/bundle
# ci_cache upload $CACHE_KEY vendor/bundle .bundle/config
#
#
# - A CI job that only runs bundle install and uploads the new cache
# if there is a change in Gemfile or Gemfile.lock.
# CI_COMMIT_REF_NAME is the current git branch.
# (This uses an isolated cache for each branch. So CI will need to run
# bundle install from scratch each time you push a new branch. Look at the
# next example to see how this can be solved with a "fallback" cache key.)
#
# CACHE_KEY="gems-$CI_COMMIT_REF_NAME"
# BUNDLER_SOURCE_FILES="Gemfile Gemfile.lock"
#
# ci_cache download $CACHE_KEY
# if ! ci_cache check_hash $CACHE_KEY $BUNDLER_SOURCE_FILES; then
# bundle install --path vendor/bundle
# ci_cache upload $CACHE_KEY vendor/bundle .bundle/config
# ci_cache update_hash $CACHE_KEY $BUNDLER_SOURCE_FILES
# fi
#
#
# - A CI job that only runs bundle install and uploads the new cache
# if there is a change in Gemfile or Gemfile.lock.
# If a cache doesn't already exist for the current git branch,
# then fall back to downloading the cache for the master branch.
#
# CACHE_KEY="gems-$CI_COMMIT_REF_NAME"
# FALLBACK_CACHE_KEY="gems-master"
# BUNDLER_SOURCE_FILES="Gemfile Gemfile.lock"
#
# ci_cache download $CACHE_KEY $FALLBACK_CACHE_KEY
# if ! ci_cache check_hash_with_fallback \
# $CACHE_KEY $FALLBACK_CACHE_KEY $BUNDLER_SOURCE_FILES; then
# bundle install --path vendor/bundle
# ci_cache upload $CACHE_KEY vendor/bundle .bundle/config
# ci_cache update_hash $CACHE_KEY $BUNDLER_SOURCE_FILES
# fi
# Preflight checks
# --------------------------------------------------
# Every command (including the help text) requires the Google Cloud SDK and
# both CI_CACHE_GCS_* environment variables to be present.
#
# `command -v` is a shell builtin, so the lookup works even on minimal CI
# images that do not ship the external `which` binary (where `which` itself
# failing would be misreported as "gcloud not found").
if ! command -v gcloud > /dev/null 2>&1 || ! command -v gsutil > /dev/null 2>&1; then
  echo "Could not find gcloud and/or gsutil." >&2
  echo "Please install the Google Cloud SDK: https://cloud.google.com/sdk/docs" >&2
  exit 1
fi

# Name of the GCS bucket that stores the "<key>.tar.gz" and "<key>.hash" objects.
if [ -z "$CI_CACHE_GCS_BUCKET" ]; then
  echo "CI_CACHE_GCS_BUCKET environment variable is required!" >&2
  exit 1
fi

# Base64-encoded service account JSON key (consumed by the authenticate command).
if [ -z "$CI_CACHE_GCS_AUTH" ]; then
  echo "CI_CACHE_GCS_AUTH environment variable is required!" >&2
  exit 1
fi
COMMAND="$1"

# Argument Validation
# --------------------------------------------------
# Sets CACHE_KEY (and, where supported, FALLBACK_CACHE_KEY) from the
# positional arguments and rejects keys that do not match CACHE_KEY_REGEX.
case "$COMMAND" in
  # These commands take a cache key as their first argument.
  remote_hash | check_hash | check_hash_with_fallback | update_hash | update_hash_manual | upload | download)
    CACHE_KEY="$2"
    if [[ -z "$CACHE_KEY" ]]; then
      echo "The '$COMMAND' command requires a <cache key> argument." >&2
      exit 1
    fi

    CACHE_KEY_REGEX="^[0-9A-Za-z_-]+$"
    if [[ ! $CACHE_KEY =~ $CACHE_KEY_REGEX ]]; then
      echo "Cache key '$CACHE_KEY' does not match: $CACHE_KEY_REGEX" >&2
      exit 1
    fi

    case "$COMMAND" in
      # These commands take a fallback cache key as their second argument.
      remote_hash | check_hash_with_fallback | download)
        FALLBACK_CACHE_KEY="$3"
        if [[ -z "$FALLBACK_CACHE_KEY" ]]; then
          # Only check_hash_with_fallback insists on having a fallback key.
          if [[ "$COMMAND" == "check_hash_with_fallback" ]]; then
            echo "Fallback cache key is required for check_hash_with_fallback" >&2
            exit 1
          fi
        elif [[ ! $FALLBACK_CACHE_KEY =~ $CACHE_KEY_REGEX ]]; then
          echo "Fallback cache key '$FALLBACK_CACHE_KEY' \
does not match: $CACHE_KEY_REGEX" >&2
          exit 1
        elif [[ "$CACHE_KEY" == "$FALLBACK_CACHE_KEY" ]]; then
          # A fallback identical to the primary key is pointless, so drop it.
          unset FALLBACK_CACHE_KEY
        fi
        ;;
    esac
    ;;
esac
# Shared Functions
# --------------------------------------------------
#######################################
# Prints one combined SHA256 hash for a list of source files.
#
# Each file is hashed on its own; the per-file hashes are then joined with
# single spaces (in argument order) and hashed again.
#
# Only file CONTENTS contribute to the result. Earlier revisions used
# `cut -f1` (TAB-delimited) on the sha256sum output, which kept the file
# name in the hashed text and broke on paths containing whitespace; hashes
# stored by such a revision will therefore differ once and be refreshed.
#
# Arguments: $@ - one or more paths to regular files
# Outputs:   the combined hash on stdout; error messages on stderr
# Returns:   0 on success, 1 if no file was given, a file is missing, or
#            a file could not be hashed
#######################################
function calculate_sha256_hash() {
  local source_file digest
  local -a file_hashes=()

  if [ -z "$1" ]; then
    echo "Please provide at least one source file!" >&2
    return 1
  fi

  for source_file in "$@"; do
    if ! [ -f "$source_file" ]; then
      echo "$source_file does not exist!" >&2
      return 1
    fi
    # Hash via stdin so the output is always "<hash>  -", regardless of any
    # special characters in the file name.
    digest=$(sha256sum < "$source_file") || return 1
    file_hashes+=("${digest%% *}")
  done

  printf '%s\n' "${file_hashes[*]}" | sha256sum | cut -d" " -f 1
}
# Prints the hash stored in the bucket under "<cache key>.hash".
# Arguments: $1 - cache key
# Outputs:   the contents of the downloaded hash object on stdout
#            (nothing if the copy failed and left the temp file empty)
# A failed gsutil copy is tolerated on purpose so a missing hash is not fatal.
function fetch_remote_sha256_hash() {
  GCS_HASH_KEY="$1.hash"
  GCS_LOCATION="gs://$CI_CACHE_GCS_BUCKET/$GCS_HASH_KEY"
  GCS_HASH_PATH=$(mktemp /tmp/cache-$1.XXXXXXXX)

  gsutil cp "$GCS_LOCATION" "$GCS_HASH_PATH" || true

  if [[ -f "$GCS_HASH_PATH" ]]; then
    cat "$GCS_HASH_PATH"
    rm "$GCS_HASH_PATH"
  fi
}
# Commands
# --------------------------------------------------
# Temporary files registered in CI_CACHE_TEMP_PATHS are deleted when the
# script exits, including when `set -e` aborts it half-way. This matters most
# for the decoded service account key written by the authenticate command.
CI_CACHE_TEMP_PATHS=()
function remove_ci_cache_temp_paths() {
  if [ "${#CI_CACHE_TEMP_PATHS[@]}" -gt 0 ]; then
    rm -f -- "${CI_CACHE_TEMP_PATHS[@]}"
  fi
}
trap remove_ci_cache_temp_paths EXIT

# Copies a GCS object to a local path.
# Arguments: $1 - gs:// location, $2 - local destination path
# Returns:   0 only if a non-empty file was written to the destination.
function fetch_cache_archive() {
  local gcs_location="$1" archive_path="$2"
  # mktemp already created the file; remove it so that its existence after
  # the copy proves gsutil really wrote it.
  rm -f -- "$archive_path"
  # A failed copy is expected when the cache does not exist yet.
  gsutil cp "$gcs_location" "$archive_path" || true
  if ! [ -f "$archive_path" ]; then
    return 1
  fi
  # -s checks the byte size; disk-block usage (du -k) can be 0 for small
  # non-empty files on some filesystems.
  if ! [ -s "$archive_path" ]; then
    echo "=> $archive_path is an empty file!" >&2
    return 1
  fi
  return 0
}

case "$COMMAND" in
  authenticate)
    if [ -z "$CI_CACHE_GCS_AUTH" ]; then
      echo "CI_CACHE_GCS_AUTH environment variable is required!" >&2
      exit 1
    fi
    GCS_AUTH_BASE64_PATH=$(mktemp /tmp/gcs_auth.json.base64.XXXXXXXX)
    CI_CACHE_TEMP_PATHS+=("$GCS_AUTH_BASE64_PATH")
    GCS_AUTH_PATH=$(mktemp /tmp/gcs_auth.json.XXXXXXXX)
    CI_CACHE_TEMP_PATHS+=("$GCS_AUTH_PATH")
    # Decode the base64 key into a JSON file that gcloud can read.
    printf '%s\n' "$CI_CACHE_GCS_AUTH" > "$GCS_AUTH_BASE64_PATH"
    openssl base64 -d -A -in "$GCS_AUTH_BASE64_PATH" -out "$GCS_AUTH_PATH"
    gcloud auth activate-service-account --key-file "$GCS_AUTH_PATH"
    ;;

  local_hash)
    calculate_sha256_hash "${@:2}"
    ;;

  remote_hash)
    REMOTE_HASH=$(fetch_remote_sha256_hash "$CACHE_KEY")
    if [ -n "$REMOTE_HASH" ]; then
      echo "$REMOTE_HASH"
      exit 0
    fi
    # Primary hash is missing: print the fallback hash instead (if requested).
    if [ -n "$FALLBACK_CACHE_KEY" ]; then
      fetch_remote_sha256_hash "$FALLBACK_CACHE_KEY"
    fi
    ;;

  check_hash | check_hash_with_fallback)
    # Source files start after the cache key(s).
    if [ "$COMMAND" = "check_hash" ]; then
      LOCAL_HASH=$(calculate_sha256_hash "${@:3}")
    else
      LOCAL_HASH=$(calculate_sha256_hash "${@:4}")
    fi

    # Do not let a value inherited from the environment affect the messages.
    IS_FALLBACK_KEY=""
    REMOTE_HASH=$(fetch_remote_sha256_hash "$CACHE_KEY")
    if [ -z "$REMOTE_HASH" ]; then
      echo "Could not find remote hash for cache key: $CACHE_KEY" >&2
      # Note: FALLBACK_CACHE_KEY can be unset if it's the same as CACHE_KEY.
      if [ "$COMMAND" = "check_hash_with_fallback" ] && [ -n "$FALLBACK_CACHE_KEY" ]; then
        REMOTE_HASH=$(fetch_remote_sha256_hash "$FALLBACK_CACHE_KEY")
        IS_FALLBACK_KEY=true
        if [ -z "$REMOTE_HASH" ]; then
          echo "Could not find remote hash for fallback cache key: $FALLBACK_CACHE_KEY" >&2
        fi
      fi
    fi
    if [ -z "$REMOTE_HASH" ]; then exit 1; fi

    if [ "$REMOTE_HASH" = "$LOCAL_HASH" ]; then
      if [ -n "$IS_FALLBACK_KEY" ]; then
        echo "Hash has not changed for the $FALLBACK_CACHE_KEY fallback cache ($LOCAL_HASH)" >&2
      else
        echo "Hash has not changed for the $CACHE_KEY cache ($LOCAL_HASH)" >&2
      fi
      exit 0
    fi

    if [ -n "$IS_FALLBACK_KEY" ]; then
      echo "Hash changed for the $FALLBACK_CACHE_KEY fallback cache!" >&2
    else
      echo "Hash changed for the $CACHE_KEY cache!" >&2
    fi
    echo "=> Previous: $REMOTE_HASH" >&2
    echo "=> Current: $LOCAL_HASH" >&2
    exit 1
    ;;

  update_hash | update_hash_manual)
    if [ "$COMMAND" = "update_hash" ]; then
      LOCAL_HASH=$(calculate_sha256_hash "${@:3}")
    else
      LOCAL_HASH="$3"
      # An empty stored hash is indistinguishable from a missing one.
      if [ -z "$LOCAL_HASH" ]; then
        echo "The 'update_hash_manual' command requires a <hash> argument." >&2
        exit 1
      fi
    fi
    GCS_HASH_PATH=$(mktemp "/tmp/cache-$CACHE_KEY.XXXXXXXX")
    CI_CACHE_TEMP_PATHS+=("$GCS_HASH_PATH")
    echo "$LOCAL_HASH" > "$GCS_HASH_PATH"
    GCS_HASH_KEY="$CACHE_KEY.hash"
    GCS_LOCATION="gs://$CI_CACHE_GCS_BUCKET/$GCS_HASH_KEY"
    echo "Updating hash for $CACHE_KEY cache ($GCS_LOCATION)..."
    echo "=> New hash: $LOCAL_HASH"
    gsutil cp "$GCS_HASH_PATH" "$GCS_LOCATION"
    ;;

  upload)
    CACHE_PATH=$(mktemp "/tmp/cache-$CACHE_KEY.XXXXXXXX")
    CI_CACHE_TEMP_PATHS+=("$CACHE_PATH")
    echo "Saving files to gzip archive for $CACHE_KEY cache: $CACHE_PATH"
    TAR_FLAGS="cz"
    if [ -n "$CI_CACHE_VERBOSE" ]; then TAR_FLAGS="${TAR_FLAGS}v"; fi
    tar "-$TAR_FLAGS" -f "$CACHE_PATH" "${@:3}"
    GCS_KEY="$CACHE_KEY.tar.gz"
    GCS_LOCATION="gs://$CI_CACHE_GCS_BUCKET/$GCS_KEY"
    echo "Uploading $CACHE_KEY cache to $GCS_LOCATION..."
    gsutil cp "$CACHE_PATH" "$GCS_LOCATION"
    ;;

  download)
    CACHE_PATH=$(mktemp "/tmp/cache-$CACHE_KEY.XXXXXXXX")
    CI_CACHE_TEMP_PATHS+=("$CACHE_PATH")
    GCS_KEY="$CACHE_KEY.tar.gz"
    GCS_LOCATION="gs://$CI_CACHE_GCS_BUCKET/$GCS_KEY"
    # Key of the archive that will actually be extracted (primary or fallback).
    EXTRACT_CACHE_KEY="$CACHE_KEY"

    echo "Downloading $CACHE_KEY cache from $GCS_LOCATION to $CACHE_PATH..." >&2
    if ! fetch_cache_archive "$GCS_LOCATION" "$CACHE_PATH"; then
      echo "Could not download $CACHE_KEY cache!" >&2
      # A missing cache must never fail the build: exit 0 with no stdout.
      if [ -z "$FALLBACK_CACHE_KEY" ]; then exit 0; fi

      FALLBACK_CACHE_PATH=$(mktemp "/tmp/cache-$FALLBACK_CACHE_KEY.XXXXXXXX")
      CI_CACHE_TEMP_PATHS+=("$FALLBACK_CACHE_PATH")
      FALLBACK_GCS_KEY="$FALLBACK_CACHE_KEY.tar.gz"
      FALLBACK_GCS_LOCATION="gs://$CI_CACHE_GCS_BUCKET/$FALLBACK_GCS_KEY"
      echo "Downloading fallback $FALLBACK_CACHE_KEY cache from $FALLBACK_GCS_LOCATION to $FALLBACK_CACHE_PATH..." >&2
      if ! fetch_cache_archive "$FALLBACK_GCS_LOCATION" "$FALLBACK_CACHE_PATH"; then
        echo "Could not download $FALLBACK_CACHE_KEY cache!" >&2
        exit 0
      fi
      # Extract the fallback archive, not the (missing) primary one.
      CACHE_PATH="$FALLBACK_CACHE_PATH"
      EXTRACT_CACHE_KEY="$FALLBACK_CACHE_KEY"
    fi

    echo "Extracting $EXTRACT_CACHE_KEY cache..." >&2
    TAR_FLAGS="xz"
    if [ -n "$CI_CACHE_VERBOSE" ]; then TAR_FLAGS="${TAR_FLAGS}v"; fi
    tar "-$TAR_FLAGS" -f "$CACHE_PATH"
    # Callers test stdout for this exact word to detect a restored cache.
    echo "success"
    ;;

  *)
    cat <<HELP >&2
Usage: $0 [
authenticate |
local_hash | remote_hash | check_hash | check_hash_with_fallback |
update_hash | update_hash_manual |
upload | download
]
* authenticate
Sets up Google Cloud authentication for gsutil.
Looks for a base64 encoded JSON key in \$CI_CACHE_GCS_AUTH.
Generate this base64 string by running:
$ openssl base64 -A -in your-gcs-auth-key.json
* local_hash <source files...>
Shows the current SHA256 hash for a list of local source files.
* remote_hash <cache key> [fallback cache key]
Fetches and prints the saved SHA256 hash from the GCS bucket.
If a fallback cache key is specified, this cache key will be downloaded if
the original cache key is missing.
(This is useful for branches and pull requests, if you want to default
to downloading the cache for the master branch.)
* check_hash <cache key> <source files...>
Fetches the remote hash for <cache key>.
=> If no remote hash is found, exits with code 1.
Calculates the local hash for the source files.
=> If the local and remote hashes are different, exits with code 1.
=> Otherwise, if the hashes are the same, exits with code 0.
* check_hash_with_fallback <cache key> <fallback cache key> <source files...>
Fetch the remote hash for <cache key>. If this does not exist,
fetch the remote hash for <fallback cache key>.
=> If no remote hashes are found, exits with code 1.
Calculates the local hash for the source files.
=> If the local and remote hashes are different, exits with code 1.
=> Otherwise, if the hashes are the same, exits with code 0.
NOTE: The fallback hash is only fetched if the first hash does not exist
(but not if a non-matching hash is found.)
* update_hash <cache key> <source files...>
Calculates the SHA256 hash for a list of source files,
then updates the remote hash in GCS.
* update_hash_manual <cache key> <hash>
If you calculate a hash manually, use this to set a custom hash in GCS.
For example, I run the following to calculate a hash for all files
in a directory that are tracked by git:
$ git ls-files <directory> | xargs sha256sum | cut -d" " -f1 | \
sha256sum | cut -d" " -f1
* upload <cache key> <cache paths...>
*You must run "$0 authenticate" before running this command.*
Creates a gzipped tar archive for all the cached files, then uploads
the archive to a GCS bucket (\$CI_CACHE_GCS_BUCKET).
The file key in GCS is "<cache key>.tar.gz".
* download <cache key> [fallback cache key]
*You must run "$0 authenticate" before running this command.*
Downloads and extracts the cached files from "<cache key>.tar.gz".
If a fallback cache key is specified, this will be downloaded if
the first cache is missing.
Note: All log messages are sent to stderr. The download command always
exits with code 0, even if no cache is found.
(So a failed cache download will never fail your CI build.)
=> If a cache could be downloaded, the script prints "success" to stdout.
=> If no cache could be downloaded, the script prints no output to stdout.
This means you can do:
DOWNLOAD_RESULT=\$($0 download my-cache)
if [ "\$DOWNLOAD_RESULT" = "success" ]; then
...
fi
HELP
    exit 1
    ;;
esac
# ...
# In a multistage CI build, you don't always need to download
# the cache during the setup stage.
# (You just need to see if the file hashes match.)
# Pipeline stages, in execution order.
stages:
  - setup
  - test
install_dependencies:
  stage: setup
  script:
    # Only when the Gemfile/Gemfile.lock hash differs from the stored one
    # (or no hash exists): restore the cache, install gems, then upload the
    # refreshed cache and its hash. $SOURCE_FILES and $CACHE_FILES are left
    # unquoted on purpose so they split into separate arguments.
    - '(
      CACHE_PREFIX="gems" &&
      SOURCE_FILES="Gemfile Gemfile.lock" &&
      CACHE_FILES="vendor/bundle .bundle/config" &&
      CACHE_KEY="$CACHE_PREFIX-$CI_COMMIT_REF_NAME" &&
      FALLBACK_CACHE_KEY="$CACHE_PREFIX-master" &&
      if ! ./scripts/ci_cache check_hash_with_fallback
        "$CACHE_KEY" "$FALLBACK_CACHE_KEY" $SOURCE_FILES; then
        ./scripts/ci_cache download "$CACHE_KEY" "$FALLBACK_CACHE_KEY" &&
        bundle install -j $(nproc)
          --path "$BUNDLE_PATH"
          --binstubs "$BUNDLE_BIN" &&
        ./scripts/ci_cache upload "$CACHE_KEY" $CACHE_FILES &&
        ./scripts/ci_cache update_hash "$CACHE_KEY" $SOURCE_FILES;
      fi
      )'
# ...
rspec:
  before_script:
    # Restore the gem cache. Gems are reinstalled (and the cache and hash
    # re-uploaded) only if nothing could be downloaded or the stored hash no
    # longer matches Gemfile/Gemfile.lock. $SOURCE_FILES and $CACHE_FILES are
    # left unquoted on purpose so they split into separate arguments.
    - '(
      CACHE_PREFIX="gems" &&
      SOURCE_FILES="Gemfile Gemfile.lock" &&
      CACHE_FILES="vendor/bundle .bundle/config" &&
      CACHE_KEY="$CACHE_PREFIX-$CI_COMMIT_REF_NAME" &&
      FALLBACK_CACHE_KEY="$CACHE_PREFIX-master" &&
      DOWNLOAD_RESULT="$(./scripts/ci_cache download "$CACHE_KEY" "$FALLBACK_CACHE_KEY")" &&
      if [ "$DOWNLOAD_RESULT" != "success" ] ||
        ! ./scripts/ci_cache check_hash_with_fallback
        "$CACHE_KEY" "$FALLBACK_CACHE_KEY" $SOURCE_FILES; then
        bundle install -j $(nproc)
          --path "$BUNDLE_PATH"
          --binstubs "$BUNDLE_BIN" &&
        ./scripts/ci_cache upload "$CACHE_KEY" $CACHE_FILES &&
        ./scripts/ci_cache update_hash "$CACHE_KEY" $SOURCE_FILES;
      fi
      )'
  script:
    - rspec
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment