Last active
May 18, 2017 13:19
-
-
Save miminar/4a5f85c17691cc1801cfde1cd851e819 to your computer and use it in GitHub Desktop.
A script to remove unreferenced blobs from registry storage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Removes blobs that are no longer referenced by any image known to etcd from
# the filesystem storage of the integrated docker-registry. Runs as a dry-run
# unless '-c' is given. See USAGE below for the full description.

set -euo pipefail
IFS=$'\n\t'

readonly AUDITOR_ROLE='system:image-auditor'
readonly DEFAULT_DAYS=3
readonly DEFAULT_STORAGE_PATH_ROOT="/registry"
readonly DEFAULT_CONFIG_PATH="/config.yml"
# ${BASH_SOURCE[0]##*/} yields the script's base name without forking basename
# and without word-splitting a path that contains whitespace.
readonly USAGE="Usage: ${BASH_SOURCE[0]##*/} [OPTIONS]

It lists blobs on the registry storage and compares them against a set of blobs
known to etcd. It will remove those blobs not referenced in etcd. By default,
the script doesn't do any changes, it just lists candidates for the removal.
Pass '-c' to perform the actual removal.

The script makes several assumptions:
  - docker-registry uses filesystem driver
  - docker-registry is deployed in 'default' namespace using a deploymentconfig
    with the name 'docker-registry'
  - docker-registry runs in a read-only mode in order to prevent races;
    or at least no image pushes should occur while this script is being run

What will be deleted:
  - layer blobs and layer links pointing to them
  - manifest config blobs

What will NOT be deleted:
  - manifest blobs, signature blobs
    - these are small and their removal will be left to pruning algorithm once
      reworked

See bug https://bugzilla.redhat.com/show_bug.cgi?id=1408676

After the script is done, registry needs to be restarted in order to update
internal caches.

Options:
  -h          Print this message and exit.
  -c          Confirm - apply the changes.
  -y <days>   Keep blobs younger than <days> (defaults to ${DEFAULT_DAYS}).
              To consider all blobs regardless of age, pass -1.
  -n          Don't ask for confirmation if registry is not running in
              readonly mode.

**** MAKE SURE THE REGISTRY RUNS IN READONLY MODE BEFORE RUNNING THIS ******

Add following lines to your docker-registry config:

  storage:
    maintenance:
      readonly:
        enabled: true
"

# maps blob's sha to a comma-separated list of paths relative to the registry
# storage root (<rootdirectory>/docker/registry/v2); an entry may look like:
#   blobs[<sha>]='blobs/sha256/<xx>/<sha>,repositories/<ns>/<name>/_layers/sha256/<sha>'
declare -gA blobs=()
# contains blobs that should not be deleted (keys are shas, values are 1)
declare -gA blobstokeep=()

# settings, possibly overridden by command-line options
days="${DEFAULT_DAYS}"
dry_run=1
dont_ask=0
# Parses the command-line options into the global settings.
# Globals:   days, dry_run, dont_ask (written); USAGE (read)
# Arguments: the script's command-line arguments
# Outputs:   the usage text on -h; diagnostics on stderr
# Returns:   0 on success; exits 0 after -h and 1 on invalid input
function parse_options() {
  local opt
  local OPTIND=1 OPTARG
  while getopts 'hy:cn' opt; do
    case "${opt}" in
      h)
        echo "${USAGE}"
        exit 0
        ;;
      y)
        # The value is interpolated into a find(1) command line that is run
        # inside the registry pod, so accept nothing but an integer.
        if [[ ! "${OPTARG}" =~ ^-?[0-9]+$ ]]; then
          echo "Invalid number of days '${OPTARG}'! See help." >&2
          exit 1
        fi
        days="${OPTARG}"
        ;;
      n)
        dont_ask=1
        ;;
      c)
        dry_run=0
        ;;
      *)
        echo "Unrecognized option! See help." >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
# Records one path found on the registry storage.
# Globals:   blobs (read/written), blobstokeep (written)
# Arguments: $1 - prefix prepended to the stored path (e.g. blobs/sha256)
#            $2 - 1 to mark the sha as "must be kept", anything else to
#                 record the path as a removal candidate
#            $3 - path as printed by find (may start with ./ and may carry
#                 trailing non-hex characters such as a carriage return)
# Returns:   0 always; empty paths are silently ignored
function append_sha_path() {
  local prefix="$1"
  local keep="$2"
  local path="$3"
  local sha val
  local -r lead_re='^\./+(.*)$'
  local -r tail_re='^(.*[a-fA-F0-9])[^a-fA-F0-9]*$'

  # Normalise in-shell rather than forking sed once per blob:
  # drop a leading "./" ...
  if [[ "${path}" =~ ${lead_re} ]]; then
    path="${BASH_REMATCH[1]}"
  fi
  # ... and everything after the last hex digit.
  if [[ "${path}" =~ ${tail_re} ]]; then
    path="${BASH_REMATCH[1]}"
  else
    path=''
  fi
  [[ -n "${path}" ]] || return 0

  # the sha is the last path component
  sha="${path##*/}"
  if [[ "${keep}" == 1 ]]; then
    blobstokeep[$sha]=1
    return 0
  fi

  val="${blobs[$sha]:-}"
  path="${prefix}/${path}"
  if [[ -n "${val}" ]]; then
    blobs[$sha]="${val},${path}"
  else
    blobs[$sha]="${path}"
  fi
}
# Collects removal candidates and must-keep shas from the registry storage.
# Globals:   days (read); blobs and blobstokeep are filled via append_sha_path
# Arguments: $1 - name of the registry pod in the 'default' namespace
#            $2 - storage path inside the pod (<root>/docker/registry/v2)
# Returns:   0; returns early when there is no blob directory or when no blob
#            directory is older than ${days} days
function load_blobs_from_storage() {
  local pod="$1" storage_path="$2"
  local blob keep pth findcmd
  local -a bloblist=() layerlist=()

  pth="${storage_path}/blobs/sha256"
  if ! oc exec -n default "${pod}" -- /bin/test -d "${pth}"; then
    return 0
  fi

  # Blob directories (<xx>/<sha>) older than ${days} days. '&&' keeps find
  # from running in an unrelated directory should the cd fail.
  readarray -t bloblist <<<"$(oc exec -n default "${pod}" -- /bin/sh -c \
    "cd '${pth}' && find -maxdepth 2 -mindepth 2 -type d -regex '.*/../[a-f0-9]+$' -mtime +${days}")"
  [[ -n "${bloblist[*]}" ]] || return 0
  for blob in "${bloblist[@]:-}"; do
    append_sha_path "blobs/sha256" 0 "${blob}"
  done

  # Layer links older than ${days} days are removal candidates; manifest
  # revisions and signatures of any age mark their sha as "keep".
  pth="${storage_path}/repositories"
  findcmd='find -maxdepth 9 -mindepth 5 -regextype egrep -type d \('
  findcmd+=" \( -regex '.*/[^/]+/[^/]+/_layers/sha256/[a-f0-9]+$' -mtime +${days} \)"
  findcmd+=" -o -regex '.*/[^/]+/[^/]+/_manifests/revisions(|/sha256/[a-f0-9]+/signatures)/sha256/[a-f0-9]+$' \)"
  readarray -t layerlist <<<"$(oc exec -n default "${pod}" -- /bin/sh -c \
    "cd '${pth}' && ${findcmd} 2>/dev/null")"
  for blob in "${layerlist[@]:-}"; do
    if [[ "${blob}" =~ /_layers/ ]]; then
      keep=0
    else
      keep=1
    fi
    append_sha_path "repositories" "${keep}" "${blob}"
  done
}
# Verifies that the current user may read images and exec into pods in the
# 'default' namespace.
# Globals:   AUDITOR_ROLE (read)
# Outputs:   explanation and a suggested remedy on stderr when a check fails
# Returns:   0 when all permissions are granted, 1 otherwise
function check_permissions() {
  local authorized=1
  local verb

  for verb in get list; do
    if [[ "$(oc policy can-i "${verb}" images)" != yes ]]; then
      echo "The user isn't authorized to ${verb} images!" >&2
      authorized=0
    fi
  done
  if [[ "${authorized}" == 0 ]]; then
    echo "Ask your admin to give you permissions to work with images, e.g.:" >&2
    echo " oadm policy add-cluster-role-to-user ${AUDITOR_ROLE} $(oc whoami)" >&2
    return 1
  fi

  if [[ "$(oc policy can-i -n default create pods/exec)" != yes ]]; then
    echo "The user isn't authorized to exec into pods in the default namespace!" >&2
    echo "Ask your admin to give you admin role in the default namespace, e.g.:" >&2
    echo " oadm policy add-role-to-user -n default edit $(oc whoami)" >&2
    return 1
  fi
}
# Filter: removes the indentation of the first indented line from every line
# read on stdin. Lines seen before the first indented one are passed through
# unchanged; later lines that do not start with that indentation are too.
# Outputs:   the unindented text on stdout
function unindent() {
  local indentation='' line
  local -r indent_re='^([[:space:]]+)(.*)$'

  # IFS= keeps leading whitespace intact regardless of the caller's IFS; the
  # '|| [[ -n ... ]]' part handles a final line lacking a trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    if [[ -z "${indentation}" ]]; then
      if [[ "${line}" =~ ${indent_re} ]]; then
        indentation="${BASH_REMATCH[1]}"
        printf '%s\n' "${BASH_REMATCH[2]}"
      else
        printf '%s\n' "${line}"
      fi
      continue
    fi
    printf '%s\n' "${line#"${indentation}"}"
  done
}
# Checks that the registry pod is reachable, uses the filesystem storage
# driver and (ideally) runs in read-only mode.
# Globals:   DEFAULT_CONFIG_PATH (read)
# Arguments: $1 - name of the registry pod in the 'default' namespace
#            $2 - 1 for a dry-run, 0 when changes will be applied
#            $3 - 1 to skip the confirmation prompt
# Outputs:   diagnostics on stderr; the confirmation prompt on stdout
# Returns:   0 to continue, 1 on failure or an unrecognized answer; exits 0
#            when the user answers "no"
function check_registry() {
  local pod="$1"
  local dry_run="$2"
  local dont_ask="$3"
  local pod_env line config config_path='' enabled='' answer
  local registry_readonly=0

  if ! oc exec -n default "${pod}" -- '/bin/true'; then
    echo "Failed to exec into pod '${pod}'!" >&2
    return 1
  fi

  # Settings in the pod's environment override the configuration file.
  pod_env="$(oc exec -n default "${pod}" -- env)" || pod_env=''
  while IFS= read -r line; do
    line="${line%$'\r'}"
    case "${line}" in
      REGISTRY_CONFIGURATION_PATH=*)
        config_path="${line#*=}"
        ;;
      REGISTRY_STORAGE_MAINTENANCE_READONLY_ENABLED=*)
        enabled="${line#*=}"
        ;;
    esac
  done <<<"${pod_env}"
  # trim trailing whitespace from the path
  config_path="${config_path%"${config_path##*[![:space:]]}"}"
  config_path="${config_path:-${DEFAULT_CONFIG_PATH}}"

  if ! config="$(oc exec -n default "${pod}" -- /bin/cat "${config_path}")"; then
    echo "Failed to read registry configuration '${config_path}' from pod '${pod}'!" >&2
    return 1
  fi

  # grep output is discarded rather than using -q: with pipefail, an early
  # exit of grep could kill the writer with SIGPIPE and fail the pipeline.
  if ! sed -n -e '/^storage:/,/^[^[:space:]#]/p' <<<"${config}" \
      | grep '^\s\+filesystem:' >/dev/null; then
    (
      echo "The registry must be configured to use filesystem driver in order to run this script!"
      echo "Please refer to documentation at"
      echo " https://docs.openshift.org/latest/install_config/registry/extended_registry_configuration.html#docker-registry-configuration-reference-storage"
      echo "to configure it."
    ) >&2
    return 1
  fi

  # Descend storage -> maintenance -> readonly, one nesting level per step.
  if sed -n '/^storage:/,/^[^[:space:]#]/s/^\s//p' <<<"${config}" | unindent \
      | sed -n '/^maintenance:/,/^[^[:space:]#]/s/^\s//p' | unindent \
      | sed -n '/^readonly:/,/^[^[:space:]#]/s/^\s//p' \
      | grep '\<enabled:\s*true' >/dev/null; then
    registry_readonly=1
  fi

  # keep the leading alphabetic run only and compare case-insensitively
  enabled="${enabled%%[^[:alpha:]]*}"
  case "${enabled,,}" in
    true)
      registry_readonly=1
      ;;
    false)
      registry_readonly=0
      ;;
  esac

  case "${dry_run}${dont_ask}${registry_readonly}" in
    000)
      echo -n "Registry doesn't seem to be in a read-only mode, do you want to continue? [yes/no] "
      read -r answer
      case "${answer,,}" in
        y|yes)
          return 0
          ;;
        n|no)
          exit 0
          ;;
        *)
          return 1
          ;;
      esac
      ;;
    ??0)
      echo "Warning: The registry doesn't seem to be in a read-only mode!" >&2
      ;;
  esac
}
# Prints the name of the first pod labelled deploymentconfig=docker-registry
# in the 'default' namespace; prints nothing when no such pod exists.
# Returns:   0 on success, 1 when the pods cannot be listed
function get_registry_pod() {
  local names

  names="$(oc get pods -n default \
    -l deploymentconfig=docker-registry \
    -o jsonpath=$'{range .items[*]}{.metadata.name}\n{end}')" || return 1

  # Take the first line in-shell; piping into 'head -n 1' can fail the whole
  # pipeline with SIGPIPE when pipefail is set.
  if [[ -n "${names}" ]]; then
    printf '%s\n' "${names%%$'\n'*}"
  fi
}
# Prints the directory inside the registry pod that holds the v2 storage
# layout (<rootdirectory>/docker/registry/v2).
# The root directory is taken, in increasing priority, from
# DEFAULT_STORAGE_PATH_ROOT, storage.filesystem.rootdirectory in the registry
# configuration file, and REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY in the
# pod's environment.
# Globals:   DEFAULT_STORAGE_PATH_ROOT, DEFAULT_CONFIG_PATH (read)
# Arguments: $1 - name of the registry pod in the 'default' namespace
# Outputs:   the storage path on stdout
function get_registry_storage_path() {
  local pod="$1"
  local root="${DEFAULT_STORAGE_PATH_ROOT}"
  local pod_env line config config_path='' configroot='' envroot=''

  pod_env="$(oc exec -n default "${pod}" -- /bin/env)" || pod_env=''
  while IFS= read -r line; do
    line="${line%$'\r'}"
    case "${line}" in
      REGISTRY_CONFIGURATION_PATH=*)
        config_path="${line#*=}"
        ;;
      REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY=*)
        envroot="${line#*=}"
        ;;
    esac
  done <<<"${pod_env}"
  # Trim trailing whitespace only; the path itself must stay intact.
  config_path="${config_path%"${config_path##*[![:space:]]}"}"
  envroot="${envroot%"${envroot##*[![:space:]]}"}"
  config_path="${config_path:-${DEFAULT_CONFIG_PATH}}"

  config="$(oc exec -n default "${pod}" -- /bin/cat "${config_path}")" || config=''
  # Descend storage -> filesystem, pick rootdirectory, then strip trailing
  # whitespace and a pair of surrounding quotes from the value.
  configroot="$(sed -n '/^storage:/,/^[^[:space:]#]/s/^\s//p' <<<"${config}" | unindent \
    | sed -n '/^filesystem:/,/^[^[:space:]#]/s/^\s//p' \
    | sed -n -e 's/\s*rootdirectory:\s*\(.*\)/\1/p' \
    | sed -e 's/[[:space:]]\+$//' -e "s/^\([\"']\)\(.*\)\1\$/\2/")" || configroot=''

  if [[ -n "${configroot}" ]]; then
    root="${configroot}"
  fi
  if [[ -n "${envroot}" ]]; then
    root="${envroot}"
  fi
  echo "${root}/docker/registry/v2"
}
# Marks every blob referenced by an image known to the API server as
# "must be kept": the image name itself, the manifest config (schema 2 images
# only) and all layers.
# Globals:   blobstokeep (written)
# Outputs:   a diagnostic on stderr when the images cannot be listed
# Returns:   0 on success, 1 when 'oc get images' fails. The failure must not
#            be ignored: with an empty keep-set every old blob on the storage
#            would be considered unreferenced.
function load_blobs_from_etcd() {
  local depstmpl output dep
  local -a deps=()

  depstmpl=$'{{range $img := .items}}{{$img.metadata.name}}\n'
  depstmpl+=$'{{if and $img.dockerImageMetadata.Id (eq $img.dockerImageManifestMediaType'
  depstmpl+=$' "application/vnd.docker.distribution.manifest.v2+json")}}'
  # dockerImageMetadata.Id is a sha of manifest config which is a blob dependency of the manifest
  depstmpl+=$'{{$img.dockerImageMetadata.Id}}\n{{end}}'
  depstmpl+=$'{{range $l := $img.dockerImageLayers}}{{$l.name}}\n{{end}}{{end}}'

  if ! output="$(oc get -o go-template="${depstmpl}" images)"; then
    echo "Failed to list images known to etcd!" >&2
    return 1
  fi
  [[ -n "${output}" ]] || return 0

  readarray -t deps <<<"${output}"
  for dep in "${deps[@]}"; do
    dep="${dep%$'\r'}"
    # strip the digest algorithm prefix (e.g. "sha256:")
    dep="${dep##sha*:}"
    # an empty key is not a valid associative array subscript
    [[ -n "${dep}" ]] || continue
    blobstokeep["${dep}"]=1
  done
}
# Prints, one per line, sorted and de-duplicated, the storage paths of all
# blobs recorded in 'blobs' whose sha is not marked in 'blobstokeep'.
# Globals:   blobs, blobstokeep (read)
# Outputs:   paths relative to the registry storage path on stdout
function paths_to_remove() {
  local blob
  local -r nl=$'\n'

  for blob in "${!blobs[@]}"; do
    if [[ "${blobstokeep[$blob]:-}" == 1 ]]; then
      continue
    fi
    # one path per line; avoids forking tr for every blob
    printf '%s\n' "${blobs[$blob]//,/${nl}}"
  done | sort -u
}
# Removes (or, in a dry-run, only lists) the paths printed by paths_to_remove.
# Arguments: $1 - name of the registry pod in the 'default' namespace
#            $2 - 1 for a dry-run, anything else to really delete
#            $3 - storage path inside the pod that the paths are relative to
# Outputs:   one line per (would-be) removed path on stdout; a note on stderr
#            when nothing was found
function delete_blobs() {
  local pod="$1" dry_run="$2" storage_path="$3"
  local counter=0
  local line

  # stdin of the whole if-statement is the list of paths to remove
  if [[ "${dry_run}" == 1 ]]; then
    while IFS= read -r line; do
      counter=$((counter + 1))
      echo "Would remove ${line}"
    done
  else
    # The remote xargs consumes the path list from the inherited stdin; its
    # verbose output is read back on fd 3, one line per removed entry.
    # '&&' guarantees that nothing is removed if the cd fails.
    while IFS= read -r -u 3 line; do
      counter=$((counter + 1))
      echo "${line}"
    done 3< <(oc exec -i -n default "${pod}" -- /bin/sh -c \
      "cd '${storage_path}' && xargs -r rm -rvf --")
    if [[ "${counter}" -gt 0 ]]; then
      echo -en "\nThe registry is pruned, make sure to restart the "
      echo "registry before the subsequent use. E.g.:"
      echo " oc scale --replicas=0 dc/docker-registry && oc scale --replicas=1 dc/docker-registry"
    fi
  fi < <(paths_to_remove)

  if [[ "${counter}" == 0 ]]; then
    echo "No unreferenced blobs found." >&2
  fi
}
# --- main sequence ---------------------------------------------------------

check_permissions || exit 1

# Do not let 'set -e' abort silently here; report the failure below instead.
pod="$(get_registry_pod)" || pod=''
if [[ -z "${pod}" ]]; then
  echo "Failed to get registry pod!" >&2
  exit 1
fi

storage_path="$(get_registry_storage_path "${pod}")"
check_registry "${pod}" "${dry_run}" "${dont_ask}" || exit 1

load_blobs_from_storage "${pod}" "${storage_path}"
if [[ "${#blobs[@]}" -eq 0 ]]; then
  echo "No blobs found on registry storage."
  exit 0
fi

# Without the list of referenced blobs nothing may be considered unreferenced.
load_blobs_from_etcd || exit 1
delete_blobs "${pod}" "${dry_run}" "${storage_path}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment