Skip to content

Instantly share code, notes, and snippets.

@gbougeard
Last active January 10, 2023 21:27
Show Gist options
  • Star 16 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save gbougeard/48e190f931653f99aaea668dd03759ef to your computer and use it in GitHub Desktop.
Save gbougeard/48e190f931653f99aaea668dd03759ef to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# Remove outdated manifest links from a Docker registry v2 storage tree and
# estimate how much blob storage those outdated manifests reference.
#
# Usage: [DRY_RUN=true] [ROOT_PATH=/path/to/data] <this script> [IMAGE]
#   IMAGE      Repository to clean, relative to the "repositories" directory.
#              When omitted, every repository (nested ones included) is cleaned.
#   DRY_RUN    Any non-empty value other than false/0/no/off: log the rm
#              commands instead of running them.
#   ROOT_PATH  Registry data directory (defaults to the historical location).
#
# Results are written to a timestamped directory created in the current
# directory: images2clean, one <image>.log per repository, the
# <image>-blob2delete / -blob2delete.sort / -path2delete lists and a "total"
# file with the estimated size per image.
#
# Only tag index links and revision links are removed. Blobs are listed and
# measured, never deleted by this script.
set -x

DATE=$(date +%Y%m%d_%H%M%S)
OUTPUT_DIR=${DATE}

ROOT_PATH="${ROOT_PATH:-/Users/gbougeard/REGISTRY/data}"
REGISTRY_HOME="docker/registry/v2"
DIR_REPOSITORIES="repositories"
DIR_TAGS="_manifests/tags"
DIR_REVISIONS="_manifests/revisions"
DIR_BLOBS="blobs"
PATH_REGISTRY=${ROOT_PATH}/${REGISTRY_HOME}
PATH_REPOSITORIES=${PATH_REGISTRY}/${DIR_REPOSITORIES}
PATH_BLOBS=${PATH_REGISTRY}/${DIR_BLOBS}

if [ ! -d "${PATH_REPOSITORIES}" ]; then
  echo "Registry repositories directory not found: ${PATH_REPOSITORIES}" >&2
  exit 1
fi
mkdir -- "${OUTPUT_DIR}" || exit 1

# Decide once whether this is a dry run, so the banner and the deletion guards
# can never disagree (previously DRY_RUN=false still skipped the deletions).
case "${DRY_RUN:-}" in
  '' | false | FALSE | 0 | no | off) dry_run=false ;;
  *) dry_run=true ;;
esac
if [ "${dry_run}" = true ]; then
  echo "Running in dry-run mode. Will not make any changes"
fi

image=${1}
IMAGES_FILE="${OUTPUT_DIR}/images2clean"
# If no image is given, clean all images.
if [ -n "${image}" ]; then
  printf '%s\n' "${image}" > "${IMAGES_FILE}"
else
  # A repository is any directory that holds a _manifests folder; searching
  # for those also finds namespaced repositories such as "team/app".
  (cd "${PATH_REPOSITORIES}" && find . -type d -name _manifests -prune) \
    | sed -e 's|^\./||' -e 's|/_manifests$||' \
    | sort > "${IMAGES_FILE}"
fi

# The list is read on fd 3 so that no command inside the loop can consume it.
while IFS= read -r -u 3 IMAGE || [ -n "${IMAGE}" ]; do
  [ -n "${IMAGE}" ] || continue

  PATH_TAGS=${PATH_REPOSITORIES}/${IMAGE}/${DIR_TAGS}
  PATH_REVISIONS=${PATH_REPOSITORIES}/${IMAGE}/${DIR_REVISIONS}
  LOG_FILE="${OUTPUT_DIR}/${IMAGE}.log"
  BLOB_LIST="${OUTPUT_DIR}/${IMAGE}-blob2delete"
  PATH_LIST="${OUTPUT_DIR}/${IMAGE}-path2delete"

  # IMAGE may contain slashes; make sure the log's parent directory exists.
  mkdir -p -- "${LOG_FILE%/*}"
  {
    echo "Cleaning ${IMAGE}"
    echo ""
  } >> "${LOG_FILE}"

  # Revisions are stored per repository, not per tag, so a digest that is
  # outdated for one tag can still be the current digest of another tag.
  # Collect every current digest first so those revisions are preserved.
  current_hashes=" "
  for link_file in "${PATH_TAGS}"/*/current/link; do
    [ -f "${link_file}" ] || continue
    current_hashes="${current_hashes}$(sed 's|sha256:||' "${link_file}") "
  done

  for tag_dir in "${PATH_TAGS}"/*; do
    [ -d "${tag_dir}" ] || continue
    tag=${tag_dir##*/}
    echo "Tag ${tag}" >> "${LOG_FILE}"

    image_hash=""
    if [ -f "${tag_dir}/current/link" ]; then
      image_hash=$(sed 's|sha256:||' "${tag_dir}/current/link")
    fi
    if [ -z "${image_hash}" ]; then
      # Without a known current digest nothing can safely be called outdated.
      echo "No current link found, skipping" >> "${LOG_FILE}"
      echo "==============================" >> "${LOG_FILE}"
      continue
    fi
    echo "Current hash is ${image_hash}" >> "${LOG_FILE}"

    # If there is more than one entry in ${tag_dir}/index/sha256, the extra
    # ones are outdated digests, and they are the ones we want to delete.
    index_hashes=()
    for index_entry in "${tag_dir}/index/sha256"/*; do
      [ -e "${index_entry}" ] || continue
      index_name=${index_entry##*/}
      [ "${index_name}" = "${image_hash}" ] && continue
      index_hashes+=("${index_name}")
    done

    if [ "${#index_hashes[@]}" -eq 0 ]; then
      echo "No hash to clean" >> "${LOG_FILE}"
      echo "==============================" >> "${LOG_FILE}"
      continue
    fi
    echo "-----------------------" >> "${LOG_FILE}"
    echo "There are ${#index_hashes[@]} hashes to delete" >> "${LOG_FILE}"

    for hash in "${index_hashes[@]}"; do
      index_path="${tag_dir}/index/sha256/${hash}"
      revision_path="${PATH_REVISIONS}/sha256/${hash}"

      echo "Deleting index hash ${index_path}" >> "${LOG_FILE}"
      if [ "${dry_run}" = true ]; then
        echo "Would have run : rm -rf ${index_path}" >> "${LOG_FILE}"
      else
        rm -rf -- "${index_path}"
      fi

      case "${current_hashes}" in
        *" ${hash} "*)
          echo "Keeping revision hash ${revision_path}: still current for another tag" >> "${LOG_FILE}"
          continue
          ;;
      esac

      echo "Deleting revision hash ${revision_path}" >> "${LOG_FILE}"
      if [ "${dry_run}" = true ]; then
        echo "Would have run : rm -rf ${revision_path}" >> "${LOG_FILE}"
      else
        rm -rf -- "${revision_path}"
      fi

      # Estimate blobs to delete from the manifest stored in the blob store.
      manifest="${PATH_BLOBS}/sha256/${hash:0:2}/${hash}/data"
      if [ ! -f "${manifest}" ]; then
        echo "Manifest ${manifest} not found, skipping blob estimate" >> "${LOG_FILE}"
        continue
      fi
      # "// empty" and "[]?" keep manifests that have no .config or .layers
      # from writing a literal "null" into the blob list.
      uniq_digest=$(jq -r '.config.digest // empty' "${manifest}" | sed 's|sha256:||')
      if [ -n "${uniq_digest}" ]; then
        printf '%s\n' "${uniq_digest}" >> "${BLOB_LIST}"
      fi
      echo "${uniq_digest} unique digest found" >> "${LOG_FILE}"
      layers=$(jq -r '.layers[]?.digest // empty' "${manifest}" | sed 's|sha256:||')
      if [ -n "${layers}" ]; then
        printf '%s\n' "${layers}" >> "${BLOB_LIST}"
      fi
      echo "${layers} layers found" >> "${LOG_FILE}"
    done
    echo "==============================" >> "${LOG_FILE}"
  done
  echo "" >> "${LOG_FILE}"

  # Estimate freed storage size.
  if [ ! -s "${BLOB_LIST}" ]; then
    echo "No blobs to delete" >> "${LOG_FILE}"
    continue
  fi
  sort -u "${BLOB_LIST}" > "${BLOB_LIST}.sort"

  nb_blobs=0
  existing_paths=()
  while IFS= read -r hash; do
    [ -n "${hash}" ] || continue
    blob_path="${PATH_BLOBS}/sha256/${hash:0:2}/${hash}"
    printf '%s\n' "${blob_path}" >> "${PATH_LIST}"
    nb_blobs=$((nb_blobs + 1))
    # Only measure blobs that are actually on disk.
    if [ -e "${blob_path}" ]; then
      existing_paths+=("${blob_path}")
    fi
  done < "${BLOB_LIST}.sort"

  echo "${nb_blobs} blobs to delete" >> "${LOG_FILE}"
  # NOTE(review): the listed blobs may also be referenced by manifests that
  # are kept, so the figure is presumably an upper bound - confirm before
  # relying on it.
  if [ "${#existing_paths[@]}" -gt 0 ]; then
    estimated_size=$(du -hc -- "${existing_paths[@]}" | tail -n1 | cut -f1)
  else
    # Never call du without operands: it would measure the current directory.
    estimated_size=0
  fi
  echo "${estimated_size} estimated" >> "${LOG_FILE}"
  echo "${estimated_size} ${IMAGE}" >> "${OUTPUT_DIR}/total"
done 3< "${IMAGES_FILE}"
@morrizon
Copy link

Hi gbougeard,

Thanks for the script. I wrote a similar one based on your version. In my case I use it only to find the outdated digests. It can also find nested images. Maybe you or someone else will find it useful:

#!/bin/bash

# Print the usage message on stdout.
# The script name shown is the basename of $0.
function help() {
  cat << EOF
Usage: ${0##*/} [OPTION] REGISTRY_PATH
Find the images with outdated digests in the docker registry stored in REGISTRY_PATH

Available options:
  -h Display help
  -c Show the command to be run to remove the outdated digests.

Warning: be sure that you know what you are doing before removing anything. Check https://gbougeard.github.io/blog.english/2017/05/20/How-to-clean-a-docker-registry-v2.html
EOF
}

# Print the path of every _manifests directory found under
# <registry_path>/repositories, one per line (nested repositories included).
# Arguments: $1 - registry path containing the "repositories" directory
function get_repositories() {
    local registry_path=$1
    # -prune stops find from walking into each _manifests directory once it
    # has been matched; the path is quoted so whitespace cannot split it.
    find "$registry_path/repositories" -type d -name _manifests -prune
}

# Print the path of every tag directory of a repository, one per line.
# Arguments: $1 - path of a repository's _manifests directory
function get_images() {
    local repository=$1
    local tag_path
    # Glob instead of parsing ls so that unusual names cannot be split.
    for tag_path in "$repository"/tags/*; do
        # An unmatched glob stays literal; skip it.
        [ -e "$tag_path" ] || continue
        printf '%s\n' "$tag_path"
    done
}

# Print the name of every entry in <image>/index/sha256, one per line.
# Arguments: $1 - path of a tag directory
function get_indexes() {
    local image=$1
    local entry
    for entry in "$image"/index/sha256/*; do
        # An unmatched glob stays literal; skip it.
        [ -e "$entry" ] || continue
        # Only the last path component is reported, as ls -1 did.
        printf '%s\n' "${entry##*/}"
    done
}

# Print the content of <image>/current/link with the "sha256:" prefix removed.
# Arguments: $1 - path of a tag directory
function get_sha() {
    local image=$1
    # sed reads the file directly; the path is quoted so whitespace is safe.
    sed 's/sha256://' "$image/current/link"
}

# Print the index entries of a tag that differ from its current digest.
# Arguments: $1 - path of a tag directory
# Outputs:   one outdated digest per line; nothing when the current digest
#            cannot be determined
function get_outdated_indexes() {
    local image=$1
    local current
    current=$(get_sha "$image")
    # Without a current digest there is nothing to compare against; report
    # nothing rather than hand grep an empty pattern.
    [ -n "$current" ] || return 0
    # -x -F: compare whole lines literally instead of as a regex substring.
    get_indexes "$image" | grep -vxF -- "$current"
}

# Print how many outdated index entries a tag has, as a bare integer.
# Arguments: $1 - path of a tag directory
function number_outdated_indexes() {
    local image=$1
    local count
    count=$(get_outdated_indexes "$image" | wc -l)
    # Some wc implementations pad the number with blanks; arithmetic
    # expansion normalizes it so callers can compare the result reliably.
    echo "$((count))"
}

# Succeed (status 0) when the tag has at least one outdated index entry.
# Arguments: $1 - path of a tag directory
function has_outdated_digests() {
    local image=$1
    local count
    count=$(number_outdated_indexes "$image")
    # Compare numerically: a blank-padded count such as "       0" is not
    # equal to the string "0" and used to be reported as outdated.
    (( ${count:-0} > 0 ))
}


# Entry point: parse the options, validate the registry path, then report
# every tag that still has outdated digests. With -c the rm commands that
# would remove them are printed instead of the summary line.
show_commands=false
while getopts ':hc' option; do
    case "$option" in
        h)
            help
            exit
            ;;
        c)
            show_commands=true
            ;;
        \?)
            # Unknown flags used to be ignored silently.
            >&2 echo "Invalid option: -$OPTARG"
            help
            exit 1
            ;;
    esac
done
shift $((OPTIND - 1))

if [ "$#" -ne 1 ]; then
    >&2 echo "Invalid number of parameters!"
    help
    exit 1
fi

registry_path="$1/docker/registry/v2"
if [ ! -d "$registry_path" ]; then
    >&2 echo "Invalid registry path!"
    help
    exit 1
fi

# Read the helper output line by line so paths are never word-split.
while IFS= read -r repository; do
    while IFS= read -r image; do
        if ! has_outdated_digests "$image"; then
            continue
        fi
        if [ "$show_commands" = true ]; then
            while IFS= read -r hash; do
                # %q shell-quotes the paths so the printed command can be
                # pasted safely; ordinary paths are printed unchanged.
                printf 'rm -rf %q %q\n' \
                    "$image/index/sha256/$hash" \
                    "$repository/revisions/sha256/$hash"
            done < <(get_outdated_indexes "$image")
        else
            echo "There are $(number_outdated_indexes "$image") outdated index for $image"
        fi
    done < <(get_images "$repository")
done < <(get_repositories "$registry_path")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment