Skip to content

Instantly share code, notes, and snippets.

@kwilczynski
Last active March 21, 2021 20:48
Show Gist options
  • Save kwilczynski/e116d730a869da0db890 to your computer and use it in GitHub Desktop.
Save kwilczynski/e116d730a869da0db890 to your computer and use it in GitHub Desktop.
Remove old keys from Salt Master
#!/bin/bash
#
# Remove old keys from the Salt Master.
#
# Environment:
#   DEBUG - when set to a non-empty value, the collected lists are dumped
#           with `declare -p` as the script progresses.

# Strict mode is enabled with `set` rather than on the shebang line so that
# it also applies when the script is started as `bash <script>`.
set -eu

export PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin

# Lock file named after this script; it holds the PID of the running instance.
readonly LOCK_FILE="/var/lock/${0##*/}.lock"
# File that every log message is appended to.
readonly LOG_FILE='/var/log/salt/salt-key-cleaner'
# Directory listed to obtain the Salt Master's cached minion entries.
readonly MASTER_CACHE='/var/cache/salt/master/minions'
# Empty unless the caller exported DEBUG.
readonly DEBUG=${DEBUG-''}
#######################################
# Print an informational message and append it to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $@ - words of the message; joined with single spaces
# Outputs:   "<script name> [<pid>]: <date -R timestamp> <message>" on
#            stdout, and the same line appended to LOG_FILE
#######################################
function _notice() {
  # The command substitution is an argument, so a failing `date` does not
  # abort the script; LOG_FILE is quoted so the path is never word-split.
  printf '%s [%d]: %s %s\n' "${0##*/}" "$$" "$(date -R)" "$*" \
    | tee -a "$LOG_FILE"
}
#######################################
# Print an error message to stderr and append it to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $@ - words of the message; joined with single spaces
# Outputs:   "<script name> [<pid>]: <date -R timestamp> ERROR: <message>"
#            on stderr, and the same line appended to LOG_FILE
# Returns:   always 0
#######################################
function _error() {
  # A plain pipeline (instead of an asynchronous process substitution)
  # guarantees the line is written before this function returns, e.g. right
  # before the caller exits. Logging stays best-effort: a failing tee must
  # not abort the caller under `set -e`, hence the explicit `|| true`.
  printf '%s [%d]: %s ERROR: %s\n' "${0##*/}" "$$" "$(date -R)" "$*" \
    | tee -a "$LOG_FILE" >&2 || true
}
#######################################
# Report a fatal error through _error and terminate the script.
# Arguments: $@ - words of the message, forwarded unchanged to _error
# Returns:   does not return; exits with status 1
#######################################
_die() {
  _error "$@"
  exit 1
}
# Refuse to run unless the effective user is root.
if (( EUID != 0 )); then
  echo 'You must be a super-user to run this script!' >&2
  exit 1
fi

# Discard a stale lock, i.e. one whose recorded owner is not the PID of a
# live process. The content is validated first: with an empty or garbage
# lock file the bare "/proc/<content>" test would look at "/proc/" itself
# (which always exists) and the stale lock would never be removed.
LOCK_OWNER=$(cat "$LOCK_FILE" 2>/dev/null) || LOCK_OWNER=''
if [[ ! $LOCK_OWNER =~ ^[0-9]+$ || ! -e "/proc/${LOCK_OWNER}" ]]; then
  rm -f "$LOCK_FILE"
fi

_notice 'Starting Salt Master obsolete keys clean-up ...'
# Main section. Takes the lock, collects the lists of known and responding
# Salt Minions, scores every identifier by the number of lists it appears
# in, and deletes the keys (plus cached data) of identifiers seen only once.
#
# The lock is taken atomically: with noclobber set, the redirection fails
# when the lock file already exists.
if (set -o noclobber; echo $$ > "$LOCK_FILE") &>/dev/null; then
  # Make sure to clean-up on exit.
  trap 'rm -f "$LOCK_FILE"; exit' EXIT
  # SIGKILL cannot be caught, so it is not listed here.
  trap '
    rm -f "$LOCK_FILE"
    _die "*** Aborting execution ***"
  ' HUP INT QUIT TERM

  COUNT=0
  START=$(date +%s)

  declare -A IDS=()
  declare -a CACHES=() KEYS=() UPS=() PINGS=() RUNS=()

  # NOTE: the lists below are filled through deliberate word-splitting of
  # the command output: every whitespace-separated token becomes one array
  # element.

  _notice 'Checking Salt Master cache ...'

  # A list of entries present in the Salt Master cache.
  # shellcheck disable=SC2207
  CACHES+=( $(ls -1 "$MASTER_CACHE" 2>/dev/null) )
  [[ -z $DEBUG ]] || declare -p CACHES KEYS UPS PINGS RUNS

  _notice 'Gathering a list of currently registered keys ...'

  # A list of registered keys.
  # shellcheck disable=SC2207
  KEYS+=( $(salt-key -L --output=newline_values_only 2>/dev/null) )
  [[ -z $DEBUG ]] || declare -p CACHES KEYS UPS PINGS RUNS

  if (( ${#KEYS[@]} == 0 )); then
    _die 'Unable to collect the list of available keys.'
  fi

  _notice '(1/3) Refreshing a list of active Salt Minions (manage.up) ...'

  # shellcheck disable=SC2207
  UPS+=( $(salt-run manage.up --no-color 2>/dev/null) )
  [[ -z $DEBUG ]] || declare -p CACHES KEYS UPS PINGS RUNS

  # The element count (note the "#") is what tells an empty list apart.
  if (( ${#UPS[@]} == 0 )); then
    _die 'Unable to collect the list of active Salt Minions.'
  fi

  _notice '(2/3) Refreshing a list of active Salt Minions (test.ping) ...'

  # A list of active (alive) Salt Minions.
  # shellcheck disable=SC2207
  PINGS+=( $(salt '*' test.ping --output=txt 2>/dev/null \
    | awk -F':' '{ print $1 }') )
  [[ -z $DEBUG ]] || declare -p CACHES KEYS UPS PINGS RUNS

  if (( ${#PINGS[@]} == 0 )); then
    _die 'Unable to refresh the list of active Salt Minions.'
  fi

  _notice '(3/3) Refreshing a list of active Salt Minions (cmd.run) ...'

  # Query allegedly alive Salt Minions to make sure that they are talking back.
  # shellcheck disable=SC2207
  RUNS+=( $(salt '*' cmd.run 'cat /etc/salt/minion_id' --output=txt 2>/dev/null \
    | awk -F':' '{ print $1 }') )
  [[ -z $DEBUG ]] || declare -p CACHES KEYS UPS PINGS RUNS

  if (( ${#RUNS[@]} == 0 )); then
    _die 'Unable to refresh the list of active Salt Minions by sending a query.'
  fi

  # Sanity check on list sizes: manage.up and test.ping must report the same
  # number of entries, and the number of registered keys must equal the
  # number of cache entries.
  if (( ${#UPS[@]} != ${#PINGS[@]} || ${#KEYS[@]} != ${#CACHES[@]} )); then
    _die 'Number of active Salt Minions does not match, aborting ...'
  fi

  _notice 'Reconciling obsolete keys ...'

  # Combine known keys with seen Salt Minions: every occurrence of an
  # identifier in one of the four lists adds one to its score.
  for i in "${KEYS[@]}" "${UPS[@]}" "${PINGS[@]}" "${RUNS[@]}"; do
    IDS[$i]=$(( ${IDS[$i]:-0} + 1 ))
  done

  # Content of the Salt Master cache does not partake in scoring; entries
  # not seen anywhere else are only registered with a score of one.
  for i in "${CACHES[@]}"; do
    [[ -n ${IDS[$i]-} ]] || IDS[$i]=1
  done

  [[ -z $DEBUG ]] || declare -p CACHES KEYS UPS PINGS RUNS IDS

  for i in "${!IDS[@]}"; do
    # Remove anything that was seen only once.
    [[ ${IDS[$i]} == 1 ]] || continue

    # Final check before removing the key. The exit status is captured
    # explicitly: a failing command substitution would otherwise abort the
    # whole script under `set -e`, and `$?` read after a `[[ ]]` test only
    # reflects the test itself.
    STATUS=0
    OUTPUT=$(salt "$i" cmd.run 'date' --output=quiet 2>/dev/null) || STATUS=$?
    if [[ -n $OUTPUT ]] || (( STATUS != 0 )); then
      continue
    fi

    # Deletion is verified right below, hence its own status is ignored.
    salt-key --yes --delete "$i" --output=quiet &>/dev/null || true
    if [[ -n $(salt-key --print "$i" 2>/dev/null) ]]; then
      _error "Failed to remove key: $i"
      continue
    fi

    COUNT=$(( COUNT + 1 ))
    CACHE="${MASTER_CACHE}/${i}"

    # Remove old cached data.
    if [[ -d $CACHE ]]; then
      rm -rf -- "$CACHE" &>/dev/null
    fi
  done

  _notice "Removed $COUNT obsolete keys (time taken: $(( $(date +%s) - START )) seconds)."

  unset CACHES KEYS UPS PINGS RUNS IDS

  # Regular completion: release the lock and drop the handlers.
  rm -f "$LOCK_FILE" &>/dev/null
  trap - HUP INT QUIT TERM EXIT

  _notice 'Completed the clean-up.'
else
  _die "Unable to create lock file (current owner: $(cat "$LOCK_FILE" 2>/dev/null))."
fi

exit 0
@kwilczynski
Copy link
Author

Note: This solution might not work too well, as I found out that test.ping and/or cmd.run do not always show every active node, even when run multiple times.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment