grosser/etcd_compact.sh

## etcd_compact.sh
#!/bin/bash
#
# etcd_compact.sh - emergency compaction / defragmentation loop for one etcd member.
#
# Kubernetes api server is supposed to run compaction on etcd.
# When that does not happen we need to do an emergency compaction to keep etcd from locking up.
# This should only be done once, so we pick the leader to do it, which we assume is healthy
# (compacting manually outside of api server can lead to watches failing / requests for
# specific revisions failing etc).
#
# After compacting the keyspace, the backend database may exhibit internal fragmentation.
# Any internal fragmentation is space that is free to use by the backend but still consumes storage space.
# The process of defragmentation releases this storage space back to the file system.
# It also decreases the db_size which we monitor.
# Defragmentation is issued on a per-member basis so that cluster-wide latency spikes may be avoided.
#
# Every 60 seconds the loop reads `etcdctl endpoint status`, compacts when this member is
# the leader and the db is above compact_limit, and defragments when the db is above defrag_limit.

# Options live in `set` rather than on the shebang so they also apply when the script is
# started as `bash etcd_compact.sh`. `-u` is intentionally not enabled: /etc/profile is
# sourced below and may reference unset variables.
set -eo pipefail

source /etc/profile # get env vars

# Loop-invariant settings.
readonly compact_limit=1610612736 # 1.5 GB of the 2 GB default db size
readonly defrag_limit=1073741824  # 1 GB of the 2 GB default db size
readonly pause=60                 # seconds to sleep between two checks

while true; do
  status=$(etcdctl endpoint status --write-out json)

  # Extract all four fields of the first endpoint with a single jq call.
  # String interpolation renders a missing field as the word "null".
  read -r member_id leader_id revision db_size < <(
    jq -r '.[0].Status | "\(.header.member_id) \(.leader) \(.header.revision) \(.dbSize)"' <<<"$status"
  ) || {
    echo "could not parse etcdctl endpoint status output" >&2
    exit 1
  }

  echo "member $member_id / leader $leader_id / db_size $db_size"

  # Without a numeric db_size neither threshold can be evaluated; skip this round
  # instead of feeding a non-number into the comparisons below.
  if ! [[ "$db_size" =~ ^[0-9]+$ ]]; then
    echo "unexpected db_size '$db_size' in endpoint status, skipping this round" >&2
    sleep "$pause"
    continue
  fi

  # Only the leader compacts, so the compaction is issued by a single member.
  if [[ "$member_id" == "$leader_id" ]] && (( db_size > compact_limit )); then
    echo "Started compact"
    timeout 10 etcdctl compact "$revision"
  else
    echo "decided not to compact"
  fi

  if (( db_size > defrag_limit )); then
    if [[ "$member_id" != "$leader_id" ]]; then
      sleep "$(( RANDOM % 60 ))s" # Stagger for each member so we don't lock db
    fi

    echo "Started defrag"
    timeout 10 etcdctl defrag
  else
    echo "decided not to defrag"
  fi

  sleep "$pause"
done
	#!/bin/bash
	#
	# etcd_compact.sh - emergency compaction / defragmentation loop for one etcd member.
	#
	# Kubernetes api server is supposed to run compaction on etcd.
	# When that does not happen we need to do an emergency compaction to keep etcd from locking up.
	# This should only be done once, so we pick the leader to do it, which we assume is healthy
	# (compacting manually outside of api server can lead to watches failing / requests for
	# specific revisions failing etc).
	#
	# After compacting the keyspace, the backend database may exhibit internal fragmentation.
	# Any internal fragmentation is space that is free to use by the backend but still consumes storage space.
	# The process of defragmentation releases this storage space back to the file system.
	# It also decreases the db_size which we monitor.
	# Defragmentation is issued on a per-member basis so that cluster-wide latency spikes may be avoided.
	#
	# Every 60 seconds the loop reads `etcdctl endpoint status`, compacts when this member is
	# the leader and the db is above compact_limit, and defragments when the db is above defrag_limit.

	# Options live in `set` rather than on the shebang so they also apply when the script is
	# started as `bash etcd_compact.sh`. `-u` is intentionally not enabled: /etc/profile is
	# sourced below and may reference unset variables.
	set -eo pipefail

	source /etc/profile # get env vars

	# Loop-invariant settings.
	readonly compact_limit=1610612736 # 1.5 GB of the 2 GB default db size
	readonly defrag_limit=1073741824  # 1 GB of the 2 GB default db size
	readonly pause=60                 # seconds to sleep between two checks

	while true; do
	  status=$(etcdctl endpoint status --write-out json)

	  # Feed the status JSON to jq (a real pipeline/redirect, not an escaped "\|") and
	  # extract all four fields of the first endpoint with a single call.
	  # String interpolation renders a missing field as the word "null".
	  read -r member_id leader_id revision db_size < <(
	    jq -r '.[0].Status | "\(.header.member_id) \(.leader) \(.header.revision) \(.dbSize)"' <<<"$status"
	  ) || {
	    echo "could not parse etcdctl endpoint status output" >&2
	    exit 1
	  }

	  echo "member $member_id / leader $leader_id / db_size $db_size"

	  # Without a numeric db_size neither threshold can be evaluated; skip this round
	  # instead of feeding a non-number into the comparisons below.
	  if ! [[ "$db_size" =~ ^[0-9]+$ ]]; then
	    echo "unexpected db_size '$db_size' in endpoint status, skipping this round" >&2
	    sleep "$pause"
	    continue
	  fi

	  # Only the leader compacts, so the compaction is issued by a single member.
	  if [[ "$member_id" == "$leader_id" ]] && (( db_size > compact_limit )); then
	    echo "Started compact"
	    timeout 10 etcdctl compact "$revision"
	  else
	    echo "decided not to compact"
	  fi

	  if (( db_size > defrag_limit )); then
	    if [[ "$member_id" != "$leader_id" ]]; then
	      sleep "$(( RANDOM % 60 ))s" # Stagger for each member so we don't lock db
	    fi

	    echo "Started defrag"
	    timeout 10 etcdctl defrag
	  else
	    echo "decided not to defrag"
	  fi

	  sleep "$pause"
	done