Skip to content

Instantly share code, notes, and snippets.

@pbostrom
Last active July 17, 2018 19:25
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pbostrom/fe8cb8e7a27f53d687b2110d06cd0567 to your computer and use it in GitHub Desktop.
Save pbostrom/fe8cb8e7a27f53d687b2110d06cd0567 to your computer and use it in GitHub Desktop.
Monitor your gaiad validator
#!/bin/bash
# Publish a single data point for the "UnhealthyValidator" metric in the
# "Cosmos" CloudWatch namespace (region us-east-1).
# Arguments: $1 - the metric value to record.
#
# The space before the trailing backslash is required: a backslash-newline
# pair is removed outright, so without it the metric name and the next option
# would fuse into the single word "UnhealthyValidator--value".
aws cloudwatch put-metric-data --metric-name UnhealthyValidator \
  --value "$1" --namespace "Cosmos" --region us-east-1
#!/bin/bash
#######################################
# Print how many whole seconds have elapsed since a timestamp.
# Arguments: $1 - timestamp in any format GNU `date -d` can parse
# Outputs:   the age in seconds on stdout (negative when the timestamp lies
#            in the future)
# Returns:   0 on success; non-zero, with nothing on stdout, when the
#            timestamp is empty or cannot be parsed
#######################################
block_age() {
  local bt now
  # GNU date reads an empty string as "today at 00:00", which would yield a
  # plausible-looking but meaningless age, so reject it explicitly.
  [[ -n "${1:-}" ]] || return 1
  # Stop on a parse failure rather than letting an empty value reach the
  # arithmetic expansion below and abort it with a syntax error.
  bt=$(date -d "$1" +%s) || return 1
  now=$(date +%s)
  echo $(( now - bt ))
}
# Abort early when a required tool is missing. `command -v` only looks the
# name up, so the tool is never executed just to prove it exists, and the
# result is tested directly instead of through a separate `$?` check.
if ! command -v jq > /dev/null 2>&1; then
  echo "jq not installed. Please use your favorite package manager to install jq. More info at https://stedolan.github.io/jq/download/"
  exit 1
fi
if ! command -v curl > /dev/null 2>&1; then
  echo "curl not installed. Please use your favorite package manager to install curl. More info at https://curl.haxx.se/download.html"
  exit 1
fi
# Probe the local RPC endpoint before querying it. Only curl's exit status is
# used; the response body and any error text are discarded.
#   -f             make an HTTP error status count as a failure (curl would
#                  otherwise exit 0 on it)
#   --max-time 10  give up on an endpoint that accepts the connection but
#                  never answers, so the monitor itself cannot hang
if ! curl -sf --max-time 10 http://localhost:26657/status > /dev/null 2>&1; then
  echo "gaiad RPC unavailable"
  # Report value 1, as on every other failure path of this script.
  /usr/bin/cloudwatch_health.sh 1
  exit 1
fi
# Fetch /status once and read every field from that single snapshot. Separate
# requests could straddle the arrival of a new block and pair one block's
# height with another block's time.
status=$(curl -s http://localhost:26657/status)
addr=$(jq -r ".result.validator_info.address" <<< "$status")
lbh=$(jq -r ".result.sync_info.latest_block_height" <<< "$status")
lbt=$(jq -r ".result.sync_info.latest_block_time" <<< "$status")
ba=$(block_age "$lbt")
echo "Latest block height: $lbh"
echo "Latest block age: $ba seconds"
echo "Validator address: $addr"
# Maximum acceptable age of the latest block, in seconds.
threshold=90
# Look for this validator's entry among the precommits recorded in the latest
# block; the result is empty when there is none.
# The URL is quoted so the `?` cannot be treated as a glob character, and the
# address is handed to jq with --arg instead of being spliced into the filter.
precommit=$(curl -s "http://localhost:26657/block?height=$lbh" \
  | jq -r --arg addr "$addr" '.result.block.last_commit.precommits | .[] | select(.validator_address == $addr)')
# Decide whether the validator is healthy and report the verdict.
# Reads the variables set earlier in the script: ba (latest block age in
# seconds), threshold (maximum acceptable age) and precommit (this validator's
# precommit entry, empty when absent).
# A non-empty msg marks failure; it starts empty so a stray value inherited
# from the environment cannot trigger a false alarm.
msg=""
if ! [[ "$ba" =~ ^-?[0-9]+$ ]]; then
  # An empty or malformed age would make the arithmetic test below fail with a
  # syntax error, which counts as "false" and silently skips the staleness
  # check. Treat it as a failure instead.
  msg="Latest block age could not be determined"
elif (( ba > threshold )); then
  msg="Latest block age is over $threshold seconds; Validator or network has halted"
elif [[ -z "$precommit" ]]; then
  msg="Validator not active; latest block does not contain a precommit from this validator"
fi
if [[ -n "$msg" ]]; then
  # configure an alert here: email, SNS, PagerDuty, etc.
  echo "$msg"
  /usr/bin/cloudwatch_health.sh 1
  exit 1
fi
echo "Validator active"
/usr/bin/cloudwatch_health.sh 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment