Skip to content

Instantly share code, notes, and snippets.

@blockpane
Created December 8, 2021 23:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save blockpane/17392df025bc89245518870e5cd62413 to your computer and use it in GitHub Desktop.
Save blockpane/17392df025bc89245518870e5cd62413 to your computer and use it in GitHub Desktop.
Tendermint Monit Checks
#!/bin/bash
# used by monit, place in: /etc/monit/scripts/listening.sh
# as simple as it gets. Additional upside is it's easy to see what ports are used in m/monit
#
# Arguments: $1 - pattern (grep regex) to look for in the listening-socket list,
#                 e.g. the daemon's process name
# Outputs:   the matching lines of `ss -lntp` on stdout
# Returns:   grep's status (0 when at least one line matches, 1 when none do),
#            or 2 when no pattern was given
if [[ -z "${1:-}" ]]; then
  # An empty pattern matches every line, so the check would pass no matter
  # what is (or is not) listening.
  echo "usage: ${0##*/} <pattern>" >&2
  exit 2
fi
# -l listening sockets, -n numeric ports, -t TCP only, -p show owning process
/usr/bin/ss -lntp | grep -- "${1}"
#!/bin/bash
# used by monit, place in: /etc/monit/scripts/no-peers.sh
# checks if node has no peers via prometheus
#
# Arguments: $1 - host:port of the prometheus listener, e.g. 127.0.0.1:26660
# Outputs:   a one-line status message (stdout; diagnostics on stderr)
# Returns:   1 when the reported peer count is 0, otherwise 0
PROMETHEUS="http://${1}"
NUM_PEERS=$(curl -s "${PROMETHEUS}/stats" | grep 'p2p_peers{' | awk '{print $NF}')
# if we can't connect don't alarm, that is done in another check.
if [[ -z "${NUM_PEERS}" ]]; then
  exit 0
fi
# Several matching series or a non-integer value cannot be compared with -eq.
# Don't alarm on it, but say so instead of printing a bogus peer count.
if ! [[ "${NUM_PEERS}" =~ ^[0-9]+$ ]]; then
  echo "unexpected p2p_peers value: ${NUM_PEERS}" >&2
  exit 0
fi
# Single-bracket test on purpose: it compares as decimal even with leading zeros.
if [ "${NUM_PEERS}" -eq 0 ]; then
  echo NO PEERS ARE CONNECTED
  exit 1
fi
echo "${NUM_PEERS} peers are connected."
exit 0
# Replace aaa with the appropriate daemon name.
# place in: /etc/monit/conf.d/tendermint
# This check ensures the daemon is running; it also allows using m/monit to remotely
# stop / start / restart the service.
# The process is found by matching the pattern "aaa" (no pidfile is used), and the
# start / restart / stop actions are delegated to systemctl for aaa.service.
check process aaa matching aaa
start program = "/usr/bin/systemctl start aaa.service"
restart program = "/usr/bin/systemctl restart aaa.service"
stop program = "/usr/bin/systemctl stop aaa.service"
# Alerts if node falls behind.
# Requests /status over HTTP from 127.0.0.1 port 26657 and expects the response to
# contain '"catching_up": false'; a failed test raises an alert only (no restart).
# This check depends on the aaa process check.
check host aaa-syncing with address 127.0.0.1
if failed
port 26657 protocol http
request /status with content = '"catching_up": false'
then alert
depends on aaa
# Ensure the daemon has ports listening. It can stop listening if cosmovisor locks up, or
# is performing a backup at upgrade time.
# Runs listening.sh with the daemon name as its argument; a non-zero exit status raises
# an alert, and a non-zero status for 3 cycles stops monitoring of this check.
check program aaa-listening with path "/bin/bash /etc/monit/scripts/listening.sh aaa"
if status != 0 then alert
IF status != 0 FOR 3 CYCLES THEN UNMONITOR
depends on aaa
# Uses the prometheus endpoint to watch for connected peers; if no peers are present after
# 3 cycles (I use 3 minutes), it will restart.
# Runs no-peers.sh against 127.0.0.1:26660; a non-zero exit status raises an alert, and
# the restart action uses the systemctl restart command declared on this check.
check program aaa-peers with path "/bin/bash /etc/monit/scripts/no-peers.sh 127.0.0.1:26660"
restart program = "/usr/bin/systemctl restart aaa.service"
if status != 0 then alert
IF status != 0 FOR 3 CYCLES THEN RESTART
depends on aaa
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment