Last active
January 20, 2022 17:40
-
-
Save calebpower/a4682d8e488eb753658741555a2a9e71 to your computer and use it in GitHub Desktop.
A quick script that kicks a validator back online by reconnecting it to its peers after, e.g., an epoch or some other event that makes sentries stall out for an extended period of time.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Overview
# ========
#
# This is a hack-and-slash script to re-peer the validator without rebooting it
# once sentries come back online. The following assumptions are made:
#
# - This script is used in conjunction with the validator.
# - The validator has PEX disabled and is peering only with sentries.
# - The validator has access to the RPC endpoints of the sentries.
# - The validator's RPC port is the same as those of the sentries.
# - The validator has unsafe RPC endpoints enabled.
# - The sentries are considered "persistent" peers.
# - You're running on Ubuntu 20.04 LTS.
# - You've installed curl via apt.
# - You've installed jq via apt.
# - You've installed golang-github-pelletier-go-toml via apt.
# - You have a valid daemon config.toml file.
#
# Written sloppily by Caleb L. Power under the Apache 2.0 License.
# There be bugs here. Use at your own risk.
# ---------------------------------------------------------------------------
# Configuration
#
# Each setting keeps its built-in default but may be overridden from the
# environment, e.g.:  NODE_CONFIG=/etc/node/config.toml ./script.sh
# ---------------------------------------------------------------------------

# Location of the configuration file
NODE_CONFIG="${NODE_CONFIG:-/path/to/your/config.toml}"

# Number of seconds to wait between loops while the validator is offline
OFFLINE_TIMEOUT="${OFFLINE_TIMEOUT:-10}"

# Number of seconds to wait between loops while the validator is operating normally
NORMAL_TIMEOUT="${NORMAL_TIMEOUT:-60}"

# Number of seconds to wait while waiting for the epoch to complete
EPOCH_TIMEOUT="${EPOCH_TIMEOUT:-5}"

# Number of seconds to wait after kicking the validator
KICK_TIMEOUT="${KICK_TIMEOUT:-20}"
# Verify that every apt package this script relies on is actually installed.
#
# `dpkg -l` exits 0 for any package the database knows about, including ones
# that were removed but not purged, so the real install state is queried
# instead. The Status field reads "<want> <flag> <state>"; only a state of
# exactly "installed" counts (this rejects "half-installed", "config-files",
# and friends).
for pkg in "golang-github-pelletier-go-toml" "curl" "jq"; do
  pkg_status=$(dpkg-query -W -f='${Status}' "$pkg" 2> /dev/null)
  if [[ "$pkg_status" != *" installed" ]]; then
    echo "Please do \"sudo apt install ${pkg}\" and try again."
    exit 1
  fi
done
# Refuse to start unless the configured file exists as a regular file AND can
# be read by the current user; an unreadable file would otherwise only fail
# later, when the config is parsed.
if [[ ! -f "$NODE_CONFIG" || ! -r "$NODE_CONFIG" ]]; then
  echo "Please fix the location of the config file and try again."
  exit 1
fi
# Prints the given message prefixed with the current date: "[<date>] <message>".
log() {
  printf '[%s] %s\n' "$(date)" "$*"
}

# Succeeds only when $1 is a non-empty string of decimal digits.
is_uint() {
  [[ "$1" =~ ^[0-9]+$ ]]
}

# Main supervision loop.
#
# Outer loop:  (re)loads the node configuration. It is re-entered whenever the
#              node's RPC stops answering, so config edits are picked up.
# Middle loop: polls the validator's peer count via its RPC.
# Inner loop:  once the validator has zero peers, polls every persistent peer
#              (sentry) on the validator's RPC port until one of them reports
#              a height beyond the validator's, then asks the validator to
#              re-dial its persistent peers.
while true; do
  # Convert the TOML config to JSON once per pass; bail out if that fails
  # rather than continuing with empty values.
  if ! config=$(tomljson < "$NODE_CONFIG") || [[ -z "$config" ]]; then
    echo "Please make sure ${NODE_CONFIG} is a valid TOML file and try again."
    exit 1
  fi

  if [[ "$(jq -r '.rpc.unsafe' <<< "$config")" != "true" ]]; then
    echo 'Please set [rpc] -> unsafe = true in the config and try again.'
    exit 1
  fi

  # The RPC listen address is configured as tcp://host:port; curl needs http://.
  rpcaddr=$(jq -r '.rpc.laddr' <<< "$config" | sed 's/tcp:\/\//http:\/\//g')
  # Everything after the last colon is the RPC port.
  rpcport=${rpcaddr##*:}

  # persistent_peers is a comma-separated list of id@host:port entries.
  # Split it into an array so that a single-entry list terminates correctly.
  peers=$(jq -r '.p2p.persistent_peers // ""' <<< "$config")
  peer_list=()
  IFS=',' read -r -a raw_peers <<< "$peers"
  for entry in "${raw_peers[@]}"; do
    entry=${entry//[[:space:]]/}
    if [[ -n "$entry" ]]; then
      peer_list+=("$entry")
    fi
  done

  if (( ${#peer_list[@]} == 0 )); then
    echo 'Please set [p2p] -> persistent_peers in the config and try again.'
    exit 1
  fi

  # JSON-style array literal for the dial_peers call: ["id@host:port",...]
  dial_list=$(printf '"%s",' "${peer_list[@]}")
  dial_list="[${dial_list%,}]"

  while true; do
    peercount=$(curl -s "${rpcaddr}/net_info" | jq -r '.result.n_peers')

    # No output at all means the RPC did not answer; reload the config and retry.
    if [[ -z "$peercount" ]]; then
      log "The node appears to have died."
      sleep "$OFFLINE_TIMEOUT"
      continue 2
    fi

    if [[ "$peercount" != "0" ]]; then
      log "This node currently has ${peercount} peer(s)."
      sleep "$NORMAL_TIMEOUT"
      continue
    fi

    log "All peers appear to have dropped off."
    valheight=$(curl -s "${rpcaddr}/status" | jq -r '.result.sync_info.latest_block_height')
    log "The current validator block height is ${valheight}."

    # An unreadable height is treated as 0 so the comparison below stays numeric.
    if ! is_uint "$valheight"; then
      valheight=0
    fi
    # A sentry must be past the validator's *next* block before we kick.
    valheight=$(( 10#$valheight + 1 ))

    while true; do
      sentryheight=$valheight

      for entry in "${peer_list[@]}"; do
        # Drop the node id (id@), then swap the p2p port for the RPC port.
        peer=${entry#*@}
        peer="${peer%:*}:${rpcport}"

        log "Retrieving status of ${peer}..."
        sentry=$(curl -s "http://${peer}/status")
        if [[ -z "$sentry" ]]; then
          log "The sentry at ${peer} did not respond properly."
          continue
        fi

        height=$(jq -r '.result.sync_info.latest_block_height' <<< "$sentry")
        if ! is_uint "$height"; then
          # Answered, but without a usable block height.
          log "The sentry at ${peer} did not respond properly."
          continue
        fi

        if (( sentryheight < 10#$height )); then
          sentryheight=$(( 10#$height ))
          moniker=$(jq -r '.result.node_info.moniker' <<< "$sentry")
          log "A new sentry height was recorded from ${moniker}: ${height}."
        fi
      done

      if (( sentryheight > valheight )); then
        log "The chain appears to be syncing."
        log "Kicking validator..."
        # --globoff stops curl from interpreting the [ ] in the URL as a range.
        curl -s --globoff "${rpcaddr}/dial_peers?peers=${dial_list}&persistent=true" > /dev/null 2>&1
        sleep "$KICK_TIMEOUT"
        break
      fi

      log "The chain appears to still be halted."
      sleep "$EPOCH_TIMEOUT"
    done
  done
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment