Skip to content

Instantly share code, notes, and snippets.

@ronakbanka
Forked from dansimau/pgprepmgr.sh
Last active August 29, 2015 14:06
Show Gist options
  • Save ronakbanka/56558ddfab07efc1850e to your computer and use it in GitHub Desktop.
Save ronakbanka/56558ddfab07efc1850e to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# pgpool-II replication manager
#
# Interfaces with pgpool's pcp command-line tools to provide access to common functions for managing
# load-balancing and failover.
#
# dsimmons@squiz.co.uk
# 2011-08-28
#
# PCP configuration
pcp_host="127.0.0.1"
pcp_port="9898"
pcp_username="pgpool"
pcp_password="password"
pcp_timeout="10"
# Health check uses psql to connect to each backend server. Specify options required to connect here
psql_healthcheck_opts="-U postgres"
# Default options to send to pcp commands
pcp_cmd_preamble="$pcp_timeout $pcp_host $pcp_port $pcp_username $pcp_password"
#
# Runs abitrary pcp command with the preamble
#
_pcp()
{
cmd=$1
shift
$cmd $pcp_cmd_preamble $*
}
#
# Prints the total number of pgpool nodes.
#
_get_node_count()
{
pcp_node_count $pcp_cmd_preamble
}
#
# Prints out node information for the specified pgpool node
#
_get_node_info()
{
if [ -z $1 ]; then
echo "Usage: $0 _get_node_info <node id>" >&2
return 99
fi
pcp_node_info $pcp_cmd_preamble $1
}
#
# Outputs the replication lag (in bytes) between the specified master and slave
#
_get_replication_lag()
{
if [ -z $2 ]; then
echo "Usage: $0 _get_replication_lag <master node id> <slave node id>" >&2
return 99
fi
# Get connection information for nodes
master_node_info=($(_get_node_info $1))
if [ $? -gt 0 ]; then
echo "ERROR: Failed getting node info for node $1" >&2
return 1
fi
slave_node_info=($(_get_node_info $2))
if [ $? -gt 0 ]; then
echo "ERROR: Failed getting node info for node $2" >&2
return 1
fi
for n in $1 $2; do
if [ $(_is_node_alive $n) -eq 0 ]; then
echo "ERROR: Node $n is not available. Unable to calculate lag." >&2
return 1
fi
done
export PGCONNECT_TIMEOUT=2
# Get xlog locations for master and slaves
master_xlog_loc=$(psql $psql_healthcheck_opts -h "${master_node_info[0]}" -p "${master_node_info[1]}" -Atc "SELECT pg_current_xlog_location();")
if [ $? -gt 0 ]; then
echo "ERROR: Failed getting xlog location from node $1" >&2
return 1
fi
slave_xlog_loc=$(psql $psql_healthcheck_opts -h "${slave_node_info[0]}" -p "${slave_node_info[1]}" -Atc "SELECT pg_last_xlog_replay_location();")
if [ $? -gt 0 ]; then
echo "ERROR: Failed getting xlog location from node $2" >&2
return 1
fi
# Number of bytes behind
echo $(($(_xlog_to_bytes $master_xlog_loc) - $(_xlog_to_bytes $slave_xlog_loc)))
}
#
# Converts specified xlog number to byte location
#
_xlog_to_bytes()
{
if [ -z $1 ]; then
echo "Usage: $0 _xlog_to_bytes <xlog loc>" >&2
echo " Eg.: $0 _xlog_to_bytes 0/2BC825C0" >&2
return 99
fi
logid="${1%%/*}"
offset="${1##*/}"
echo $((0xFFFFFF * 0x$logid + 0x$offset))
}
#
# Prints whether the postgresql service on the specified node is responding.
#
_is_node_alive()
{
if [ -z $1 ]; then
echo "Usage: $0 _is_node_alive <node id>" >&2
return 99
fi
# Get node connection information
node_info=($(_get_node_info $1))
if [ $? -gt 0 ]; then
echo "ERROR: Failed getting node info for node $1" >&2
return 1
fi
export PGCONNECT_TIMEOUT=2
result=$(psql $psql_healthcheck_opts -h "${node_info[0]}" -p "${node_info[1]}" -Atc "SELECT 1;" 2>/dev/null)
if [ "$result" == "1" ]; then
echo 1
return 1
else
echo 0
return 0
fi
}
#
# Get the node ID of the first master
#
_get_master_node()
{
# Get total number of nodes
nodes=$(_get_node_count)
if [ $? -gt 0 ]; then
echo "ERROR: Failed getting node count: $nodes" >&2
exit 1
fi
c=0
# Loop through each node to check if it's the master
while [ $c -lt $nodes ]; do
if [ "$(_is_standby $c)" == "0" ]; then
echo $c
return 0
fi
let c=c+1
done
echo "-1"
return 1
}
#
# Checks if the node is in postgresql recovery mode (ie. if it is a slave)
#
_is_standby()
{
if [ -z $1 ]; then
echo "Usage: $0 _is_standby <node id>" >&2
return 99
fi
# Get node connection information
node_info=($(_get_node_info $1))
if [ $? -gt 0 ]; then
echo "ERROR: Failed getting node info for node $1" >&2
return 1
fi
export PGCONNECT_TIMEOUT=2
result=$(psql $psql_healthcheck_opts -h "${node_info[0]}" -p "${node_info[1]}" -Atc "SELECT pg_is_in_recovery();" 2>/dev/null)
if [ "$result" == "t" ]; then
echo 1
return 1
else
echo 0
return 0
fi
}
#
# Attaches the specified node to the pool
#
attach()
{
if [ -z $1 ]; then
echo "Usage: $0 attach <node id>" >&2
return 99
fi
pcp_attach_node $pcp_cmd_preamble $1
}
#
# Detaches the specified node from the pool
#
detach()
{
if [ -z $1 ]; then
echo "Usage: $0 detach <node id>" >&2
return 99
fi
pcp_detach_node $pcp_cmd_preamble $1
}
#
# Recovers the specified node (restores it from current master and re-attaches)
#
recover()
{
if [ -z $1 ]; then
echo "Usage: $0 recover <node id>" >&2
return 99
fi
pcp_recovery_node $pcp_cmd_preamble $1
}
#
# Prints the status of the specified node in human readable format.
#
_get_node_status()
{
if [ -z $1 ]; then
echo "Usage: $0 _get_node_status <node id>" >&2
return 99
fi
node_info=($(_get_node_info $1))
if [ $? -gt 0 ]; then
echo "ERROR: Failed getting node info for node $1" >&2
else
node_role=""
node_replication_lag=""
node_alive=""
case "$(_is_node_alive $1)" in
1)
node_alive="Up"
;;
*)
node_alive="Down"
;;
esac
if [ "$node_alive" == "Up" ]; then
# Find out what role this node has
if [ "$(_is_standby $1)" == "1" ]; then
node_role="Slave"
# Calculation replication lag
master_node=$(_get_master_node)
node_replication_lag=$(_get_replication_lag $master_node $1)
if [ $? -eq 0 ]; then
node_replication_lag="$node_replication_lag bytes"
else
node_replication_log="Unknown"
fi
else
node_role="Master"
fi
fi
case "${node_info[2]}" in
3)
node_status="detached from pool"
;;
2)
node_status="in pool and connected"
;;
1)
node_status="in pool"
;;
*)
node_status="Unknown"
;;
esac
# Print status information about this node
echo "Node: $1"
echo "Host: ${node_info[0]}"
echo "Port: ${node_info[1]}"
echo "Weight: ${node_info[3]}"
echo "Status: $node_alive, $node_status (${node_info[2]})"
[ -n "$node_role" ] && echo "Role: $node_role"
[ -n "$node_replication_lag" ] && echo "Replication lag: $node_replication_lag"
echo ""
fi
}
#
# Prints out the status of all pgpool nodes in human readable form.
#
status()
{
# Get total number of nodes
nodes=$(_get_node_count)
if [ $? -gt 0 ]; then
echo "ERROR: Failed getting node count: $nodes" >&2
exit 1
fi
c=0
# Loop through each node to retrieve info
while [ $c -lt $nodes ]; do
_get_node_status $c
let c=c+1
done
}
# Run function
if [ ! "$(type -t $1)" ]; then
echo "Usage $0 <option>" >&2
echo "" >&2
echo "Available options:" >&2
echo "$(compgen -A function |grep -v '^_')" >&2
exit 99
else
cmd=$1
shift
$cmd $*
exit $?
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment