Created
February 1, 2016 18:18
-
-
Save nickadam/05ecee00b4bc250dd7fd to your computer and use it in GitHub Desktop.
Failover
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Fail a DRBD-backed service over from a host to its failover container.
#
# Usage:
#   failover <host> <failover-host> <failover-container> <drbd-resource>
#   failover server-b server-a server-b-fo r1
#
#   host                the system going down
#   failover-host       where the failover container lives
#   failover-container  the container that takes over (must be reachable via ssh)
#   drbd-resource       DRBD resource name; its config is read from
#                       /etc/drbd.d/<drbd-resource>.res on <host>
#
# Sequence (every step aborts with exit status 1 on failure unless noted):
#   1. Read the resource's device from its .res file on <host>.
#   2. Check the container is listed by `docker ps` on <failover-host> and
#      answers over ssh.
#   3. Demote the resource on <host> (/usr/local/sbin/drbd-autodemote).
#   4. On <failover-host>, verify the resource is Connected, UpToDate on both
#      sides and Secondary on both sides, then promote it to primary.
#   5. Run /usr/local/sbin/failover <device> inside the container.
#   6. Best effort: start/reload haproxy and restart keepalived in the
#      container. These are not checked; the script's exit status is that of
#      the last service command.
#
# All remote commands are executed as root over ssh.

# Print the usage text to stdout.
usage() {
  echo "Please specify host (the system going down), failover host (where the failover container"
  echo "lives), the failover container, and the drbd resource"
  echo " failover server-b server-a server-b-fo r1"
}

# Print an error message to stderr and abort with status 1.
die() {
  printf '%s\n' "$1" >&2
  exit 1
}

# Succeed when `drbdadm <query> <resource>` on the failover host prints a line
# that is exactly <expected>. drbdadm's own stderr is discarded; the caller
# reports the failure.
drbd_reports() {
  local query=$1 expected=$2
  ssh root@"$failover" /sbin/drbdadm "$query" "$resource" 2>/dev/null \
    | grep -Fx -- "$expected" > /dev/null
}

# Explicit help request is a success; missing arguments are an error so that
# automation calling this script cannot mistake "nothing happened" for success.
if [[ "${1:-}" == "--help" ]]; then
  usage
  exit 0
fi
if [[ -z "${4:-}" ]]; then
  usage >&2
  exit 1
fi

host="$1"
failover="$2"
container="$3"
resource="$4"

# Find the device for this resource: second field of the first indented
# "device" line of the resource file, with any ';' removed.
device=$(
  ssh root@"$host" cat /etc/drbd.d/"$resource".res \
    | awk '/^[[:space:]]+device/ { gsub(/;/, "", $2); print $2; exit }'
)
if [[ -z "$device" ]]; then
  die "Could not find drbd device for $resource"
fi

# Check that the failover container is running. The name is compared exactly
# against the last column of `docker ps` (split on ',' in case several names
# are listed), so a name that is merely a suffix of another container's name,
# or that contains regex metacharacters, cannot produce a false match.
if ! ssh root@"$failover" /usr/bin/docker ps \
  | awk -v name="$container" '
      NR > 1 {
        n = split($NF, names, ",")
        for (i = 1; i <= n; i++) {
          if (names[i] == name) {
            found = 1
          }
        }
      }
      END { exit !found }'
then
  die "Failover container $container is not running on $failover"
fi

# Check that the failover container can be reached. This must abort: the next
# step demotes the resource on the live host, and without a reachable
# container the final failover step could not be carried out.
if ! ssh root@"$container" true; then
  die "The failover container $container is not responding"
fi

# Demote the resource on the host.
if ! ssh root@"$host" /usr/local/sbin/drbd-autodemote "$resource"; then
  die "Failed to demote $resource on $host"
fi

# Wait for a bit before checking drbd conditions.
sleep 5

# Verify that the resource is connected.
if ! drbd_reports cstate "Connected"; then
  die "Resource $resource is not connected on $failover"
fi

# Verify that the resource is up to date on both sides.
if ! drbd_reports dstate "UpToDate/UpToDate"; then
  die "Resource $resource is not up to date"
fi

# Verify the resource is secondary everywhere.
if ! drbd_reports role "Secondary/Secondary"; then
  die "Resource $resource is not secondary everywhere"
fi

# Set the resource as primary on the failover host.
if ! ssh root@"$failover" /sbin/drbdadm primary "$resource" 2>/dev/null; then
  die "Set resource $resource to primary failed on $failover"
fi

# Mount the device in the failover container and start docker
# (performed by the container-side /usr/local/sbin/failover script).
if ! ssh root@"$container" /usr/local/sbin/failover "$device"; then
  die "Failover on $container failed"
fi

# Try to start/restart services on the container and reload the haproxy
# config. Deliberately unchecked: "start" may fail when haproxy is already
# running, in which case the following reload applies.
ssh root@"$container" service haproxy start
ssh root@"$container" service haproxy reload
ssh root@"$container" service keepalived restart
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment