Skip to content

Instantly share code, notes, and snippets.

@jazzl0ver
Last active May 24, 2023 10:19
Show Gist options
  • Save jazzl0ver/c6859e1615a0f97b8704052db0745e25 to your computer and use it in GitHub Desktop.
Save jazzl0ver/c6859e1615a0f97b8704052db0745e25 to your computer and use it in GitHub Desktop.
Firecamp Cassandra backup script
#!/bin/bash
#
# Firecamp Cassandra backup script
# Example:
# ./fc_cass_backup.sh -r us-east-1 -c firecamp-qa -s cass-qa -k '14 days' -p
# where:
# -r - region
# -c - firecamp cluster name
# -s - firecamp service name
# -k - retention period after which a backup may be purged, in `date -d` format (e.g. '14 days')
# -p - purge the expired backups
# -d - destination region to copy snapshots to
# -g - retention period for the copies in the destination region, in `date -d` format (e.g. '30 days')
# -u - purge the expired backups in the destination region
#
# Script should be executed on a docker-enabled instance within the same VPC as Firecamp cluster and in AppSecurityGroup
# Dependencies: awscli, docker, firecamp-service-cli, jq
#
# This script takes snapshots of the Cassandra service volumes (primary and journal) and tags them accordingly
# Use fc_cass_restore.sh to restore from the backup
#
# Following policy should be assigned to the instance (or a user) where the script is executed:
# {
# "Version": "2012-10-17",
# "Statement": [
# {
# "Sid": "VisualEditor0",
# "Effect": "Allow",
# "Action": [
# "ec2:DescribeVolumes",
# "ec2:CreateSnapshot",
# "ec2:CopySnapshot",
# "ec2:DeleteSnapshot",
# "ec2:DescribeSnapshots",
# "ec2:CreateTags",
# "sts:GetCallerIdentity"
# ],
# "Resource": "*"
# }
# ]
# }
#
#
# Set FCCLI to the actual path of the firecamp-service-cli binary
FCCLI=~ec2-user/firecamp/1.6/firecamp-service-cli
#-- Do not modify below
# The Firecamp CLI is needed for every later step, so stop right away when it
# is missing or not executable. The message goes to stderr and the exit status
# is non-zero so that cron jobs and wrappers can detect the failure.
if [[ ! -x "$FCCLI" ]]; then
  echo "Download $(basename "$FCCLI") tool into $(dirname "$FCCLI") before using this script" >&2
  exit 1
fi
# Command-line parsing.
# The two boolean switches default to "false"; all value options start unset.
purge_snapshots=false
dest_purge_snapshots=false

# The leading ':' puts getopts into silent mode, so both an unknown option and
# a missing option argument end up in the catch-all arm below.
while getopts ':s:c:r:k:d:g:pu' opt; do
  case "$opt" in
    # boolean switches
    p) purge_snapshots=true ;;
    u) dest_purge_snapshots=true ;;
    # source cluster selection
    r) region="$OPTARG" ;;
    c) cluster="$OPTARG" ;;
    s) servicename="$OPTARG" ;;
    # retention / destination settings
    k) purge_after_input="$OPTARG" ;;
    d) dest_region="$OPTARG" ;;
    g) dest_purge_after_input="$OPTARG" ;;
    *)
      echo "=== Error with Options Input. Cause of failure is most likely that an unsupported parameter was passed or a parameter was passed without a corresponding option." >&2
      exit 64
      ;;
  esac
done
# Validate the parsed options and derive the values used to tag snapshots.
#
# Reads:  region, cluster, servicename, purge_snapshots, purge_after_input,
#         dest_purge_snapshots, dest_purge_after_input
# Writes: today, hostname, purge_after, purge_after_fe,
#         dest_purge_after, dest_purge_after_fe
#
# Every failure prints to stderr and exits with 64, the same status the
# option parser uses, so callers never see a "successful" run that did nothing.
if [[ -z "$region" || -z "$cluster" || -z "$servicename" ]]; then
  echo "region, cluster name or service name is not specified, exiting..." >&2
  exit 64
fi
if [[ "$purge_snapshots" == "true" && -z "$purge_after_input" ]]; then
  echo "retention period is not specified, exiting..." >&2
  exit 64
fi
if [[ "$dest_purge_snapshots" == "true" && -z "$dest_purge_after_input" ]]; then
  echo "retention period for the destination region is not specified, exiting..." >&2
  exit 64
fi

today=$(date -u +%Y-%m-%d)
hostname=$(hostname)

# PurgeAfter is the human-readable date, PurgeAfterFE the same instant as epoch
# seconds. A retention string that `date -d` cannot parse would otherwise
# produce empty tag values, so it is rejected here.
if ! purge_after=$(date -d "$purge_after_input" -u +%Y-%m-%d) ||
   ! purge_after_fe=$(date -d "$purge_after_input" -u +%s); then
  echo "cannot parse retention period '$purge_after_input', exiting..." >&2
  exit 64
fi
if ! dest_purge_after=$(date -d "$dest_purge_after_input" -u +%Y-%m-%d) ||
   ! dest_purge_after_fe=$(date -d "$dest_purge_after_input" -u +%s); then
  echo "cannot parse destination retention period '$dest_purge_after_input', exiting..." >&2
  exit 64
fi
#######################################
# Read the JMX user name and password of the Cassandra service from the
# Firecamp CLI "get-service" output.
# Globals:   FCCLI, region, cluster, servicename (read)
#            jmxuser, jmxpass (written; reset to empty first)
# Arguments: none
# Returns:   0 when both values were found, 1 otherwise
#######################################
fc_load_jmx_credentials() {
  local entry
  jmxuser=""
  jmxpass=""
  # grep -o prints one KEY=value match per line; the value pattern contains no
  # '=' so splitting on the first '=' is unambiguous.
  while IFS= read -r entry; do
    case "${entry%%=*}" in
      JMX_REMOTE_USER) jmxuser=${entry#*=} ;;
      JMX_REMOTE_PASSWD) jmxpass=${entry#*=} ;;
    esac
  done < <("$FCCLI" -region="$region" -cluster="$cluster" -op=get-service \
      -service-type=cassandra -service-name="$servicename" \
    | grep -oE "(JMX_REMOTE_USER|JMX_REMOTE_PASSWD)=[a-z0-9-]+")
  [[ -n "$jmxuser" && -n "$jmxpass" ]]
}

echo "Retriving credentials..."
# Missing credentials are reported but not fatal: the script keeps its
# original best-effort behaviour and carries on with the backup.
if ! fc_load_jmx_credentials; then
  echo "Warning: JMX credentials were not found in the get-service output; nodetool flush is likely to fail" >&2
fi
echo
# ---------------------------------------------------------------------------
# Per-member backup: flush each Cassandra node, snapshot its primary and
# journal volumes and, when a destination region is set, copy the snapshots
# there. Both volumes go through the same helpers, so they are always polled
# in the configured $region.
# ---------------------------------------------------------------------------

#######################################
# Poll a snapshot once a minute until it is completed or has failed.
# Arguments: $1 - region of the snapshot
#            $2 - snapshot id
# Returns:   0 when the state is "completed", 1 when it is "error".
#            Any other (or empty) state is polled again after 60 seconds.
#######################################
fc_wait_for_snapshot() {
  local snap_region=$1 snap_id=$2 state
  while true; do
    state=$(aws --region "$snap_region" ec2 describe-snapshots --snapshot-ids "$snap_id" \
      --query 'Snapshots[].State' --output text)
    case "$state" in
      completed) return 0 ;;
      error) return 1 ;;
    esac
    sleep 60
  done
}

#######################################
# Wait for a snapshot to complete and copy it to $dest_region when one is set.
# Meant to be started as a background job.
# Globals:   region, dest_region, dest_purge_after, dest_purge_after_fe,
#            dest_purge_snapshots (read)
# Arguments: $1 - volume id, $2 - volume name, $3 - snapshot id,
#            $4 - tag list shared by the snapshot and its copy
# Returns:   0 on success, 1 when the snapshot or its copy failed
#######################################
fc_track_snapshot() {
  local vol_id=$1 vol_name=$2 snap_id=$3 base_tags=$4
  local copy_id copy_tags

  if ! fc_wait_for_snapshot "$region" "$snap_id"; then
    echo "$(date): $vol_name ($vol_id) snapshot $snap_id ended in error state" >&2
    return 1
  fi
  echo "$(date): $vol_name ($vol_id) snapshot completed - $snap_id"

  [[ -n "$dest_region" ]] || return 0

  echo "$(date): Copying snapshot $snap_id to $dest_region region..."
  copy_tags="$base_tags,{Key=SourceRegion,Value=$region},{Key=SourceSnapshotId,Value=$snap_id}"
  copy_tags+=",{Key=PurgeAfter,Value=$dest_purge_after},{Key=PurgeAfterFE,Value=$dest_purge_after_fe}"
  copy_tags+=",{Key=PurgeAllow,Value=$dest_purge_snapshots}"
  if ! copy_id=$(aws --region="$dest_region" ec2 copy-snapshot \
      --description 'Created by Firecamp Cassandra Backup Script' \
      --source-region "$region" --source-snapshot-id "$snap_id" \
      --tag-specifications "ResourceType=snapshot,Tags=[$copy_tags]" \
      --query 'SnapshotId' --output text); then
    echo "$(date): Failed to CopySnapshot $snap_id for $vol_name ($vol_id), exiting..." >&2
    return 1
  fi

  if ! fc_wait_for_snapshot "$dest_region" "$copy_id"; then
    echo "$(date): copy $copy_id of snapshot $snap_id in $dest_region region ended in error state" >&2
    return 1
  fi
  echo "$(date): snapshot $snap_id copying for $vol_name ($vol_id) to $dest_region region completed - $copy_id"
}

#######################################
# Start a snapshot of one volume and track it in the background.
# Must be called from the main shell: a CreateSnapshot failure exits the
# whole script with status 1.
# Globals:   region, uuid, today, hostname, member, instanceid, az,
#            purge_after, purge_after_fe, purge_snapshots (read)
# Arguments: $1 - volume id, $2 - volume name,
#            $3 - value for the VolumeDestiny tag (Primary or Journal)
#######################################
fc_backup_volume() {
  local vol_id=$1 vol_name=$2 destiny=$3
  local base_tags purge_tags snap_id

  # Tags are assembled here, in the foreground, because the caller clears the
  # member variables right after both volumes have been handed off.
  base_tags="{Key=Name,Value=$vol_name},{Key=VolumeDestiny,Value=$destiny}"
  base_tags+=",{Key=ServiceUUID,Value=$uuid},{Key=Created,Value=$today}"
  base_tags+=",{Key=InitiatingHost,Value=$hostname},{Key=MemberName,Value=$member}"
  base_tags+=",{Key=Instance,Value=$instanceid},{Key=AvailableZone,Value=$az}"
  purge_tags="{Key=PurgeAfter,Value=$purge_after},{Key=PurgeAfterFE,Value=$purge_after_fe}"
  purge_tags+=",{Key=PurgeAllow,Value=$purge_snapshots}"

  echo "$(date): Taking $vol_name ($vol_id) snapshot..."
  if ! snap_id=$(aws --region="$region" ec2 create-snapshot --volume-id "$vol_id" \
      --description 'Created by Firecamp Cassandra Backup Script' \
      --tag-specifications "ResourceType=snapshot,Tags=[$base_tags,$purge_tags]" \
      --query 'SnapshotId' --output text); then
    echo "$(date): Failed to CreateSnapshot for $vol_id, exiting..." >&2
    exit 1
  fi

  fc_track_snapshot "$vol_id" "$vol_name" "$snap_id" "$base_tags" &
}

# list-members output is reduced to "Key:value" tokens, one per line. Fields
# are accumulated until all six of a member are known; that member is then
# backed up and the fields are cleared for the next one.
uuid=""; member=""; az=""; instanceid=""; pvolid=""; jvolid=""
# The token stream is read on fd 3 so that commands inside the loop cannot
# consume it through stdin.
while IFS= read -r line <&3; do
  key=${line%%:*}
  val=${line#*:}
  case "$key" in
    ServiceUUID) uuid=$val ;;
    MemberName) member=$val ;;
    AvailableZone) az=$val ;;
    ServerInstanceID) instanceid=$val ;;
    PrimaryVolumeID) pvolid=$val ;;
    JournalVolumeID) jvolid=$val ;;
  esac

  if [[ -n "$uuid" && -n "$member" && -n "$az" && -n "$instanceid" && -n "$pvolid" && -n "$jvolid" ]]; then
    pvolName=$(aws --region="$region" ec2 describe-volumes --volume-ids "$pvolid" \
      --query 'Volumes[].Tags[?Key==`Name`].Value' --output text)
    jvolName=$(aws --region="$region" ec2 describe-volumes --volume-ids "$jvolid" \
      --query 'Volumes[].Tags[?Key==`Name`].Value' --output text)
    if [[ -z "$pvolName" || -z "$jvolName" ]]; then
      echo "Can't get volume name for $pvolid or $jvolid" >&2
      exit 1
    fi

    echo "$(date): Flushing node $member..."
    # A failed flush is reported but does not stop the backup (best effort).
    if ! /bin/docker run --rm harisekhon/cassandra-dev nodetool \
        -h "$member.$cluster-firecamp.com" -u "$jmxuser" -pw "$jmxpass" flush; then
      echo "$(date): Warning: nodetool flush failed for $member; snapshots are taken without a flush" >&2
    fi

    fc_backup_volume "$pvolid" "$pvolName" Primary
    fc_backup_volume "$jvolid" "$jvolName" Journal

    uuid=""; member=""; az=""; instanceid=""; pvolid=""; jvolid=""
    echo
  fi
done 3< <("$FCCLI" -region="$region" -cluster="$cluster" -op=list-members \
    -service-type=cassandra -service-name="$servicename" \
  | grep -oE "(ServiceUUID|MemberName|AvailableZone|ServerInstanceID|PrimaryVolumeID|JournalVolumeID):[a-z0-9-]+")
#######################################
# Delete all expired, purge-enabled backup snapshots in one region.
# A snapshot qualifies when it carries a ServiceUUID tag, PurgeAllow=true and
# a PurgeAfterFE tag whose numeric value is below the cutoff. Tags that are
# empty or not numeric never qualify.
# Arguments: $1 - region, $2 - owning account id, $3 - cutoff in epoch seconds
# Outputs:   one tab-indented snapshot id per delete request
# Returns:   0 when at least one delete request was issued, 1 otherwise
#######################################
fc_purge_region() {
  local target_region=$1 owner_id=$2 cutoff=$3
  local expired snap_id
  local -a expired_ids

  expired=$(aws --region="$target_region" ec2 describe-snapshots --owner-ids "$owner_id" \
      --filters Name=tag-key,Values=ServiceUUID \
                Name=tag:PurgeAllow,Values=true \
    | jq -r --arg cutoff "$cutoff" \
        '.Snapshots[] | select(.Tags[] | select(.Key=="PurgeAfterFE") | (.Value | tonumber?) < ($cutoff | tonumber)) | .SnapshotId')
  [[ -n "$expired" ]] || return 1

  mapfile -t expired_ids <<< "$expired"
  for snap_id in "${expired_ids[@]}"; do
    echo -e "\t$snap_id"
    aws --region="$target_region" ec2 delete-snapshot --snapshot-id "$snap_id" > /dev/null
  done
  return 0
}

echo "Commands to snapshot Cassandra volumes have been sent"
echo

# -p purges the source region and, as before, the destination region when one
# is given. -u on its own now purges the destination region as well.
if [[ "$purge_snapshots" == "true" || ( "$dest_purge_snapshots" == "true" && -n "$dest_region" ) ]]; then
  echo "$(date): Purging expired snapshots..."
  accountid=$(aws sts get-caller-identity --output text --query 'Account')
  if [[ -z "$accountid" ]]; then
    echo "$(date): Can't determine the AWS account id, skipping purge" >&2
    exit 1
  fi
  # Only snapshots whose PurgeAfterFE lies more than a day in the past are removed.
  yesterday=$(( $(date -u +%s) - 86400 ))
  purge_sent=false
  if [[ "$purge_snapshots" == "true" ]]; then
    if fc_purge_region "$region" "$accountid" "$yesterday"; then
      purge_sent=true
    fi
  fi
  if [[ -n "$dest_region" ]]; then
    if fc_purge_region "$dest_region" "$accountid" "$yesterday"; then
      purge_sent=true
    fi
  fi
  # An if-statement (not `test && echo`) keeps the script's exit status at 0
  # when there was simply nothing to purge.
  if [[ "$purge_sent" == "true" ]]; then
    echo "$(date): Commands to delete expired snapshots have been sent"
  fi
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment