Skip to content

Instantly share code, notes, and snippets.

@jazzl0ver
Last active February 26, 2021 12:11
Show Gist options
  • Save jazzl0ver/c87c5ebfd76c07b56ffe8448f40e737b to your computer and use it in GitHub Desktop.
Save jazzl0ver/c87c5ebfd76c07b56ffe8448f40e737b to your computer and use it in GitHub Desktop.
Firecamp Cassandra restore script (aws cli v2 is required!)
#!/bin/bash
#
# Firecamp Cassandra restore script
# Example:
# ./fc_cass_restore.sh -r us-east-1 -c firecamp-qa -s cass-qa -d 2019-05-28 -u bd751a2269a44a2e52898bc0dd5cb2ac -o firecamp-prod
# where:
# -r - region
# -c - firecamp cluster name (MUST NOT match the cluster where backup was taken)
# -s - firecamp cluster's service name (MAY match the name of the backed up service, but not recommended)
# -d - backup creation date (Created tag of the volumes snapshots)
# -u - ServiceUUID (ServiceUUID tag of the volumes snapshots)
# -o - origin firecamp cluster name (MUST match the cluster where backup was taken)
#
# Script should be executed on an instance within the same VPC as Firecamp cluster and in AppSecurityGroup
# Dependencies: awscli, docker, firecamp-service-cli, firecamp-volume-replace, jq
#
# The script does not modify the existing service or its volumes. It only creates new volumes
# from snapshots and prints the commands needed to restore the backed-up volumes
# (made by fc_cass_backup.sh).
#
# Following policy should be assigned to the instance (or a user) where the script is executed:
# {
# "Version": "2012-10-17",
# "Statement": [
# {
# "Sid": "VisualEditor0",
# "Effect": "Allow",
# "Action": [
# "ec2:DescribeVolumes",
# "ec2:CreateSnapshot",
# "ec2:DescribeSnapshots",
# "ec2:CreateVolume",
# "ec2:CreateTags",
# "sts:GetCallerIdentity",
# "dynamodb:GetItem",
# "dynamodb:Query",
# "dynamodb:UpdateItem"
# ],
# "Resource": "*"
# }
# ]
# }
#
#
# Modify FCCLI and FCVR vars to the actual paths
FCCLI=~ec2-user/firecamp/1.3/firecamp-service-cli
FCVR=~ec2-user/firecamp/1.3/firecamp-volume-replace
#-- Do not modify below
# Pre-flight checks: both Firecamp binaries must exist and be executable, and
# the AWS CLI must report major version 2. A failed check prints a message to
# stderr and terminates the script with a non-zero status.
[ -x "$FCCLI" ] || { echo "Download $(basename "$FCCLI") tool into $(dirname "$FCCLI") before using this script" >&2; exit 1; }
[ -x "$FCVR" ] || { echo "Download $(basename "$FCVR") tool into $(dirname "$FCVR") before using this script" >&2; exit 1; }
# Keep only the first space-separated word of "aws --version", cut at the first
# dot, and compare it with "aws-cli/2". The substitution is quoted so that an
# empty result (aws missing or printing nothing on stdout) still yields a valid test.
[ "$(aws --version | cut -f1 -d' ' | cut -f1 -d.)" = "aws-cli/2" ] || { echo "AWS CLI v2 is required" >&2; exit 1; }
# Parse the command-line options. The leading ':' in the optstring puts getopts
# into silent mode, so both unknown options and options with a missing argument
# end up in the '*' arm below.
region=""; cluster=""; servicename=""; created=""; uuid=""; origcluster=""
while getopts :s:c:r:d:u:o: opt; do
  case $opt in
    r) region="$OPTARG" ;;
    c) cluster="$OPTARG" ;;
    s) servicename="$OPTARG" ;;
    d) created="$OPTARG" ;;
    u) uuid="$OPTARG" ;;
    o) origcluster="$OPTARG" ;;
    *) echo "=== Error with Options Input. Cause of failure is most likely that an unsupported parameter was passed or a parameter was passed without a corresponding option." 1>&2 ; exit 64 ;;
  esac
done
# All six options are mandatory. Check the parsed values themselves instead of
# OPTIND: OPTIND depends on how the arguments were written ("-r x" vs "-rx")
# and says nothing about which options were actually supplied.
for required in region cluster servicename created uuid origcluster; do
  [ -n "${!required}" ] || { echo "Not enough parameters, exiting..." >&2; exit 64; }
done
unset required
#-- Re-evaluates a shell expression until its output is something other than "0".
# Arguments: the words of a command line. They are joined with spaces and run
#            through eval, so a pipeline passed as a string (or as unquoted
#            words) is executed as a pipeline.
# Behavior:  all spaces are removed from the command's output before the
#            comparison (presumably to tolerate space-padded `wc -l` counts).
#            While the output equals "0" the function sleeps 5 seconds and
#            tries again. There is no timeout.
# Returns:   0 once the output is no longer "0".
wait_until()
{
  local result
  # Loop instead of recursing so a long wait does not keep growing the call stack.
  while :; do
    result=$(eval "$*" | sed 's/ //g')
    [[ $result == 0 ]] || break
    sleep 5
  done
  return 0
}
# Accumulators for the command lists printed at the end of the script. All
# three are reset here so nothing inherited from the environment leaks into
# the printed instructions.
replace_cmd=""; delete_cmd=""; restore_cmd=""
# Remind the operator of the manual prerequisite: the target service has to be
# created and stopped before its volumes can be swapped.
echo
echo "Before running this script make sure to create new C* service (hit Ctrl-C after the 1st 'wait the service containers running, RunningCount 0' line) and stop it:"
echo "$FCCLI" -region="$region" -cluster="$cluster" -op=create-service -service-type=cassandra -service-name="$servicename" -replicas=3 -volume-size=2 -journal-volume-size=1 -cas-heap-size=512
echo "$FCCLI" -region="$region" -cluster="$cluster" -op=stop-service -service-type=cassandra -service-name="$servicename"
echo
echo "Hit Ctrl-C to exit or Enter to continue"
# Block until the operator confirms; the typed text itself is not used.
read -r
# Creates a gp2 volume from a snapshot, waits until describe-volumes reports it
# as "available", and prints the new volume ID on stdout.
# Globals:   region (read)
# Arguments: $1 - snapshot ID, $2 - availability zone, $3 - value for the Name tag
# Returns:   1 when create-volume yields no volume ID, 0 otherwise
create_volume_from_snapshot()
{
  local snapshot_id=$1 zone=$2 name=$3 volume_id
  volume_id=$(aws --region="$region" ec2 create-volume --snapshot-id "$snapshot_id" --availability-zone "$zone" --volume-type gp2 \
    --tag-specifications "ResourceType=volume,Tags=[{Key=Name,Value=$name}]" --query 'VolumeId' --output text)
  [ -n "$volume_id" ] || return 1
  wait_until "aws --region=$region ec2 describe-volumes --volume-ids $volume_id | grep available | wc -l"
  printf '%s\n' "$volume_id"
}

# ServiceUUID of the newly created service; the printed instructions later
# substitute it with the UUID given via -u.
newuuid=$("$FCCLI" -region="$region" -cluster="$cluster" -op=get-service -service-type=cassandra -service-name="$servicename" | grep ServiceUUID | cut -f2 -d: | cut -f1 -d' ')
[ -n "$newuuid" ] || { echo "Can't get ServiceUUID of service $servicename in cluster $cluster" >&2; exit 1; }

# The account ID does not change between members, so look it up once.
accountid=$(aws sts get-caller-identity --output text --query 'Account')

# list-members output is reduced to "Key:value" tokens. The four keys of one
# member are collected one token at a time; as soon as all four are known the
# member is processed and the collectors are cleared for the next one.
member=""; az=""; pvolid=""; jvolid=""
for line in $("$FCCLI" -region="$region" -cluster="$cluster" -op=list-members -service-type=cassandra -service-name="$servicename" \
  | grep -oE "(MemberName|AvailableZone|PrimaryVolumeID|JournalVolumeID):[a-z0-9-]+"); do
  key=${line%%:*}
  val=${line#*:}
  case $key in
    MemberName) member=$val ;;
    AvailableZone) az=$val ;;
    PrimaryVolumeID) pvolid=$val ;;
    JournalVolumeID) jvolid=$val ;;
  esac
  [[ -n $member && -n $az && -n $pvolid && -n $jvolid ]] || continue

  # The replacement volumes reuse the Name tags of the volumes they stand in for.
  pvolName=$(aws --region="$region" ec2 describe-volumes --volume-ids "$pvolid" --query 'Volumes[].Tags[?Key==`Name`].Value' --output text)
  jvolName=$(aws --region="$region" ec2 describe-volumes --volume-ids "$jvolid" --query 'Volumes[].Tags[?Key==`Name`].Value' --output text)
  [[ -n $pvolName && -n $jvolName ]] || { echo "Can't get volume name for $pvolid or $jvolid" >&2; exit 1; }

  echo "Creating volumes from snapshots for $member:"
  # Look the snapshots up in the region given via -r: a volume can only be
  # created from a snapshot that lives in the same region.
  sshots=$(aws --region="$region" ec2 describe-snapshots --owner-ids "$accountid" \
    --filters "Name=tag:Created,Values=$created" \
              "Name=tag:ServiceUUID,Values=$uuid" \
              "Name=tag:AvailableZone,Values=$az" \
    --output json)
  # Pick the snapshots having a tag value that matches "Primary" / "Journal".
  spvolid=$(jq -r '.[][] | select(.Tags[].Value|test("Primary")) | .SnapshotId' <<<"$sshots")
  sjvolid=$(jq -r '.[][] | select(.Tags[].Value|test("Journal")) | .SnapshotId' <<<"$sshots")
  if [[ -z $spvolid || -z $sjvolid ]]; then
    echo "Something went wrong - can't find Primary/Journal tags in snapshots" >&2
    exit 1
  fi

  npvolid=$(create_volume_from_snapshot "$spvolid" "$az" "$pvolName") \
    || { echo "Can't create volume from snapshot $spvolid" >&2; exit 1; }
  echo -e "\tPrimary volume created - $npvolid"
  njvolid=$(create_volume_from_snapshot "$sjvolid" "$az" "$jvolName") \
    || { echo "Can't create volume from snapshot $sjvolid" >&2; exit 1; }
  echo -e "\tJournal volume created - $njvolid"

  # Queue the commands for the final report: swap in the restored volumes,
  # swap back (revert), and delete the volumes that were replaced. The "\n"
  # separators are expanded later by "echo -e".
  replace_cmd+="$FCVR -cluster=$cluster -service-name=$servicename -bad-volumeid=$pvolid -new-volumeid=$npvolid 2>> fcvr.replace.log\n"
  replace_cmd+="$FCVR -cluster=$cluster -service-name=$servicename -bad-volumeid=$jvolid -new-volumeid=$njvolid 2>> fcvr.replace.log\n"
  restore_cmd+="$FCVR -cluster=$cluster -service-name=$servicename -bad-volumeid=$npvolid -new-volumeid=$pvolid 2>> fcvr.restore.log\n"
  restore_cmd+="$FCVR -cluster=$cluster -service-name=$servicename -bad-volumeid=$njvolid -new-volumeid=$jvolid 2>> fcvr.restore.log\n"
  delete_cmd+="aws --region $region ec2 delete-volume --volume-id $pvolid\n"
  delete_cmd+="aws --region $region ec2 delete-volume --volume-id $jvolid\n"
  member=""; az=""; pvolid=""; jvolid=""
done
# Everything below only PRINTS commands for the operator to review and run by
# hand; nothing here is executed by this script. Shell metacharacters are
# backslash-escaped so they appear literally in the printed command lines.
echo
echo "Replace volumes using the following commands:"
echo -e "$replace_cmd"
echo
echo "Create new log group:"
echo aws --region $region logs create-log-group --log-group-name $cluster-$servicename-$uuid
echo
echo "Modify task definition and service:"
echo taskdef=\$\(aws --region $region ecs describe-services --cluster $cluster --services $servicename \| jq -r \'.services[].taskDefinition\'\)
# The printed pipeline exports the current task definition, drops the listed
# fields, adds an entryPoint that rewrites CLUSTER= in /data/conf/service.conf
# to the origin cluster name, and replaces the new service UUID with the
# backed-up one.
echo aws --region $region ecs describe-task-definition --task-definition \$taskdef \| jq -r \'.taskDefinition\' \| jq -r \'del\(.taskDefinitionArn,.requiresAttributes,.compatibilities,.status,.family,.revision\)\' \| jq -r \'.containerDefinitions[] += { \"entryPoint\": [\"/bin/bash\"\,\"-c\"\,\"sed -ie \\\"s/CLUSTER=$cluster/CLUSTER=$origcluster/g\\\" /data/conf/service.conf \&\& /docker-entrypoint.sh cassandra -f\" ] }\' \| sed -e \"s/$newuuid/$uuid/g\" \> taskdef.json
# The redirection is escaped so it is part of the printed command; unescaped it
# would send this echo's own output to /dev/null and hide the command.
echo aws --region $region ecs deregister-task-definition --task-definition \$taskdef --no-paginate \>/dev/null
echo aws --region $region ecs register-task-definition --family \$\(echo \$taskdef \| cut -f2 -d/ \| cut -f1 -d:\) --cli-input-json file://taskdef.json \>/dev/null
echo
echo "Update ECS service:"
echo aws --region $region ecs update-service --cluster $cluster --service $servicename --task-definition \$\(echo \$taskdef \| cut -f2 -d/ \| cut -f1 -d:\) \>/dev/null
echo
echo "Update DynamoDB:"
# For each of the three key prefixes the printed commands query the items
# stored under the new UUID, rewrite the UUID to the backed-up one, and put the
# resulting items back one by one.
echo aws --region $region dynamodb query --table-name $cluster-firecamp-table --key-condition-expression \"PartitionKey = :v1\" --expression-attribute-values \'{\":v1\": {\"S\":\"ConfigKey-$newuuid\"}}\' \| sed -e \"s/$newuuid/$uuid/g\" \> dyn.json
echo for \(\(i=0\;i\<\$\(cat dyn.json \| jq -r \'.Count\'\)\;i++\)\)\; do cat dyn.json \| jq -r \".Items[\$i]\" \| aws --region $region dynamodb put-item --table-name $cluster-firecamp-table --item file:///dev/stdin\; done
echo aws --region $region dynamodb query --table-name $cluster-firecamp-table --key-condition-expression \"PartitionKey = :v1\" --expression-attribute-values \'{\":v1\": {\"S\":\"ServiceAttrKey-$newuuid\"}}\' \| sed -e \"s/$newuuid/$uuid/g\" \> dyn.json
echo for \(\(i=0\;i\<\$\(cat dyn.json \| jq -r \'.Count\'\)\;i++\)\)\; do cat dyn.json \| jq -r \".Items[\$i]\" \| aws --region $region dynamodb put-item --table-name $cluster-firecamp-table --item file:///dev/stdin\; done
echo aws --region $region dynamodb query --table-name $cluster-firecamp-table --key-condition-expression \"PartitionKey = :v1\" --expression-attribute-values \'{\":v1\": {\"S\":\"ServiceMemberKey-$newuuid\"}}\' \| sed -e \"s/$newuuid/$uuid/g\" \> dyn.json
echo for \(\(i=0\;i\<\$\(cat dyn.json \| jq -r \'.Count\'\)\;i++\)\)\; do cat dyn.json \| jq -r \".Items[\$i]\" \| aws --region $region dynamodb put-item --table-name $cluster-firecamp-table --item file:///dev/stdin\; done
echo aws --region $region dynamodb update-item --table-name $cluster-firecamp-table --key \'{\"PartitionKey\": {\"S\":\"ServiceKey-$cluster\"}\,\"SortKey\":{\"S\":\"$servicename\"}}\' --update-expression \'SET \#S = :u\' --expression-attribute-names \'{\"\#S\":\"ServiceUUID\"}\' --expression-attribute-values \'{\":u\": {\"S\":\"$uuid\"}}\'
echo
echo "Start C*:"
echo $FCCLI -region=$region -cluster=$cluster -op=start-service -service-type=cassandra -service-name=$servicename
echo
echo "Make sure everything is alright and (if yes) run the following commands to delete the old volumes:"
echo -e "$delete_cmd"
echo
echo "To revert changes back, use the following commands:"
echo -e "$restore_cmd"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment