Last active
February 26, 2021 12:11
-
-
Save jazzl0ver/c87c5ebfd76c07b56ffe8448f40e737b to your computer and use it in GitHub Desktop.
Firecamp Cassandra restore script (aws cli v2 is required!)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# | |
# Firecamp Cassandra restore script | |
# Example: | |
# ./fc_cass_restore.sh -r us-east-1 -c firecamp-qa -s cass-qa -d 2019-05-28 -u bd751a2269a44a2e52898bc0dd5cb2ac -o firecamp-prod
# where: | |
# -r - region | |
# -c - firecamp cluster name (MUST NOT match the cluster where backup was taken) | |
# -s - firecamp cluster's service name (MAY match the name of the backed up service, but not recommended) | |
# -d - backup creation date (Created tag of the volumes snapshots) | |
# -u - ServiceUUID (ServiceUUID tag of the volumes snapshots) | |
# -o - origin firecamp cluster name (MUST match the cluster where backup was taken) | |
# | |
# Script should be executed on an instance within the same VPC as Firecamp cluster and in AppSecurityGroup | |
# Dependencies: awscli, docker, firecamp-service-cli, firecamp-volume-replace, jq | |
# | |
# The script does not modify the existing service or its volumes. It only creates new volumes from
# the snapshots (made by fc_cass_backup.sh) and prints the commands needed to restore them.
# | |
# Following policy should be assigned to the instance (or a user) where the script is executed: | |
# { | |
# "Version": "2012-10-17", | |
# "Statement": [ | |
# { | |
# "Sid": "VisualEditor0", | |
# "Effect": "Allow", | |
# "Action": [ | |
# "ec2:DescribeVolumes", | |
# "ec2:CreateSnapshot", | |
# "ec2:DescribeSnapshots", | |
# "ec2:CreateVolume", | |
# "ec2:CreateTags", | |
# "sts:GetCallerIdentity", | |
# "dynamodb:GetItem", | |
# "dynamodb:Query", | |
# "dynamodb:UpdateItem" | |
# ], | |
# "Resource": "*" | |
# } | |
# ] | |
# } | |
# | |
# | |
# Modify FCCLI and FCVR vars to the actual paths
FCCLI=~ec2-user/firecamp/1.3/firecamp-service-cli
FCVR=~ec2-user/firecamp/1.3/firecamp-volume-replace
#-- Do not modify below

# Both Firecamp tools must exist and be executable; otherwise tell the user
# where to put them and stop with a non-zero status.
for required_tool in "$FCCLI" "$FCVR"; do
  if [[ ! -x "$required_tool" ]]; then
    echo "Download $(basename "$required_tool") tool into $(dirname "$required_tool") before using this script" >&2
    exit 1
  fi
done

# Reduce the first word of `aws --version` to "aws-cli/<major>" and require
# major version 2. A missing aws binary yields an empty string and fails here too.
aws_major=$(aws --version | cut -f1 -d' ' | cut -f1 -d.)
if [[ "$aws_major" != "aws-cli/2" ]]; then
  echo "AWS CLI v2 is required" >&2
  exit 1
fi
# Parse the command line. Every option takes a value:
#   -r region, -c cluster, -s service name, -d snapshot "Created" date,
#   -u ServiceUUID of the backup, -o origin cluster name.
# Start from empty values so nothing is inherited from the environment.
region=""; cluster=""; servicename=""; created=""; uuid=""; origcluster=""
while getopts :s:c:r:d:u:o: opt; do
  case "$opt" in
    r) region="$OPTARG" ;;
    c) cluster="$OPTARG" ;;
    s) servicename="$OPTARG" ;;
    d) created="$OPTARG" ;;
    u) uuid="$OPTARG" ;;
    o) origcluster="$OPTARG" ;;
    *) echo "=== Error with Options Input. Cause of failure is most likely that an unsupported parameter was passed or a parameter was passed without a corresponding option." 1>&2 ; exit 64 ;;
  esac
done

# All six options are mandatory. Check the parsed values instead of OPTIND:
# OPTIND depends on how options are spelled ("-r X" vs "-rX") and on repeats,
# so it cannot tell which option is actually missing.
missing_opts=""
[[ -n "$region" ]]      || missing_opts+=" -r"
[[ -n "$cluster" ]]     || missing_opts+=" -c"
[[ -n "$servicename" ]] || missing_opts+=" -s"
[[ -n "$created" ]]     || missing_opts+=" -d"
[[ -n "$uuid" ]]        || missing_opts+=" -u"
[[ -n "$origcluster" ]] || missing_opts+=" -o"
if [[ -n "$missing_opts" ]]; then
  echo "Not enough parameters (missing:$missing_opts), exiting..." >&2
  exit 64
fi
#-- iterates the expression until the output is non-zero
#
# Usage: wait_until <command text...>
#   All arguments are joined into one string and evaluated as a shell command
#   line, so the text may contain pipes. Spaces are stripped from the command's
#   output; while what remains is exactly "0" the function sleeps and evaluates
#   the command again. Any other output (including empty output) ends the wait.
# Environment:
#   WAIT_UNTIL_INTERVAL - optional pause between polls in seconds (default 5).
# Returns: 0 once the output is no longer "0".
# NOTE: the command text is passed to eval; only call this with trusted,
#       script-built command lines.
wait_until() {
  local probe_output
  while :; do
    probe_output=$(eval "$*" | sed 's/ //g')
    [[ "$probe_output" == 0 ]] || break
    sleep "${WAIT_UNTIL_INTERVAL:-5}"
  done
}
# Accumulators for the command lists printed at the end of the script.
# All three start empty so a value inherited from the environment cannot leak in.
replace_cmd=""; delete_cmd=""; restore_cmd=""

# Show the commands the operator must have run beforehand (create the new
# Cassandra service, then stop it) and wait for an explicit confirmation.
echo
echo "Before running this script make sure to create new C* service (hit Ctrl-C after the 1st 'wait the service containers running, RunningCount 0' line) and stop it:"
echo "$FCCLI -region=$region -cluster=$cluster -op=create-service -service-type=cassandra -service-name=$servicename -replicas=3 -volume-size=2 -journal-volume-size=1 -cas-heap-size=512"
echo "$FCCLI -region=$region -cluster=$cluster -op=stop-service -service-type=cassandra -service-name=$servicename"
echo
echo "Hit Ctrl-C to exit or Enter to continue"
read -r
# ServiceUUID of the newly created service, taken from the get-service output.
# The commands printed later substitute it with the backed-up UUID ($uuid).
newuuid=$("$FCCLI" -region="$region" -cluster="$cluster" -op=get-service -service-type=cassandra -service-name="$servicename" \
  | grep ServiceUUID | cut -f2 -d: | cut -f1 -d' ')
[[ -n "$newuuid" ]] || { echo "Can't get ServiceUUID of service $servicename in cluster $cluster" >&2; exit 1; }

# The account id is the same for every member, so look it up once.
accountid=$(aws sts get-caller-identity --output text --query 'Account')
[[ -n "$accountid" ]] || { echo "Can't get AWS account id" >&2; exit 1; }

# Create a gp2 volume from a snapshot, wait until it is available and print
# the new volume id on stdout.
#   $1 - snapshot id, $2 - availability zone, $3 - value for the Name tag
# Returns 1 (printing nothing) when create-volume yields no volume id.
# Uses wait_until, which is defined earlier in this script.
create_volume_from_snapshot() {
  local snapshot_id=$1 zone=$2 volume_name=$3 volume_id
  volume_id=$(aws --region="$region" ec2 create-volume --snapshot-id "$snapshot_id" --availability-zone "$zone" --volume-type gp2 \
    --tag-specifications "ResourceType=volume,Tags=[{Key=Name,Value=$volume_name}]" \
    --query 'VolumeId' --output text)
  [[ -n "$volume_id" ]] || return 1
  # Poll until the describe-volumes output contains the word "available".
  wait_until "aws --region=$region ec2 describe-volumes --volume-ids $volume_id | grep available | wc -l"
  printf '%s\n' "$volume_id"
}

# list-members output is reduced to "Key:value" tokens, one per line. The four
# fields of a member are collected; once all are present the member is processed
# and the fields are reset for the next one.
# The list is read on fd 3 so commands inside the loop cannot consume it via stdin.
member=""; az=""; pvolid=""; jvolid=""
while IFS=: read -r key val <&3; do
  case "$key" in
    MemberName)      member=$val ;;
    AvailableZone)   az=$val ;;
    PrimaryVolumeID) pvolid=$val ;;
    JournalVolumeID) jvolid=$val ;;
  esac
  [[ -n "$member" && -n "$az" && -n "$pvolid" && -n "$jvolid" ]] || continue

  # The new volumes reuse the Name tags of the volumes they will replace.
  pvolName=$(aws --region="$region" ec2 describe-volumes --volume-ids "$pvolid" --query 'Volumes[].Tags[?Key==`Name`].Value' --output text)
  jvolName=$(aws --region="$region" ec2 describe-volumes --volume-ids "$jvolid" --query 'Volumes[].Tags[?Key==`Name`].Value' --output text)
  [[ -n "$pvolName" && -n "$jvolName" ]] || { echo "Can't get volume name for $pvolid or $jvolid" >&2; exit 1; }

  echo "Creating volumes from snapshots for $member:"

  # Look up the snapshots in the requested region (previously hard-coded to
  # us-east-1): create-volume below runs in $region and needs them there.
  sshots=$(aws --region="$region" ec2 describe-snapshots --owner-ids "$accountid" \
    --filters "Name=tag:Created,Values=$created" \
              "Name=tag:ServiceUUID,Values=$uuid" \
              "Name=tag:AvailableZone,Values=$az" \
    --output json)
  # any() yields each matching snapshot once, even if several tag values match.
  spvolid=$(printf '%s\n' "$sshots" | jq -r '.Snapshots[] | select(any(.Tags[]; .Value | test("Primary"))) | .SnapshotId')
  sjvolid=$(printf '%s\n' "$sshots" | jq -r '.Snapshots[] | select(any(.Tags[]; .Value | test("Journal"))) | .SnapshotId')
  if [[ -z "$spvolid" || -z "$sjvolid" ]]; then
    echo "Something went wrong - can't find Primary/Journal tags in snapshots" >&2
    exit 1
  fi

  npvolid=$(create_volume_from_snapshot "$spvolid" "$az" "$pvolName")
  [[ -n "$npvolid" ]] || { echo "Can't create volume from snapshot $spvolid" >&2; exit 1; }
  echo -e "\tPrimary volume created - $npvolid"

  njvolid=$(create_volume_from_snapshot "$sjvolid" "$az" "$jvolName")
  [[ -n "$njvolid" ]] || { echo "Can't create volume from snapshot $sjvolid" >&2; exit 1; }
  echo -e "\tJournal volume created - $njvolid"

  # Queue the commands printed at the end: swap in the new volumes, swap them
  # back out (revert), and delete the old volumes.
  replace_cmd+="$FCVR -cluster=$cluster -service-name=$servicename -bad-volumeid=$pvolid -new-volumeid=$npvolid 2>> fcvr.replace.log\n"
  replace_cmd+="$FCVR -cluster=$cluster -service-name=$servicename -bad-volumeid=$jvolid -new-volumeid=$njvolid 2>> fcvr.replace.log\n"
  restore_cmd+="$FCVR -cluster=$cluster -service-name=$servicename -bad-volumeid=$npvolid -new-volumeid=$pvolid 2>> fcvr.restore.log\n"
  restore_cmd+="$FCVR -cluster=$cluster -service-name=$servicename -bad-volumeid=$njvolid -new-volumeid=$jvolid 2>> fcvr.restore.log\n"
  delete_cmd+="aws --region $region ec2 delete-volume --volume-id $pvolid\n"
  delete_cmd+="aws --region $region ec2 delete-volume --volume-id $jvolid\n"

  member=""; az=""; pvolid=""; jvolid=""
done 3< <("$FCCLI" -region="$region" -cluster="$cluster" -op=list-members -service-type=cassandra -service-name="$servicename" \
  | grep -oE "(MemberName|AvailableZone|PrimaryVolumeID|JournalVolumeID):[a-z0-9-]+")
# Everything below only PRINTS commands for the operator to review and run.
# Backslash-escaped characters ( \$ \| \> \' \" ... ) are emitted literally so
# they take effect when the printed line is executed, not now.
echo
echo "Replace volumes using the following commands:"
echo -e "$replace_cmd"
echo
echo "Create new log group:"
echo aws --region $region logs create-log-group --log-group-name $cluster-$servicename-$uuid
echo
echo "Modify task definition and service:"
echo taskdef=\$\(aws --region $region ecs describe-services --cluster $cluster --services $servicename \| jq -r \'.services[].taskDefinition\'\)
# Printed pipeline: dump the task definition, drop the listed fields, add an
# entryPoint that rewrites CLUSTER= in service.conf to the origin cluster, and
# replace the new service UUID with the backed-up one.
echo aws --region $region ecs describe-task-definition --task-definition \$taskdef \| jq -r \'.taskDefinition\' \| jq -r \'del\(.taskDefinitionArn,.requiresAttributes,.compatibilities,.status,.family,.revision\)\' \| jq -r \'.containerDefinitions[] += { \"entryPoint\": [\"/bin/bash\"\,\"-c\"\,\"sed -ie \\\"s/CLUSTER=$cluster/CLUSTER=$origcluster/g\\\" /data/conf/service.conf \&\& /docker-entrypoint.sh cassandra -f\" ] }\' \| sed -e \"s/$newuuid/$uuid/g\" \> taskdef.json
# The redirection is escaped so it is part of the printed command; unescaped,
# it sent this echo's own output to /dev/null and the line was never shown.
echo aws --region $region ecs deregister-task-definition --task-definition \$taskdef --no-paginate \>/dev/null
echo aws --region $region ecs register-task-definition --family \$\(echo \$taskdef \| cut -f2 -d/ \| cut -f1 -d:\) --cli-input-json file://taskdef.json \>/dev/null
echo
echo "Update ECS service:"
echo aws --region $region ecs update-service --cluster $cluster --service $servicename --task-definition \$\(echo \$taskdef \| cut -f2 -d/ \| cut -f1 -d:\) \>/dev/null
echo
echo "Update DynamoDB:"
# For each partition-key prefix print a query that saves the items with the
# UUID substituted into dyn.json, followed by a loop that puts every item back.
for key_prefix in ConfigKey ServiceAttrKey ServiceMemberKey; do
  echo aws --region $region dynamodb query --table-name $cluster-firecamp-table --key-condition-expression \"PartitionKey = :v1\" --expression-attribute-values \'{\":v1\": {\"S\":\"$key_prefix-$newuuid\"}}\' \| sed -e \"s/$newuuid/$uuid/g\" \> dyn.json
  echo for \(\(i=0\;i\<\$\(cat dyn.json \| jq -r \'.Count\'\)\;i++\)\)\; do cat dyn.json \| jq -r \".Items[\$i]\" \| aws --region $region dynamodb put-item --table-name $cluster-firecamp-table --item file:///dev/stdin\; done
done
# Point the service record itself at the backed-up UUID.
echo aws --region $region dynamodb update-item --table-name $cluster-firecamp-table --key \'{\"PartitionKey\": {\"S\":\"ServiceKey-$cluster\"}\,\"SortKey\":{\"S\":\"$servicename\"}}\' --update-expression \'SET \#S = :u\' --expression-attribute-names \'{\"\#S\":\"ServiceUUID\"}\' --expression-attribute-values \'{\":u\": {\"S\":\"$uuid\"}}\'
echo
echo "Start C*:"
echo $FCCLI -region=$region -cluster=$cluster -op=start-service -service-type=cassandra -service-name=$servicename
echo
echo "Make sure everything is alright and (if yes) run the following commands to delete the old volumes:"
echo -e "$delete_cmd"
echo
echo "To revert changes back, use the following commands:"
echo -e "$restore_cmd"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment