Skip to content

Instantly share code, notes, and snippets.

@vcastellm
Last active April 15, 2024 17:55
Show Gist options
  • Star 65 You must be signed in to star a gist
  • Fork 16 You must be signed in to fork a gist
  • Save vcastellm/3ac4aae9279d7c68c486fecccc2546cc to your computer and use it in GitHub Desktop.
Save vcastellm/3ac4aae9279d7c68c486fecccc2546cc to your computer and use it in GitHub Desktop.
Run task and wait for result in AWS ECS
#!/usr/bin/env bash
# ecs-run: start a task on an Amazon ECS cluster with `aws ecs run-task`,
# wait until it has stopped and exit with the exit code reported for its
# container. Requires the `aws` command line interface and `jq`.
#
# Abort on the first unhandled command failure. This is switched off again
# (`set +e`) right before the run/retry loop, which inspects exit codes itself.
set -e
# Print the command-line help and terminate the script.
# Arguments: none
# Outputs:   the help text on stdout
# Exits:     always, with status 2
function usage() {
  # Quoted delimiter: the help text is literal, nothing inside it is expanded.
  cat <<'EOM'
##### ecs-run #####
Simple script for running tasks on Amazon Elastic Container Service

Required arguments:
    -d | --task-definition    Name of task definition to run
    -c | --cluster            Name of ECS cluster
    -n | --container-name     Name of Docker container

Optional arguments:
    -m | --command            Command to run in the container, given as a comma-separated list (e.g. "sleep,15")
    --aws-instance-profile    Use the IAM role associated with this instance
    -D | --desired-count      The number of instantiations of the task to place. Default is 1.
    -t | --retries            Number of times the "tasks-stopped" waiter is run before giving up. Default is 12 (about two hours).
    -e | --tag-env-var        Get image tag name from environment variable (accepted, but currently not used by this script).
    -v | --verbose            Verbose output
    -r | --region             AWS Region
    -p | --profile            AWS Profile to use

Environment:
    DRY_RUN                   When non-empty, print the aws commands instead of running them

Requirements:
    aws:  AWS Command Line Interface
    jq:   Command-line JSON processor

Examples:
  Run a command in a task (using env vars for AWS settings):
    ecs-run -c production1 -d foo-taskdef -n foo-container -m "sleep,15"
  All options:
    ecs-run -p my-profile -r eu-west-1 -c production1 -d foo-taskdef -n foo-container -m "sleep,15" -D 1 -t 12 -v
EOM
  exit 2
}
# Called without any argument there is nothing to run: show the help (which
# also terminates the script).
if (( $# == 0 )); then
  usage
fi
# Print one message line on stdout, prefixed with $PREFIX when that variable
# is set.
# Arguments: the words of the message; they are joined with single spaces.
# Outputs:   "<PREFIX><message>" followed by a newline
function _echo {
  # Join with a space regardless of what IFS the caller is using.
  local IFS=' '
  # printf instead of echo so that a message which looks like an echo option
  # (-n, -e) is printed verbatim; "$*" instead of "$@" because the arguments
  # are glued to the prefix as a single string.
  printf '%s\n' "${PREFIX:-}$*"
}
# Convert a comma-separated command into a JSON array of strings, the form
# used for the "command" key of the container overrides.
# Arguments: the command, e.g. "sleep,15". Several arguments are treated as
#            one command whose words were separated by single spaces, so an
#            element may contain spaces: sh,-c,echo hello
# Outputs:   the JSON array on stdout, e.g. ["sleep","15"]; "[]" when no
#            command text is given
function command_to_array {
  local IFS=' '
  local joined="$*"
  if [[ -z "$joined" ]]; then
    echo "[]"
    return
  fi
  # Escape the characters that would terminate or corrupt a JSON string.
  # Backslashes first, so the escapes added for quotes are not doubled.
  joined=${joined//\\/\\\\}
  joined=${joined//\"/\\\"}
  # Every comma closes one JSON string and opens the next one.
  joined=${joined//,/\",\"}
  printf '["%s"]\n' "$joined"
}
# Check requirements
# Abort the script unless the given program can be found with `command -v`.
# Arguments: $1 - name of the required command
# Outputs:   an installation hint on stderr when the command is missing
# Exits:     with status 1 when the command is missing
function require {
  if ! command -v "$1" > /dev/null 2>&1; then
    # Both lines belong to the same diagnostic, so both go to stderr.
    _echo "Some of the required software is not installed:" >&2
    _echo " please install $1" >&2
    exit 1
  fi
}
# Print the CloudWatch console URL of a task's log stream.
# Globals:   AWS_DEFAULT_REGION (read) - region used in the URL; falls back
#            to eu-west-1 when unset or empty
# Arguments: $1 - name used for the log stream; the text before its first
#                 "-" is used as the suffix of the "awslogs-" log group
#            $2 - task ARN (or bare task id)
# Outputs:   the URL on stdout
# NOTE(review): the "awslogs-<prefix>" group and "<name>/<name>/<task id>"
# stream layout is taken over unchanged; confirm it matches the log
# configuration of your task definitions.
function logs_link {
  local service_name=$1
  # The task id is the last path segment of the ARN. Taking the last segment
  # works for both "...:task/<id>" and "...:task/<cluster>/<id>".
  local task_id=${2##*/}
  local logs_name=${service_name%%-*}
  local region=${AWS_DEFAULT_REGION:-eu-west-1}
  echo "https://${region}.console.aws.amazon.com/cloudwatch/home?region=${region}#logEventViewer:group=awslogs-${logs_name};stream=${service_name}/${service_name}/${task_id}"
}
# Check for AWS, AWS Command Line Interface
require aws
# Check for jq, Command-line JSON processor
require jq
# Setup default values for variables.
# The string "false" marks an option that was not given on the command line.
CLUSTER=false
TASK_DEFINITION=false
CONTAINER_NAME=false
COMMAND=false
VERBOSE=false
TAGVAR=false
# Path of the aws executable. `command -v` is the shell builtin way to look a
# command up and does not depend on an external `which` being installed.
AWS_CLI=$(command -v aws)
# AWS_ECS is the command prefix used for every ECS call. It is a plain string
# that is expanded unquoted where it is used. With DRY_RUN set to a non-empty
# value the commands are only printed, not executed.
if [ -n "${DRY_RUN:-}" ]; then
  AWS_ECS="echo $AWS_CLI --output json ecs"
else
  AWS_ECS="$AWS_CLI --output json ecs"
fi
# Number of tasks to place (--count of run-task).
DESIRED=1
# Retry the command if the reason of failure is RESOURCE:CPU or RESOURCE:MEMORY
MAX_RETRIES=5
# Seconds to sleep between two such retries.
RETRY_SLEEP_TIME=60
RETRIES_ACCEPTED_FAILURES=(
  RESOURCE:CPU
  RESOURCE:MEMORY
)
# Two hours waiting: maximum number of times the "tasks-stopped" waiter is run
# for one task (presumably about ten minutes per run; see the AWS CLI waiter
# documentation).
MAX_WAITER_RETRIES=12
# Parse the command-line options into the global configuration variables.
# Options that take a value consume two arguments, flags consume one.
# Globals written: AWS_PROFILE, AWS_IAM_ROLE, CLUSTER, CONTAINER_NAME,
#                  TASK_DEFINITION, COMMAND, AWS_DEFAULT_REGION, DESIRED,
#                  TAGVAR, VERBOSE, MAX_WAITER_RETRIES
# Arguments: the script's command-line arguments
# Exits:     through usage (status 2) on an unknown option or on an option
#            that is missing its value
function parse_args {
  local key
  while (( $# > 0 )); do
    key="$1"
    # Options that need a value: fail with a message instead of running into
    # a failing `shift`, which under `set -e` would end the script silently.
    case "$key" in
      -p|--profile|-c|--cluster|-n|--container-name|-d|--task-definition|-m|--command|-r|--region|-D|--desired-count|-e|--tag-env-var|-t|--retries)
        if (( $# < 2 )); then
          _echo "ERROR: $key requires a value"
          usage
        fi
        ;;
    esac
    case "$key" in
      -p|--profile)
        AWS_PROFILE="$2"
        shift # past argument
        ;;
      --aws-instance-profile)
        AWS_IAM_ROLE=true
        ;;
      -c|--cluster)
        CLUSTER="$2"
        shift # past argument
        ;;
      -n|--container-name)
        CONTAINER_NAME="$2"
        shift # past argument
        ;;
      -d|--task-definition)
        TASK_DEFINITION="$2"
        shift # past argument
        ;;
      -m|--command)
        COMMAND="$(command_to_array "$2")"
        shift # past argument
        ;;
      -r|--region)
        AWS_DEFAULT_REGION="$2"
        shift # past argument
        ;;
      -D|--desired-count)
        DESIRED="$2"
        shift # past argument
        ;;
      -e|--tag-env-var)
        TAGVAR="$2"
        shift # past argument
        ;;
      -v|--verbose)
        # A flag without a value: no extra shift here, otherwise the option
        # following -v would be skipped.
        VERBOSE=true
        ;;
      -t|--retries)
        MAX_WAITER_RETRIES="$2"
        shift # past argument
        ;;
      *)
        _echo "ERROR: $1 is not a valid option"
        usage
        exit 2
        ;;
    esac
    shift # past argument or value
  done
}
parse_args "$@"
# Trace every executed command when verbose output was requested.
if [[ "$VERBOSE" == true ]]; then
  set -x
fi
# The three mandatory options still hold the placeholder "false" when they
# were not given. An empty value is rejected as well, since it cannot name a
# task definition, cluster or container.
if [[ -z "$TASK_DEFINITION" || "$TASK_DEFINITION" == false ]]; then
  _echo "TASK DEFINITION is required. You can pass the value using -d / --task-definition for a task"
  exit 1
fi
if [[ -z "$CLUSTER" || "$CLUSTER" == false ]]; then
  _echo "CLUSTER is required. You can pass the value using -c or --cluster"
  exit 1
fi
if [[ -z "$CONTAINER_NAME" || "$CONTAINER_NAME" == false ]]; then
  _echo "CONTAINER_NAME is required. You can pass the value using -n or --container-name"
  exit 1
fi
# Build the JSON passed to `run-task --overrides`: always name the container,
# and add a "command" entry only when a command was given (COMMAND keeps the
# placeholder "false" otherwise).
OVERRIDES='{"containerOverrides": [{"name": "'"$CONTAINER_NAME"'"'
if [[ "$COMMAND" != false ]]; then
  OVERRIDES+=', "command": '"$COMMAND"
fi
OVERRIDES+='}]}'
# Pass region and profile to the aws command line only when they carry a
# value. A variable that is set but empty is removed from the environment
# instead of producing a "--region"/"--profile" flag without a value.
if [[ -z "${AWS_DEFAULT_REGION:-}" ]]; then
  unset AWS_DEFAULT_REGION
else
  AWS_ECS="$AWS_ECS --region $AWS_DEFAULT_REGION"
fi
if [[ -z "${AWS_PROFILE:-}" ]]; then
  unset AWS_PROFILE
else
  AWS_ECS="$AWS_ECS --profile $AWS_PROFILE"
fi
# Start the task with `run-task` and hand its response to the caller.
# Globals:   AWS_ECS, CLUSTER, TASK_DEFINITION, DESIRED, OVERRIDES (read)
# Arguments: none
# Outputs:   the command's stdout, unmodified
# Returns:   the exit status of the run-task command
run_task () {
  local run_result
  local returned_value
  # AWS_ECS holds a command line in a plain string and must be word-split.
  # shellcheck disable=SC2086
  run_result=$($AWS_ECS run-task \
    --cluster "$CLUSTER" \
    --task-definition "$TASK_DEFINITION" \
    --count "$DESIRED" \
    --overrides "$OVERRIDES")
  returned_value=$?
  # Quoted, so the response is neither word-split nor glob-expanded.
  printf '%s\n' "$run_result"
  return "$returned_value"
}
# Run the task and wait for it to stop.
#
# Outer loop: start the task; when placement fails for one of the reasons in
# RETRIES_ACCEPTED_FAILURES, sleep and try again, up to MAX_RETRIES times.
# Inner loop: run the "tasks-stopped" waiter; when it gives up with status
# 255, run it again (without starting another task), up to MAX_WAITER_RETRIES
# times.
#
# Exit status of the script:
#   0    the first container of the task exited with 0 (or DRY_RUN is set)
#   N    the first container of the task exited with N
#   1    run-task failed, no exit code was reported, or a non-retryable failure
#   253  MAX_RETRIES reached
#   255  MAX_WAITER_RETRIES reached
#   any other value: exit status of a failed waiter
DO_RETRY=1
RETRIES=0
# Failures of the following commands are expected and handled explicitly.
set +e
while [ "$RETRIES" -lt "$MAX_RETRIES" ] && [ "$DO_RETRY" -eq 1 ]; do
  DO_RETRY=0
  REASON_FAILURE=''
  RUN_TASK=$(run_task)
  RUN_TASK_EXIT_CODE=$?
  _echo "$RUN_TASK"
  # In dry-run mode AWS_ECS only prints the command line; there is no
  # response to parse and no task to wait for.
  if [ -n "${DRY_RUN:-}" ]; then
    exit 0
  fi
  FAILURES=$(printf '%s\n' "$RUN_TASK" | jq '.failures|length' 2> /dev/null)
  # Validate before any numeric test: an empty or non-numeric value means the
  # command produced no JSON response (for example because the CLI call failed).
  if ! [[ "$FAILURES" =~ ^[0-9]+$ ]]; then
    _echo "ECS task failed: run-task returned no parsable response (exit status: $RUN_TASK_EXIT_CODE)"
    exit 1
  fi
  if [ "$FAILURES" -eq 0 ]; then
    TASK_ARN=$(printf '%s\n' "$RUN_TASK" | jq -r '.tasks[0].taskArn')
    WAITER_RETRY=1
    WAITER_RETRIES=0
    while [ "$WAITER_RETRIES" -lt "$MAX_WAITER_RETRIES" ] && [ "$WAITER_RETRY" -eq 1 ]; do
      WAITER_RETRY=0
      # AWS_ECS holds a command line in a plain string and must be word-split.
      # shellcheck disable=SC2086
      $AWS_ECS wait tasks-stopped --tasks "$TASK_ARN" --cluster "$CLUSTER" 2> /dev/null
      WAITER_EXIT_CODE=$?
      if [ "$WAITER_EXIT_CODE" -eq 0 ]; then
        # shellcheck disable=SC2086
        DESCRIBE_TASKS=$($AWS_ECS describe-tasks --tasks "$TASK_ARN" --cluster "$CLUSTER")
        # Only the first container of the first task is inspected.
        EXIT_CODE=$(printf '%s\n' "$DESCRIBE_TASKS" | jq '.tasks[0].containers[0].exitCode')
        if ! [[ "$EXIT_CODE" =~ ^[0-9]+$ ]]; then
          # No numeric exit code was reported (jq prints "null" when the field
          # is missing), so there is nothing valid to pass to `exit`.
          _echo "ECS task failed: $DESCRIBE_TASKS"
          _echo "$(logs_link "$CONTAINER_NAME" "$TASK_ARN")"
          exit 1
        elif [ "$EXIT_CODE" -eq 0 ]; then
          _echo "ECS task exited successfully"
          _echo "$(logs_link "$CONTAINER_NAME" "$TASK_ARN")"
          exit 0
        else
          _echo "ECS task failed: $DESCRIBE_TASKS"
          _echo "$(logs_link "$CONTAINER_NAME" "$TASK_ARN")"
          exit "$EXIT_CODE"
        fi
      elif [ "$WAITER_EXIT_CODE" -eq 255 ]; then
        # The waiter gave up; the task may still be running, so wait again.
        WAITER_RETRIES=$((WAITER_RETRIES + 1))
        WAITER_RETRY=1
        if [ "$WAITER_RETRIES" -eq "$MAX_WAITER_RETRIES" ]; then
          _echo "ECS Waiter max retries reached, $WAITER_RETRIES, exit"
          exit 255
        fi
        _echo "ECS Waiter because timeout, waiter retry $WAITER_RETRIES (don't launch the task other time)"
      else
        _echo "ECS Waiter failed, status: $WAITER_EXIT_CODE"
        _echo "$(logs_link "$CONTAINER_NAME" "$TASK_ARN")"
        exit "$WAITER_EXIT_CODE"
      fi
    done
  else
    # The task could not be started, so no task ARN exists in this branch and
    # no log link is printed.
    REASON_FAILURE=$(printf '%s\n' "$RUN_TASK" | jq -r '.failures[0].reason')
    # Compare the reason literally against each accepted value; using it as a
    # regular expression would also accept fragments such as "CPU".
    RETRYABLE_FAILURE=0
    for ACCEPTED_FAILURE in "${RETRIES_ACCEPTED_FAILURES[@]}"; do
      if [ "$REASON_FAILURE" = "$ACCEPTED_FAILURE" ]; then
        RETRYABLE_FAILURE=1
        break
      fi
    done
    if [ "$RETRYABLE_FAILURE" -eq 1 ]; then
      DO_RETRY=1
      RETRIES=$((RETRIES + 1))
      if [ "$RETRIES" -eq "$MAX_RETRIES" ]; then
        _echo "Max RETRIES reached REASON: $REASON_FAILURE RETRIES: $RETRIES"
        exit 253
      fi
      _echo "Retrying in ${RETRY_SLEEP_TIME}s, try number: $RETRIES because: $REASON_FAILURE"
      sleep "$RETRY_SLEEP_TIME"
    else
      _echo "ECS task failed: $REASON_FAILURE"
      exit 1
    fi
  fi
done
@msmidc
Copy link

msmidc commented Aug 29, 2017

Can I ask how the TAGVAR variable should be used? I am interested in how to override a docker image in run-task command, but I don't see the variable used anywhere.

@rafaelmagu
Copy link

rafaelmagu commented Sep 28, 2017

@Victorcoder why do you retry on those errors (RESOURCE:CPU and RESOURCE:MEMORY)? Is your cluster on an AutoScaling Group?

@guilatrova
Copy link

You need to delete line 143 in order to get the --verbose flag working correctly

@sblack4
Copy link

sblack4 commented Apr 15, 2024

This is great. I found it via SO. I updated this script to accommodate Fargate launch types: https://gist.github.com/sblack4/2edc4f64693792b57963d8c99d12e95e

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment