Skip to content

Instantly share code, notes, and snippets.

@brianleroux
Last active August 10, 2017 16:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brianleroux/385240cfa5047de78bd35d662954b758 to your computer and use it in GitHub Desktop.
Save brianleroux/385240cfa5047de78bd35d662954b758 to your computer and use it in GitHub Desktop.
An example comparison between an Architect (.arc) manifest and an AWS CloudFormation template
# Architect (.arc) manifest; each "@" line opens a section.
# NOTE(review): presumably @app carries the application name — confirm against the Architect docs.
@app
testapp
# NOTE(review): presumably @html lists HTTP routes (method + path) served as HTML — confirm.
@html
get /
---
# CloudFormation template header plus console metadata.
AWSTemplateFormatVersion: '2010-09-09'
Description: 'ECS: cluster, a cloudonaut.io template'
# 'AWS::CloudFormation::Interface' groups the template parameters under
# labelled headings; every name listed here is declared under Parameters.
Metadata:
  'AWS::CloudFormation::Interface':
    ParameterGroups:
      - Label:
          default: 'Parent Stacks'
        Parameters:
          - ParentVPCStack
          - ParentSSHBastionStack
          - ParentAuthProxyStack
          - ParentAlertStack
      - Label:
          default: 'EC2 Parameters'
        Parameters:
          - KeyName
          - IAMUserSSHAccess
          - NewRelicLicenseKey
      - Label:
          default: 'Load Balancer Parameters'
        Parameters:
          - LoadBalancerScheme
          - LoadBalancerCertificateArn
      - Label:
          default: 'Cluster Parameters'
        Parameters:
          - SubnetsReach
          - InstanceType
          - LogsRetentionInDays
          - MaxSize
          - MinSize
          - DesiredCapacity
          - DrainingTimeoutInSeconds
# Stack inputs. Parameters whose Default is '' are optional; the template's
# conditions test them against the empty string.
Parameters:
  ParentVPCStack:
    Description: 'Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.'
    Type: String
  ParentSSHBastionStack:
    Description: 'Optional but recommended stack name of parent SSH bastion host/instance stack based on vpc/vpc-ssh-bastion.yaml template.'
    Type: String
    Default: ''
  ParentAuthProxyStack:
    Description: 'Optional stack name of parent auth proxy stack based on security/auth-proxy-*.yaml template.'
    Type: String
    Default: ''
  ParentAlertStack:
    Description: 'Optional but recommended stack name of parent alert stack based on operations/alert.yaml template.'
    Type: String
    Default: ''
  KeyName:
    Description: 'Optional key pair of the ec2-user to establish a SSH connection to the EC2 instances of the ECS cluster.'
    Type: String
    Default: ''
  IAMUserSSHAccess:
    Description: 'Synchronize public keys of IAM users to enable personalized SSH access (Doc: https://cloudonaut.io/manage-aws-ec2-ssh-access-with-iam/).'
    Type: String
    # Quoted on purpose: the parameter is a String that is compared against
    # the string 'true', so the values must not be parsed as YAML booleans.
    Default: 'false'
    AllowedValues:
      - 'true'
      - 'false'
  NewRelicLicenseKey:
    Description: 'Optional New Relic License Key will install the New Relic Servers for Linux agent.'
    Type: String
    Default: ''
    NoEcho: true
  SubnetsReach:
    Description: 'Should the cluster have direct access to the Internet or do you prefer private subnets with NAT?'
    Type: String
    Default: Public
    AllowedValues:
      - Public
      - Private
  LoadBalancerScheme:
    Description: 'Indicates whether the load balancer in front of the ECS cluster is internet-facing or internal.'
    Type: String
    Default: 'internet-facing'
    AllowedValues:
      - 'internet-facing'
      - internal
  LoadBalancerCertificateArn:
    Description: 'Optional Amazon Resource Name (ARN) of the certificate to associate with the load balancer.'
    Type: String
    Default: ''
  InstanceType:
    Description: 'The instance type of the EC2 instances of the ECS cluster.'
    Type: String
    Default: 't2.micro'
  LogsRetentionInDays:
    Description: 'Specifies the number of days you want to retain log events in the specified log group.'
    Type: Number
    Default: 14
    AllowedValues: [1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, 3653]
  MaxSize:
    Description: 'The maximum size of the Auto Scaling group.'
    Type: Number
    Default: 4
    ConstraintDescription: 'Must be >= 1'
    MinValue: 1
  MinSize:
    Description: 'The minimum size of the Auto Scaling group.'
    Type: Number
    Default: 2
    ConstraintDescription: 'Must be >= 1'
    MinValue: 1
  DesiredCapacity:
    Description: 'The desired start size of the Auto Scaling group.'
    Type: Number
    Default: 2
    ConstraintDescription: 'Must be >= 1'
    MinValue: 1
  DrainingTimeoutInSeconds:
    Description: 'Maximum time in seconds an EC2 instance waits when terminating until all containers are moved to another EC2 instance (draining).'
    Type: Number
    Default: 60
    ConstraintDescription: 'Must be in the range [60-7200]'
    MinValue: 60
    MaxValue: 7200
# Region -> AMI lookup table; each region entry carries a single ECSAMI key.
# Regions that are not listed here have no AMI entry.
Mappings:
  RegionMap:
    'eu-west-2':
      ECSAMI: 'ami-ff15039b'
    'eu-west-1':
      ECSAMI: 'ami-809f84e6'
    'ap-northeast-1':
      ECSAMI: 'ami-e4657283'
    'ca-central-1':
      ECSAMI: 'ami-3da81759'
    'ap-southeast-1':
      ECSAMI: 'ami-19f7787a'
    'ap-southeast-2':
      ECSAMI: 'ami-42e9f921'
    'eu-central-1':
      ECSAMI: 'ami-a3a006cc'
    'us-east-1':
      ECSAMI: 'ami-04351e12'
    'us-east-2':
      ECSAMI: 'ami-207b5a45'
    'us-west-1':
      ECSAMI: 'ami-7d664a1d'
    'us-west-2':
      ECSAMI: 'ami-57d9cd2e'
# Feature switches derived from the parameters. A "Has..." condition is true
# when the corresponding optional parameter is non-empty; the "HasNot..."
# variants are the explicit negations.
Conditions:
  HasKeyName: !Not [!Equals [!Ref KeyName, '']]
  HasIAMUserSSHAccess: !Equals [!Ref IAMUserSSHAccess, 'true']
  HasSSHBastionSecurityGroup: !Not [!Equals [!Ref ParentSSHBastionStack, '']]
  HasNotSSHBastionSecurityGroup: !Equals [!Ref ParentSSHBastionStack, '']
  HasAuthProxySecurityGroup: !Not [!Equals [!Ref ParentAuthProxyStack, '']]
  HasNotAuthProxySecurityGroup: !Equals [!Ref ParentAuthProxyStack, '']
  HasNewRelic: !Not [!Equals [!Ref NewRelicLicenseKey, '']]
  HasLoadBalancerSchemeInternal: !Equals [!Ref LoadBalancerScheme, 'internal']
  HasLoadBalancerCertificateArn: !Not [!Equals [!Ref LoadBalancerCertificateArn, '']]
  # Combined conditions select which HTTPS ingress rule is created.
  HasAuthProxySecurityGroupAndLoadBalancerCertificateArn: !And [!Condition HasAuthProxySecurityGroup, !Condition HasLoadBalancerCertificateArn]
  HasNotAuthProxySecurityGroupAndLoadBalancerCertificateArn: !And [!Condition HasNotAuthProxySecurityGroup, !Condition HasLoadBalancerCertificateArn]
  HasAlertTopic: !Not [!Equals [!Ref ParentAlertStack, '']]
# All stack resources: ECS cluster, IAM role/profile for the container
# instances, security groups, application load balancer with listeners,
# launch configuration (cfn-init metadata included), Auto Scaling group with
# a terminating lifecycle hook backed by SQS, scaling policies and alarms.
Resources:
  Cluster:
    Type: 'AWS::ECS::Cluster'
    Properties: {}
  LogGroup:
    Type: 'AWS::Logs::LogGroup'
    Properties:
      RetentionInDays: !Ref LogsRetentionInDays
  InstanceProfile:
    Type: 'AWS::IAM::InstanceProfile'
    Properties:
      Path: '/'
      Roles:
        - !Ref Role
  # Role assumed by the EC2 instances (ec2.amazonaws.com); one inline policy
  # per concern: ECS agent, ECR pulls, CloudWatch Logs, lifecycle handling.
  Role:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service: 'ec2.amazonaws.com'
            Action: 'sts:AssumeRole'
      Path: '/'
      Policies:
        - PolicyName: ecs
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'ecs:DiscoverPollEndpoint'
                Resource: '*'
        - PolicyName: ecs-cluster
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'ecs:DeregisterContainerInstance'
                  - 'ecs:RegisterContainerInstance'
                  - 'ecs:SubmitContainerStateChange'
                  - 'ecs:SubmitTaskStateChange'
                  - 'ecs:ListContainerInstances'
                Resource: !Sub 'arn:aws:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${Cluster}'
        - PolicyName: ecs-cluster-instance
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'ecs:Poll'
                  - 'ecs:StartTelemetrySession'
                  - 'ecs:UpdateContainerInstancesState'
                  - 'ecs:ListTasks'
                  - 'ecs:DescribeContainerInstances'
                Resource: !Sub 'arn:aws:ecs:${AWS::Region}:${AWS::AccountId}:container-instance/*'
                # Restrict the wildcard resource to instances of this cluster.
                Condition:
                  'StringEquals':
                    'ecs:cluster': !Sub 'arn:aws:ecs:${AWS::Region}:${AWS::AccountId}:cluster/${Cluster}'
        - PolicyName: ecr
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'ecr:GetAuthorizationToken'
                  - 'ecr:BatchCheckLayerAvailability'
                  - 'ecr:GetDownloadUrlForLayer'
                  - 'ecr:BatchGetImage'
                Resource: '*'
        - PolicyName: logs
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Effect: Allow
                Action:
                  - 'logs:CreateLogGroup'
                  - 'logs:CreateLogStream'
                  - 'logs:PutLogEvents'
                  - 'logs:DescribeLogStreams'
                Resource: 'arn:aws:logs:*:*:*'
        - PolicyName: autoscaling
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Sid: write
                Effect: Allow
                Action: 'autoscaling:CompleteLifecycleAction'
                Resource: '*'
        - PolicyName: sqs
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Sid: write
                Effect: Allow
                Action:
                  - 'sqs:DeleteMessage'
                  - 'sqs:ReceiveMessage'
                Resource: !GetAtt 'AutoScalingGroupLifecycleHookQueue.Arn'
  # Extra permissions attached to Role only when IAM-based SSH access is on.
  IAMPolicySSHAccess:
    Type: 'AWS::IAM::Policy'
    Condition: HasIAMUserSSHAccess
    Properties:
      Roles:
        - !Ref Role
      PolicyName: 'iam-ssh'
      PolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Action:
              - 'iam:ListUsers'
            Resource:
              - '*'
          - Effect: Allow
            Action:
              - 'iam:ListSSHPublicKeys'
              - 'iam:GetSSHPublicKey'
            Resource:
              - !Sub 'arn:aws:iam::${AWS::AccountId}:user/*'
  # Load balancer security group. Ingress comes either from the world or,
  # when an auth proxy stack is given, only from the proxy's security group.
  ALBSecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: 'ecs-cluster-alb'
      VpcId:
        'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC'
  ALBSecurityGroupInHttpWorld:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Condition: HasNotAuthProxySecurityGroup
    Properties:
      GroupId: !Ref ALBSecurityGroup
      IpProtocol: tcp
      FromPort: 80
      ToPort: 80
      CidrIp: '0.0.0.0/0'
  ALBSecurityGroupInHttpsWorld:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Condition: HasNotAuthProxySecurityGroupAndLoadBalancerCertificateArn
    Properties:
      GroupId: !Ref ALBSecurityGroup
      IpProtocol: tcp
      FromPort: 443
      ToPort: 443
      CidrIp: '0.0.0.0/0'
  ALBSecurityGroupInHttpAuthProxy:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Condition: HasAuthProxySecurityGroup
    Properties:
      GroupId: !Ref ALBSecurityGroup
      IpProtocol: tcp
      FromPort: 80
      ToPort: 80
      SourceSecurityGroupId:
        'Fn::ImportValue': !Sub '${ParentAuthProxyStack}-SecurityGroup'
  ALBSecurityGroupInHttpsAuthProxy:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Condition: HasAuthProxySecurityGroupAndLoadBalancerCertificateArn
    Properties:
      GroupId: !Ref ALBSecurityGroup
      IpProtocol: tcp
      FromPort: 443
      ToPort: 443
      SourceSecurityGroupId:
        'Fn::ImportValue': !Sub '${ParentAuthProxyStack}-SecurityGroup'
  # Container instance security group: all TCP ports are reachable from the
  # load balancer's security group.
  SecurityGroup:
    Type: 'AWS::EC2::SecurityGroup'
    Properties:
      GroupDescription: 'ecs-cluster'
      VpcId:
        'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC'
      SecurityGroupIngress:
        - SourceSecurityGroupId: !Ref ALBSecurityGroup
          FromPort: 0
          ToPort: 65535
          IpProtocol: tcp
  SecurityGroupInSSHBastion:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Condition: HasSSHBastionSecurityGroup
    Properties:
      GroupId: !Ref SecurityGroup
      IpProtocol: tcp
      FromPort: 22
      ToPort: 22
      SourceSecurityGroupId:
        'Fn::ImportValue': !Sub '${ParentSSHBastionStack}-SecurityGroup'
  # Without a bastion stack, SSH (port 22) is opened to 0.0.0.0/0.
  SecurityGroupInSSHWorld:
    Type: 'AWS::EC2::SecurityGroupIngress'
    Condition: HasNotSSHBastionSecurityGroup
    Properties:
      GroupId: !Ref SecurityGroup
      IpProtocol: tcp
      FromPort: 22
      ToPort: 22
      CidrIp: '0.0.0.0/0'
  HTTPCodeELB5XXTooHighAlarm:
    Condition: HasAlertTopic
    Type: 'AWS::CloudWatch::Alarm'
    Properties:
      AlarmDescription: 'Application load balancer returns 5XX HTTP status codes'
      Namespace: 'AWS/ApplicationELB'
      MetricName: HTTPCode_ELB_5XX_Count
      Statistic: Sum
      Period: 60
      EvaluationPeriods: 1
      ComparisonOperator: GreaterThanThreshold
      Threshold: 0
      AlarmActions:
        - 'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'
      Dimensions:
        - Name: LoadBalancer
          Value: !GetAtt LoadBalancer.LoadBalancerFullName
  HTTPCodeTarget5XXTooHighAlarm:
    Condition: HasAlertTopic
    Type: 'AWS::CloudWatch::Alarm'
    Properties:
      AlarmDescription: 'Application load balancer receives 5XX HTTP status codes from targets'
      Namespace: 'AWS/ApplicationELB'
      MetricName: HTTPCode_Target_5XX_Count
      Statistic: Sum
      Period: 60
      EvaluationPeriods: 1
      ComparisonOperator: GreaterThanThreshold
      Threshold: 0
      AlarmActions:
        - 'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'
      Dimensions:
        - Name: LoadBalancer
          Value: !GetAtt LoadBalancer.LoadBalancerFullName
  LoadBalancer:
    Type: 'AWS::ElasticLoadBalancingV2::LoadBalancer'
    Properties:
      Scheme: !Ref LoadBalancerScheme
      SecurityGroups:
        - !Ref ALBSecurityGroup
      # Internal scheme -> private subnets, otherwise public subnets; the
      # parent VPC stack exports each list as one comma-separated string.
      Subnets: !If
        - HasLoadBalancerSchemeInternal
        - !Split
          - ','
          - 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetsPrivate'
        - !Split
          - ','
          - 'Fn::ImportValue': !Sub '${ParentVPCStack}-SubnetsPublic'
  DefaultTargetGroup:  # this is used as the fall-back target group and is used to health checking the ECS agent. Services use their own ListenerRules to accept traffic based on path prefixes.
    Type: 'AWS::ElasticLoadBalancingV2::TargetGroup'
    Properties:
      HealthCheckIntervalSeconds: 15
      HealthCheckPort: 51678
      HealthCheckPath: '/'
      HealthCheckProtocol: HTTP
      HealthCheckTimeoutSeconds: 10
      HealthyThresholdCount: 2
      Matcher:
        HttpCode: '200'
      Port: 80  # port 80 is a reserved port that is never used by the agent, so we are safe to use it here
      Protocol: HTTP
      VpcId:
        'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC'
  HttpListener:
    Type: 'AWS::ElasticLoadBalancingV2::Listener'
    Properties:
      DefaultActions:
        - TargetGroupArn: !Ref DefaultTargetGroup
          Type: forward
      LoadBalancerArn: !Ref LoadBalancer
      Port: 80
      Protocol: HTTP
  HttpsListener:
    Type: 'AWS::ElasticLoadBalancingV2::Listener'
    Condition: HasLoadBalancerCertificateArn
    Properties:
      Certificates:
        - CertificateArn: !Ref LoadBalancerCertificateArn
      DefaultActions:
        - TargetGroupArn: !Ref DefaultTargetGroup
          Type: forward
      LoadBalancerArn: !Ref LoadBalancer
      Port: 443
      Protocol: HTTPS
  # Launch configuration for the container instances. The cfn-init metadata
  # defines four configs (awslogs, ssh-access, install, newrelic); the
  # 'default' config set picks the ones enabled by the conditions.
  LaunchConfiguration:
    Type: 'AWS::AutoScaling::LaunchConfiguration'
    Metadata:
      'AWS::CloudFormation::Init':
        configSets:
          default: !If
            - HasNewRelic
            - !If [HasIAMUserSSHAccess, [awslogs, ssh-access, install, newrelic], [awslogs, install, newrelic]]
            - !If [HasIAMUserSSHAccess, [awslogs, ssh-access, install], [awslogs, install]]
        # Installs the awslogs agent and ships system logs to LogGroup.
        awslogs:
          packages:
            yum:
              awslogs: []
          files:
            '/etc/awslogs/awscli.conf':
              content: !Sub |
                [default]
                region = ${AWS::Region}
                [plugins]
                cwlogs = cwlogs
              mode: '000644'
              owner: root
              group: root
            '/etc/awslogs/awslogs.conf':
              content: !Sub |
                [general]
                state_file = /var/lib/awslogs/agent-state
                [/var/log/messages]
                datetime_format = %b %d %H:%M:%S
                file = /var/log/messages
                log_stream_name = {instance_id}/var/log/messages
                log_group_name = ${LogGroup}
                [/var/log/secure]
                datetime_format = %b %d %H:%M:%S
                file = /var/log/secure
                log_stream_name = {instance_id}/var/log/secure
                log_group_name = ${LogGroup}
                [/var/log/cron]
                datetime_format = %b %d %H:%M:%S
                file = /var/log/cron
                log_stream_name = {instance_id}/var/log/cron
                log_group_name = ${LogGroup}
                [/var/log/cloud-init.log]
                datetime_format = %b %d %H:%M:%S
                file = /var/log/cloud-init.log
                log_stream_name = {instance_id}/var/log/cloud-init.log
                log_group_name = ${LogGroup}
                [/var/log/cfn-init.log]
                datetime_format = %Y-%m-%d %H:%M:%S
                file = /var/log/cfn-init.log
                log_stream_name = {instance_id}/var/log/cfn-init.log
                log_group_name = ${LogGroup}
                [/var/log/cfn-hup.log]
                datetime_format = %Y-%m-%d %H:%M:%S
                file = /var/log/cfn-hup.log
                log_stream_name = {instance_id}/var/log/cfn-hup.log
                log_group_name = ${LogGroup}
                [/var/log/cfn-init-cmd.log]
                datetime_format = %Y-%m-%d %H:%M:%S
                file = /var/log/cfn-init-cmd.log
                log_stream_name = {instance_id}/var/log/cfn-init-cmd.log
                log_group_name = ${LogGroup}
                [/var/log/cloud-init-output.log]
                file = /var/log/cloud-init-output.log
                log_stream_name = {instance_id}/var/log/cloud-init-output.log
                log_group_name = ${LogGroup}
                [/var/log/dmesg]
                file = /var/log/dmesg
                log_stream_name = {instance_id}/var/log/dmesg
                log_group_name = ${LogGroup}
                [/var/log/newrelic/nrsysmond.log]
                datetime_format = %Y-%m-%d %H:%M:%S
                file = /var/log/newrelic/nrsysmond.log
                log_stream_name = {instance_id}/var/log/newrelic/nrsysmond.log
                log_group_name = ${LogGroup}
              mode: '000644'
              owner: root
              group: root
          services:
            sysvinit:
              awslogs:
                enabled: true
                ensureRunning: true
                packages:
                  yum:
                    - awslogs
                files:
                  - '/etc/awslogs/awslogs.conf'
                  - '/etc/awslogs/awscli.conf'
        # Optional New Relic system monitor (only part of the config set
        # when a license key is supplied).
        newrelic:
          packages:
            rpm:
              newrelic-sysmond: 'https://download.newrelic.com/pub/newrelic/el5/x86_64/newrelic-sysmond-2.3.0.132-1.x86_64.rpm'
          files:
            '/etc/newrelic/nrsysmond.cfg':
              content: !Sub |
                license_key=${NewRelicLicenseKey}
                loglevel=info
                logfile=/var/log/newrelic/nrsysmond.log
                labels=StackName:${AWS::StackName};AccountId:${AWS::AccountId};Region:${AWS::Region}
              mode: '000640'
              owner: root
              group: newrelic
          services:
            sysvinit:
              newrelic-sysmond:
                enabled: true
                ensureRunning: true
                files:
                  - '/etc/newrelic/nrsysmond.cfg'
        # Optional IAM-backed SSH access: sshd looks up public keys through
        # authorized_keys_command.sh; a cron job imports IAM users as local
        # users every 10 minutes.
        ssh-access:
          packages:
            yum:
              'aws-cli': []
          files:
            '/opt/authorized_keys_command.sh':
              content: |
                #!/bin/bash -e
                if [ -z "$1" ]; then
                  exit 1
                fi
                SaveUserName="$1"
                SaveUserName=${SaveUserName//"+"/".plus."}
                SaveUserName=${SaveUserName//"="/".equal."}
                SaveUserName=${SaveUserName//","/".comma."}
                SaveUserName=${SaveUserName//"@"/".at."}
                aws iam list-ssh-public-keys --user-name "$SaveUserName" --query "SSHPublicKeys[?Status == 'Active'].[SSHPublicKeyId]" --output text | while read KeyId; do
                  aws iam get-ssh-public-key --user-name "$SaveUserName" --ssh-public-key-id "$KeyId" --encoding SSH --query "SSHPublicKey.SSHPublicKeyBody" --output text
                done
              mode: '000755'
              owner: root
              group: root
            '/opt/import_users.sh':
              content: |
                #!/bin/bash -e
                aws iam list-users --query "Users[].[UserName]" --output text | while read User; do
                  SaveUserName="$User"
                  SaveUserName=${SaveUserName//"+"/".plus."}
                  SaveUserName=${SaveUserName//"="/".equal."}
                  SaveUserName=${SaveUserName//","/".comma."}
                  SaveUserName=${SaveUserName//"@"/".at."}
                  if [ "${#SaveUserName}" -le "32" ]; then
                    if ! id -u "$SaveUserName" >/dev/null 2>&1; then
                      #sudo will read each file in /etc/sudoers.d, skipping file names that end in ‘~’ or contain a ‘.’ character to avoid causing problems with package manager or editor temporary/backup files.
                      SaveUserFileName=$(echo "$SaveUserName" | tr "." " ")
                      /usr/sbin/useradd "$SaveUserName"
                      echo "$SaveUserName ALL=(ALL) NOPASSWD:ALL" > "/etc/sudoers.d/$SaveUserFileName"
                    fi
                  else
                    echo "Can not import IAM user ${SaveUserName}. User name is longer than 32 characters."
                  fi
                done
              mode: '000755'
              owner: root
              group: root
            '/etc/cron.d/import_users':
              content: |
                */10 * * * * root /opt/import_users.sh
              mode: '000644'
              owner: root
              group: root
          commands:
            'a_configure_sshd_command':
              command: 'sed -i "s:#AuthorizedKeysCommand none:AuthorizedKeysCommand /opt/authorized_keys_command.sh:g" /etc/ssh/sshd_config'
            'b_configure_sshd_commanduser':
              command: 'sed -i "s:#AuthorizedKeysCommandUser nobody:AuthorizedKeysCommandUser nobody:g" /etc/ssh/sshd_config'
            'c_import_users':
              command: './import_users.sh'
              cwd: '/opt'
          services:
            sysvinit:
              sshd:
                enabled: true
                ensureRunning: true
                commands:
                  - 'a_configure_sshd_command'
                  - 'b_configure_sshd_commanduser'
        # cfn-hup auto-reload plus the lifecycle poller: a Ruby daemon that
        # reads terminating-lifecycle messages from SQS, drains the container
        # instance, deregisters it and completes the lifecycle action.
        install:
          packages:
            yum:
              ruby: []
            rubygems:
              'aws-sdk':
                - '2.7.10'
              daemons:
                - '1.2.3'
          files:
            '/etc/cfn/cfn-hup.conf':
              content: !Sub |
                [main]
                stack=${AWS::StackId}
                region=${AWS::Region}
                interval=1
              mode: '000400'
              owner: root
              group: root
            '/etc/cfn/hooks.d/cfn-auto-reloader.conf':
              content: !Sub |
                [cfn-auto-reloader-hook]
                triggers=post.update
                path=Resources.LaunchConfiguration.Metadata.AWS::CloudFormation::Init
                action=/opt/aws/bin/cfn-init --verbose --stack=${AWS::StackName} --region=${AWS::Region} --resource=LaunchConfiguration
                runas=root
            '/etc/init.d/lifecycle-poller':
              content: |
                #!/usr/bin/env ruby
                # chkconfig: - 80 20
                APP_NAME = 'lifecycle-poller'
                APP_PATH = '/opt/lifecycle-poller/daemon.rb'
                case ARGV.first
                  when 'start'
                    puts "Starting #{APP_NAME}..."
                    system(APP_PATH, 'start')
                    exit($?.exitstatus)
                  when 'stop'
                    system(APP_PATH, 'stop')
                    exit($?.exitstatus)
                  when 'restart'
                    system(APP_PATH, 'restart')
                    exit($?.exitstatus)
                  when 'status'
                    system(APP_PATH, 'status')
                    exit($?.exitstatus)
                end
                unless %w{start stop restart status}.include? ARGV.first
                  puts "Usage: #{APP_NAME} {start|stop|restart|status}"
                  exit(1)
                end
              mode: '000755'
              owner: root
              group: root
            '/opt/lifecycle-poller/poller.conf':
              content: !Sub |
                region: ${AWS::Region}
                cluster: ${Cluster}
                queueUrl: ${AutoScalingGroupLifecycleHookQueue}
                maxWaitInSeconds: ${DrainingTimeoutInSeconds}
              mode: '000400'
              owner: root
              group: root
            '/opt/lifecycle-poller/daemon.rb':
              content: |
                #!/usr/bin/env ruby
                require 'daemons'
                Daemons.run(__dir__ + '/worker.rb', {:monitor => true})
              mode: '000500'
              owner: root
              group: root
            '/opt/lifecycle-poller/worker.rb':
              content: |
                #!/usr/bin/env ruby
                require 'net/http'
                require 'aws-sdk'
                require 'json'
                require 'uri'
                require 'yaml'
                require 'syslog/logger'
                $log = Syslog::Logger.new 'poller'
                $conf = YAML::load_file(__dir__ + '/poller.conf')
                Aws.config.update(region: $conf['region'])
                $log.info 'poller started'
                def fetchContainerInstanceId(ec2InstanceId)
                  ecs = Aws::ECS::Client.new()
                  resp1 = ecs.list_container_instances({
                    cluster: $conf['cluster']
                  })
                  resp2 = ecs.describe_container_instances({
                    cluster: $conf['cluster'],
                    container_instances: resp1.container_instance_arns,
                  })
                  cis = resp2.container_instances.select {|ci| ci.ec2_instance_id == ec2InstanceId}
                  return cis.first().container_instance_arn.split("/").last()
                end
                def drainContainerInstanceId(containerInstanceId)
                  ecs = Aws::ECS::Client.new()
                  ecs.update_container_instances_state({
                    cluster: $conf['cluster'],
                    container_instances: [containerInstanceId],
                    status: "DRAINING"
                  })
                end
                def isContainerInstanceIdle(containerInstanceId)
                  ecs = Aws::ECS::Client.new()
                  resp = ecs.list_tasks({
                    cluster: $conf['cluster'],
                    container_instance: containerInstanceId
                  })
                  return resp.task_arns.empty?
                end
                def awaitContainerInstanceIdle(containerInstanceId)
                  endTime = Time.now.to_i + $conf['maxWaitInSeconds']
                  while Time.now.to_i < endTime do
                    if isContainerInstanceIdle containerInstanceId
                      $log.info "container instance #{containerInstanceId} is idle"
                      return true
                    end
                    sleep 5 # seconds
                  end
                  $log.error "container instance #{containerInstanceId} is not idle, but wait time elapsed"
                  return false
                end
                def deregisterContainerInstance(containerInstanceId)
                  ecs = Aws::ECS::Client.new()
                  resp = ecs.deregister_container_instance({
                    cluster: $conf['cluster'],
                    container_instance: containerInstanceId,
                    force: false
                  })
                end
                def completeLifecycleAction(token, hook, asg)
                  autoscaling = Aws::AutoScaling::Client.new()
                  autoscaling.complete_lifecycle_action(
                    lifecycle_hook_name: hook,
                    auto_scaling_group_name: asg,
                    lifecycle_action_token: token,
                    lifecycle_action_result: 'CONTINUE'
                  )
                end
                def pollSQS()
                  poller = Aws::SQS::QueuePoller.new($conf['queueUrl'])
                  poller.poll do |msg|
                    body = JSON.parse(msg.body)
                    $log.debug "message #{body}"
                    if body['Event'] == 'autoscaling:TEST_NOTIFICATION'
                      $log.info 'received test notification'
                    else
                      if body['LifecycleTransition'] == 'autoscaling:EC2_INSTANCE_TERMINATING'
                        $log.info "lifecycle transition for EC2 instance #{body['EC2InstanceId']}"
                        containerInstanceId = fetchContainerInstanceId body['EC2InstanceId']
                        $log.info "lifecycle transition for container instance #{containerInstanceId}"
                        drainContainerInstanceId containerInstanceId
                        awaitContainerInstanceIdle containerInstanceId
                        deregisterContainerInstance containerInstanceId
                        completeLifecycleAction body['LifecycleActionToken'], body['LifecycleHookName'], body['AutoScalingGroupName']
                      else
                        $log.error "received unsupported lifecycle transition: #{body['LifecycleTransition']}"
                      end
                    end
                  end
                end
                pollSQS
              mode: '000500'
              owner: root
              group: root
          services:
            sysvinit:
              cfn-hup:
                enabled: true
                ensureRunning: true
                files:
                  - '/etc/cfn/cfn-hup.conf'
                  - '/etc/cfn/hooks.d/cfn-auto-reloader.conf'
              lifecycle-poller:
                enabled: true
                ensureRunning: true
                files:
                  - '/etc/init.d/lifecycle-poller'
                  - '/opt/lifecycle-poller/poller.conf'
                  - '/opt/lifecycle-poller/daemon.rb'
                  - '/opt/lifecycle-poller/worker.rb'
    Properties:
      ImageId: !FindInMap [RegionMap, !Ref 'AWS::Region', ECSAMI]
      IamInstanceProfile: !Ref InstanceProfile
      InstanceType: !Ref InstanceType
      SecurityGroups:
        - !Ref SecurityGroup
      KeyName: !If [HasKeyName, !Ref KeyName, !Ref 'AWS::NoValue']
      # Bootstrap: join the cluster, run cfn-init inside a nested shell and
      # report that shell's exit status ($?) to the AutoScalingGroup resource.
      UserData:
        'Fn::Base64': !Sub |
          #!/bin/bash -x
          bash -ex << "TRY"
            echo "ECS_CLUSTER=${Cluster}" >> /etc/ecs/ecs.config
            yum install -y aws-cfn-bootstrap
            /opt/aws/bin/cfn-init -v --stack ${AWS::StackName} --resource LaunchConfiguration --region ${AWS::Region}
          TRY
          /opt/aws/bin/cfn-signal -e $? --stack ${AWS::StackName} --resource AutoScalingGroup --region ${AWS::Region}
  AutoScalingGroup:
    Type: 'AWS::AutoScaling::AutoScalingGroup'
    Properties:
      LaunchConfigurationName: !Ref LaunchConfiguration
      MinSize: !Ref MinSize
      MaxSize: !Ref MaxSize
      DesiredCapacity: !Ref DesiredCapacity
      HealthCheckGracePeriod: 300
      HealthCheckType: ELB
      TargetGroupARNs:
        - !Ref DefaultTargetGroup
      # Launch/terminate error notifications only when an alert stack exists.
      NotificationConfigurations: !If
        - HasAlertTopic
        - - NotificationTypes:
              - 'autoscaling:EC2_INSTANCE_LAUNCH_ERROR'
              - 'autoscaling:EC2_INSTANCE_TERMINATE_ERROR'
            TopicARN:
              'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'
        - !Ref 'AWS::NoValue'
      # SubnetsReach (Public/Private) selects which subnet export is used.
      VPCZoneIdentifier: !Split
        - ','
        - 'Fn::ImportValue': !Sub '${ParentVPCStack}-Subnets${SubnetsReach}'
      Tags:
        - Key: Name
          Value: 'ecs-cluster'
          PropagateAtLaunch: true
    # Creation and rolling updates wait up to 15 minutes for the cfn-signal
    # sent from UserData.
    CreationPolicy:
      ResourceSignal:
        Timeout: PT15M
    UpdatePolicy:
      AutoScalingRollingUpdate:
        PauseTime: PT15M
        WaitOnResourceSignals: true
  # Queue that receives the terminating lifecycle hook notifications; after
  # 5 receives a message is moved to the dead letter queue.
  AutoScalingGroupLifecycleHookQueue:
    Type: 'AWS::SQS::Queue'
    Properties:
      QueueName: !Sub '${AWS::StackName}-lifecycle-hook'
      VisibilityTimeout: !Ref DrainingTimeoutInSeconds
      RedrivePolicy:
        deadLetterTargetArn: !GetAtt 'AutoScalingGroupLifecycleHookDeadLetterQueue.Arn'
        maxReceiveCount: 5
  AutoScalingGroupLifecycleHookDeadLetterQueue:
    Type: 'AWS::SQS::Queue'
    Properties:
      QueueName: !Sub '${AWS::StackName}-lifecycle-hook-dlq'
  # Role assumed by autoscaling.amazonaws.com to publish to the hook queue.
  AutoScalingGroupLifecycleHookIAMRole:
    Type: 'AWS::IAM::Role'
    Properties:
      AssumeRolePolicyDocument:
        Version: '2012-10-17'
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - 'autoscaling.amazonaws.com'
            Action:
              - 'sts:AssumeRole'
      Path: '/'
      Policies:
        - PolicyName: sqs
          PolicyDocument:
            Version: '2012-10-17'
            Statement:
              - Sid: write
                Effect: Allow
                Action:
                  - 'sqs:SendMessage'
                  - 'sqs:GetQueueUrl'
                Resource: !GetAtt 'AutoScalingGroupLifecycleHookQueue.Arn'
  AutoScalingGroupTerminatingLifecycleHook:
    Type: 'AWS::AutoScaling::LifecycleHook'
    Properties:
      HeartbeatTimeout: !Ref DrainingTimeoutInSeconds
      DefaultResult: CONTINUE
      AutoScalingGroupName: !Ref AutoScalingGroup
      LifecycleTransition: 'autoscaling:EC2_INSTANCE_TERMINATING'
      NotificationTargetARN: !GetAtt 'AutoScalingGroupLifecycleHookQueue.Arn'
      RoleARN: !GetAtt 'AutoScalingGroupLifecycleHookIAMRole.Arn'
  # Scaling policies: +/-25 percent of capacity per step, 300s cooldown.
  ScaleUpPolicy:
    Type: 'AWS::AutoScaling::ScalingPolicy'
    Properties:
      AdjustmentType: PercentChangeInCapacity
      MinAdjustmentStep: 1
      AutoScalingGroupName: !Ref AutoScalingGroup
      Cooldown: 300
      ScalingAdjustment: 25
  ScaleDownPolicy:
    Type: 'AWS::AutoScaling::ScalingPolicy'
    Properties:
      AdjustmentType: PercentChangeInCapacity
      MinAdjustmentStep: 1
      AutoScalingGroupName: !Ref AutoScalingGroup
      Cooldown: 300
      ScalingAdjustment: -25
  # "...HighAlarm"/"...LowAlarm" drive the scaling policies;
  # "...TooHighAlarm" variants notify the alert topic when one is configured.
  CPUReservationHighAlarm:
    Type: 'AWS::CloudWatch::Alarm'
    Properties:
      AlarmDescription: 'Cluster is running out of CPU (reservation)'
      Namespace: 'AWS/ECS'
      Dimensions:
        - Name: ClusterName
          Value: !Ref Cluster
      MetricName: CPUReservation
      ComparisonOperator: GreaterThanThreshold
      Statistic: Average  # special rule because we scale on reservations and not utilization
      Period: 60
      EvaluationPeriods: 1
      Threshold: 80
      AlarmActions:
        - !Ref ScaleUpPolicy
  CPUReservationTooHighAlarm:
    Condition: HasAlertTopic
    Type: 'AWS::CloudWatch::Alarm'
    Properties:
      AlarmDescription: 'Average CPU reservation over last 10 minutes higher than 90%'
      Namespace: 'AWS/ECS'
      Dimensions:
        - Name: ClusterName
          Value: !Ref Cluster
      MetricName: CPUReservation
      ComparisonOperator: GreaterThanThreshold
      Statistic: Average  # special rule because we scale on reservations and not utilization
      Period: 600
      EvaluationPeriods: 1
      Threshold: 90
      AlarmActions:
        - 'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'
  CPUUtilizationTooHighAlarm:
    Condition: HasAlertTopic
    Type: 'AWS::CloudWatch::Alarm'
    Properties:
      AlarmDescription: 'Average CPU utilization over last 10 minutes higher than 80%'
      Namespace: 'AWS/ECS'
      Dimensions:
        - Name: ClusterName
          Value: !Ref Cluster
      MetricName: CPUUtilization
      ComparisonOperator: GreaterThanThreshold
      Statistic: Average
      Period: 600
      EvaluationPeriods: 1
      Threshold: 80
      AlarmActions:
        - 'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'
  MemoryReservationHighAlarm:
    Type: 'AWS::CloudWatch::Alarm'
    Properties:
      AlarmDescription: 'Cluster is running out of memory (reservation)'
      Namespace: 'AWS/ECS'
      Dimensions:
        - Name: ClusterName
          Value: !Ref Cluster
      MetricName: MemoryReservation
      ComparisonOperator: GreaterThanThreshold
      Statistic: Average  # special rule because we scale on reservations and not utilization
      Period: 60
      EvaluationPeriods: 1
      Threshold: 80
      AlarmActions:
        - !Ref ScaleUpPolicy
  MemoryReservationTooHighAlarm:
    Condition: HasAlertTopic
    Type: 'AWS::CloudWatch::Alarm'
    Properties:
      AlarmDescription: 'Average memory reservation over last 10 minutes higher than 90%'
      Namespace: 'AWS/ECS'
      Dimensions:
        - Name: ClusterName
          Value: !Ref Cluster
      MetricName: MemoryReservation
      ComparisonOperator: GreaterThanThreshold
      Statistic: Average  # special rule because we scale on reservations and not utilization
      Period: 600
      EvaluationPeriods: 1
      Threshold: 90
      AlarmActions:
        - 'Fn::ImportValue': !Sub '${ParentAlertStack}-TopicARN'
  CPUReservationLowAlarm:
    Type: 'AWS::CloudWatch::Alarm'
    Properties:
      AlarmDescription: 'Cluster is wasting CPU (reservation)'
      Namespace: 'AWS/ECS'
      Dimensions:
        - Name: ClusterName
          Value: !Ref Cluster
      MetricName: CPUReservation
      ComparisonOperator: LessThanThreshold
      Statistic: Average  # special rule because we scale on reservations and not utilization
      Period: 60
      EvaluationPeriods: 1
      Threshold: 20
      AlarmActions:
        - !Ref ScaleDownPolicy
  MemoryReservationLowAlarm:
    Type: 'AWS::CloudWatch::Alarm'
    Properties:
      AlarmDescription: 'Cluster is wasting memory (reservation)'
      Namespace: 'AWS/ECS'
      Dimensions:
        - Name: ClusterName
          Value: !Ref Cluster
      MetricName: MemoryReservation
      ComparisonOperator: LessThanThreshold
      Statistic: Average  # special rule because we scale on reservations and not utilization
      Period: 60
      EvaluationPeriods: 1
      Threshold: 20
      AlarmActions:
        - !Ref ScaleDownPolicy
# Stack outputs. Every output with an Export block is published under
# '<stack name>-<suffix>' so that other stacks can import it.
Outputs:
  TemplateID:
    Description: 'cloudonaut.io template id'
    Value: 'ecs/cluster'
  StackName:
    Description: 'Stack name'
    Value: !Sub '${AWS::StackName}'
  Cluster:
    Description: 'ECS cluster.'
    Value: !Ref Cluster
    Export:
      Name: !Sub '${AWS::StackName}-Cluster'
  HttpListener:
    Description: 'ALB HTTP listener for services.'
    Value: !Ref HttpListener
    Export:
      Name: !Sub '${AWS::StackName}-HttpListener'
  # Only emitted when a load balancer certificate ARN was supplied.
  HttpsListener:
    Condition: HasLoadBalancerCertificateArn
    Description: 'ALB HTTPS listener for services.'
    Value: !Ref HttpsListener
    Export:
      Name: !Sub '${AWS::StackName}-HttpsListener'
  # Re-exports the VPC imported from the parent VPC stack.
  VPC:
    Description: 'VPC of ECS cluster.'
    Value:
      'Fn::ImportValue': !Sub '${ParentVPCStack}-VPC'
    Export:
      Name: !Sub '${AWS::StackName}-VPC'
  SecurityGroup:
    Description: 'Security Group of ECS cluster.'
    Value: !Ref SecurityGroup
    Export:
      Name: !Sub '${AWS::StackName}-SecurityGroup'
  LogGroup:
    Description: 'Log group of ECS cluster.'
    Value: !Ref LogGroup
    Export:
      Name: !Sub '${AWS::StackName}-LogGroup'
  DNSName:
    Description: 'The DNS name for the ECS cluster load balancer.'
    Value: !GetAtt 'LoadBalancer.DNSName'
    Export:
      Name: !Sub '${AWS::StackName}-DNSName'
  URL:
    Description: 'URL to the ECS cluster.'
    Value: !Sub 'http://${LoadBalancer.DNSName}'
    Export:
      Name: !Sub '${AWS::StackName}-URL'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment