# Gist by @delijati (delijati/65ed6ae2b7a799075cd3ca28ddb5338a),
# forked from schlarpc/ecs.py. Created May 14, 2020 14:42.
from troposphere import \
AWSHelperFn, Base64, Cidr, Condition, Equals, GetAtt, Join, Not, Output, Parameter, Ref, \
Region, Select, Split, StackName, Sub, Tags, Template
from troposphere.autoscaling import \
AutoScalingGroup, LaunchTemplateSpecification, LifecycleHookSpecification
from troposphere.awslambda import \
Code, Function, Permission
from troposphere.ec2 import \
CreditSpecification, IamInstanceProfile, InternetGateway, LaunchTemplate, LaunchTemplateData, \
Route, RouteTable, SecurityGroup, SecurityGroupRule, Subnet, SubnetRouteTableAssociation, VPC, \
VPCGatewayAttachment
from troposphere.ecs import \
Cluster, ContainerDefinition, DeploymentConfiguration, LoadBalancer as ServiceLoadBalancer, \
LogConfiguration, PortMapping, Service, TaskDefinition
from troposphere.elasticloadbalancingv2 import \
Action as ListenerAction, Listener, LoadBalancer, LoadBalancerAttributes, TargetGroup, \
TargetGroupAttribute
from troposphere.iam import \
InstanceProfile, Policy as NamedPolicy, Role
from troposphere.logs import \
LogGroup
from troposphere.sns import \
SubscriptionResource, Topic
from troposphere.policies import \
AutoScalingReplacingUpdate, CreationPolicy, ResourceSignal, UpdatePolicy
from awacs import autoscaling, ec2, ecs, elasticloadbalancing, route53, s3, sns, sts
from awacs.aws import Action, Allow, Policy, Principal, Statement
import inspect
import textwrap
def lifecycle_ecs_drain_handler(event, _context):
    """Drain an ECS container instance before AutoScaling terminates it.

    Invoked via SNS by the lifecycle hook; re-publishes its own message until
    the instance runs no tasks. Keep it self-contained (local imports): the
    source is read with inspect and shipped as inline Lambda code.
    """
    import json
    import logging
    import time
    import boto3

    log = logging.getLogger(__name__)
    logging.root.setLevel(logging.INFO)

    class RetryLater(Exception):
        """Not drained yet; look again after a pause."""

    record = event['Records'][0]['Sns']
    notice = json.loads(record['Message'])
    log.info('Processing message: %s', notice)

    if notice.get('LifecycleTransition') != 'autoscaling:EC2_INSTANCE_TERMINATING':
        log.warning('Not an instance termination message, ignoring')
        return

    hook_metadata = json.loads(notice.get('NotificationMetadata', '{}'))

    try:
        ecs_client = boto3.client('ecs')
        cluster = hook_metadata['Cluster']
        found = ecs_client.list_container_instances(
            cluster=cluster,
            filter='attribute:ec2-instance-id=={}'.format(notice['EC2InstanceId']),
        )
        arns = found['containerInstanceArns']
        if not arns:
            log.warning('EC2 instance %s not in cluster, ignoring', notice['EC2InstanceId'])
            return
        log.info('Container instance ARN: %s', arns[0])

        described = ecs_client.describe_container_instances(
            cluster=cluster,
            containerInstances=[arns[0]],
        )
        instance = described['containerInstances'][0]

        if instance['status'] == 'ACTIVE':
            log.info('Setting state to DRAINING')
            ecs_client.update_container_instances_state(
                cluster=cluster,
                containerInstances=[instance['containerInstanceArn']],
                status='DRAINING',
            )
            raise RetryLater('Container instance state changed to DRAINING')

        task_count = instance['runningTasksCount']
        if task_count:
            raise RetryLater('{} tasks running'.format(task_count))

        log.info('Instance drained, completing lifecycle action')
        boto3.client('autoscaling').complete_lifecycle_action(
            LifecycleHookName=notice['LifecycleHookName'],
            AutoScalingGroupName=notice['AutoScalingGroupName'],
            LifecycleActionResult='CONTINUE',
            InstanceId=notice['EC2InstanceId'],
        )
        log.info('Done, AutoScaling will now terminate the instance')
    except RetryLater:
        # Expected path while tasks are still stopping.
        log.exception('Retry required; republishing to SNS topic in 10 seconds')
        time.sleep(10)
        boto3.client('sns').publish(
            TopicArn=record['TopicArn'],
            Message=record['Message'],
        )
        log.info('Republished')
def _service_trust_policy(service):
    """Return a trust policy letting the AWS service principal `service` assume a role."""
    return Policy(
        Version='2012-10-17',
        Statement=[
            Statement(
                Effect=Allow,
                Principal=Principal('Service', service),
                Action=[sts.AssumeRole],
            ),
        ],
    )


def _add_parameters(t):
    """Declare the stack parameters on `t` and return them keyed by a short name."""
    return {
        'ami_id': t.add_parameter(Parameter(
            'ECSImageParameter',
            Default='/aws/service/ecs/optimized-ami/amazon-linux/recommended/image_id',
            Type='AWS::SSM::Parameter::Value<String>',
        )),
        'instance_type': t.add_parameter(Parameter(
            'InstanceType',
            Default='t2.small',
            Type='String',
        )),
        'instance_count': t.add_parameter(Parameter(
            'InstanceCount',
            Default=3,
            Type='Number',
        )),
        'task_count': t.add_parameter(Parameter(
            'TaskCount',
            Default=3,
            Type='Number',
        )),
        'docker_image': t.add_parameter(Parameter(
            'DockerImage',
            Default='tutum/hello-world',
            Type='String',
        )),
        'log_retention': t.add_parameter(Parameter(
            'LogRetentionDays',
            Default=90,
            Type='Number',
        )),
    }


def _add_network(t, az_suffixes):
    """Add the VPC, internet gateway and one routed subnet per AZ suffix.

    Returns ``(vpc, subnets, routes)``. The routes are returned so that
    dependants can wait until the subnets actually have internet access.
    """
    vpc = t.add_resource(VPC(
        'VPC',
        CidrBlock='10.69.0.0/16',
        Tags=Tags(
            Name=StackName,
        ),
    ))
    internet_gateway = t.add_resource(InternetGateway(
        'InternetGateway',
        Tags=Tags(
            Name=StackName,
        ),
    ))
    vpc_gateway_attachment = t.add_resource(VPCGatewayAttachment(
        'VPCGatewayAttachment',
        VpcId=Ref(vpc),
        InternetGatewayId=Ref(internet_gateway),
    ))

    subnets = []
    routes = []
    # NOTE: the logical IDs say "Private", but these subnets route through the
    # internet gateway and map public IPs. The IDs are kept unchanged because
    # renaming them would replace the resources in existing stacks.
    for idx, az_suffix in enumerate(az_suffixes):
        subnet = t.add_resource(Subnet(
            'PrivateSubnet{}'.format(idx),
            VpcId=Ref(vpc),
            # Carve the VPC range into 32 blocks with 8 host bits; take the idx-th.
            CidrBlock=Select(idx, Cidr(vpc.CidrBlock, 32, 8)),
            AvailabilityZone=Sub('${AWS::Region}${Suffix}', Suffix=az_suffix),
            MapPublicIpOnLaunch=True,
            Tags=Tags(
                Name=StackName,
            ),
        ))
        subnets.append(subnet)
        route_table = t.add_resource(RouteTable(
            'PrivateSubnetRouteTable{}'.format(idx),
            VpcId=Ref(vpc),
            Tags=Tags(
                Name=StackName,
            ),
        ))
        t.add_resource(SubnetRouteTableAssociation(
            'PrivateSubnetRouteAssociation{}'.format(idx),
            SubnetId=Ref(subnet),
            RouteTableId=Ref(route_table),
        ))
        route = t.add_resource(Route(
            'PrivateSubnetInternetRoute{}'.format(idx),
            RouteTableId=Ref(route_table),
            DestinationCidrBlock='0.0.0.0/0',
            GatewayId=Ref(internet_gateway),
            # The gateway must be attached to the VPC before it can be routed to.
            DependsOn=[vpc_gateway_attachment.title],
        ))
        routes.append(route)
    return vpc, subnets, routes


def _add_load_balancer(t, vpc, subnets, routes):
    """Add the ALB, its security group, target group and HTTP listener.

    Returns ``(target_group, security_group, load_balancer, http_listener)``.
    """
    target_group = t.add_resource(TargetGroup(
        'LoadBalancerTargetGroup',
        Port=80,
        Protocol='HTTP',
        VpcId=Ref(vpc),
        TargetGroupAttributes=[
            TargetGroupAttribute(
                Key='deregistration_delay.timeout_seconds',
                Value='30',
            ),
        ],
    ))
    security_group = t.add_resource(SecurityGroup(
        'LoadBalancerSecurityGroup',
        GroupDescription=Sub('${AWS::StackName} load balancer'),
        SecurityGroupIngress=[
            SecurityGroupRule(
                IpProtocol='tcp',
                FromPort='80',
                ToPort='80',
                CidrIp='0.0.0.0/0',
            ),
            SecurityGroupRule(
                IpProtocol='tcp',
                FromPort='443',
                ToPort='443',
                CidrIp='0.0.0.0/0',
            ),
        ],
        VpcId=Ref(vpc),
    ))
    load_balancer = t.add_resource(LoadBalancer(
        'LoadBalancer',
        Type='application',
        Scheme='internet-facing',
        LoadBalancerAttributes=[
            LoadBalancerAttributes(
                Key='routing.http2.enabled',
                Value='true',
            ),
        ],
        SecurityGroups=[Ref(security_group)],
        Subnets=[Ref(subnet) for subnet in subnets],
        DependsOn=[route.title for route in routes],
    ))
    http_listener = t.add_resource(Listener(
        'LoadBalancerHTTPListener',
        Protocol='HTTP',
        Port=80,
        LoadBalancerArn=Ref(load_balancer),
        DefaultActions=[
            ListenerAction(
                Type='forward',
                TargetGroupArn=Ref(target_group),
            ),
        ],
    ))
    return target_group, security_group, load_balancer, http_listener


def _add_drain_hook(t, log_retention, lambda_runtime):
    """Add the SNS topic and Lambda function that drain instances on termination.

    Returns ``(topic, subscription)``; the subscription is returned so the
    AutoScaling group can depend on the hook being fully wired up.
    """
    drain_hook_topic = t.add_resource(Topic(
        'DrainHookInvokeTopic',
    ))
    drain_hook_role = t.add_resource(Role(
        'DrainHookExecutionRole',
        AssumeRolePolicyDocument=_service_trust_policy('lambda.amazonaws.com'),
        ManagedPolicyArns=[
            'arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole',
        ],
        Policies=[
            NamedPolicy(
                PolicyName='drain-ecs-policy',
                PolicyDocument=Policy(
                    Version='2012-10-17',
                    Statement=[
                        # The handler re-publishes its own message to retry.
                        Statement(
                            Effect=Allow,
                            Resource=[Ref(drain_hook_topic)],
                            Action=[sns.Publish],
                        ),
                        Statement(
                            Effect=Allow,
                            Resource=['*'],
                            Action=[
                                autoscaling.CompleteLifecycleAction,
                                ecs.DescribeContainerInstances,
                                ecs.ListContainerInstances,
                                ecs.Action('UpdateContainerInstancesState'),
                            ],
                        ),
                    ],
                ),
            ),
        ],
    ))
    drain_hook_function = t.add_resource(Function(
        'DrainHookFunction',
        Runtime=lambda_runtime,
        Role=GetAtt(drain_hook_role, 'Arn'),
        MemorySize=256,
        Timeout=60,
        Code=Code(
            # The handler source is embedded inline, so it must stay
            # self-contained. NOTE(review): inline code is size-limited;
            # keep the handler compact.
            ZipFile=inspect.getsource(lifecycle_ecs_drain_handler),
        ),
        Handler='.'.join(('index', lifecycle_ecs_drain_handler.__name__)),
    ))
    drain_hook_log_group = t.add_resource(LogGroup(
        'DrainHookLogGroup',
        LogGroupName=Sub('/aws/lambda/${{{}}}'.format(drain_hook_function.title)),
        RetentionInDays=Ref(log_retention),
    ))
    drain_hook_permission = t.add_resource(Permission(
        'DrainHookInvokePermission',
        Action='lambda:InvokeFunction',
        Principal='sns.amazonaws.com',
        SourceArn=Ref(drain_hook_topic),
        FunctionName=GetAtt(drain_hook_function, 'Arn'),
        DependsOn=[drain_hook_log_group.title],  # prevent invokes before log group is set up
    ))
    drain_hook_subscription = t.add_resource(SubscriptionResource(
        'DrainHookInvokeSubscription',
        Endpoint=GetAtt(drain_hook_function, 'Arn'),
        Protocol='lambda',
        TopicArn=Ref(drain_hook_topic),
        DependsOn=[drain_hook_permission.title],
    ))
    return drain_hook_topic, drain_hook_subscription


def _add_container_instances(t, params, cluster, vpc, subnets, routes,
                             load_balancer_security_group, drain_hook_topic,
                             drain_hook_subscription):
    """Add the instance role, launch template and AutoScaling group for the cluster hosts."""
    instance_role = t.add_resource(Role(
        'ECSInstanceRole',
        AssumeRolePolicyDocument=_service_trust_policy('ec2.amazonaws.com'),
        ManagedPolicyArns=[
            'arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role',
        ],
    ))
    instance_profile = t.add_resource(InstanceProfile(
        'ECSInstanceProfile',
        Roles=[
            Ref(instance_role),
        ],
    ))
    lifecycle_notification_role = t.add_resource(Role(
        'AutoScalingLifecycleNotificationRole',
        AssumeRolePolicyDocument=_service_trust_policy('autoscaling.amazonaws.com'),
        ManagedPolicyArns=[
            'arn:aws:iam::aws:policy/service-role/AutoScalingNotificationAccessRole',
        ],
    ))
    instance_security_group = t.add_resource(SecurityGroup(
        'InstanceSecurityGroup',
        GroupDescription=Sub('${AWS::StackName} instance'),
        SecurityGroupIngress=[
            # Only the load balancer may reach the instances (any protocol/port).
            SecurityGroupRule(
                IpProtocol='-1',
                SourceSecurityGroupId=Ref(load_balancer_security_group),
            ),
        ],
        VpcId=Ref(vpc),
    ))
    CreditSpecification.props['CpuCredits'] = (str, False)  # bugfix
    launch_template = t.add_resource(LaunchTemplate(
        'InstanceLaunchTemplate',
        LaunchTemplateData=LaunchTemplateData(
            CreditSpecification=CreditSpecification(
                CpuCredits='unlimited',
            ),
            ImageId=Ref(params['ami_id']),
            InstanceType=Ref(params['instance_type']),
            SecurityGroupIds=[Ref(instance_security_group)],
            UserData=Base64(Sub(
                # The backslash after the opening quotes keeps '#!' as the very
                # first bytes of the user data, as shell-script user data requires.
                # The handler's list filter relies on the custom
                # 'ec2-instance-id' attribute registered here.
                textwrap.dedent("""\
                    #!/bin/bash
                    echo ECS_CLUSTER=${ClusterName} >> /etc/ecs/ecs.config
                    INSTANCE_ID=$(curl -s 'http://169.254.169.254/latest/meta-data/instance-id')
                    ATTRIBUTES='{"ec2-instance-id": "'$INSTANCE_ID'"}'
                    echo ECS_INSTANCE_ATTRIBUTES=$ATTRIBUTES >> /etc/ecs/ecs.config
                    yum install -y aws-cfn-bootstrap
                    /opt/aws/bin/cfn-signal --success true --stack ${AWS::StackName} \
                        --resource ${AutoScalingGroupLogicalId} --region ${AWS::Region}
                """),
                ClusterName=Ref(cluster),
                AutoScalingGroupLogicalId='AutoScalingGroup',  # circular ref
            )),
            IamInstanceProfile=IamInstanceProfile(
                Arn=GetAtt(instance_profile, 'Arn'),
            ),
        ),
    ))
    t.add_resource(AutoScalingGroup(
        'AutoScalingGroup',
        # Creation waits for the cfn-signal call issued from the user data.
        CreationPolicy=CreationPolicy(
            ResourceSignal=ResourceSignal(
                Timeout='PT15M',
            ),
        ),
        UpdatePolicy=UpdatePolicy(
            AutoScalingReplacingUpdate=AutoScalingReplacingUpdate(
                WillReplace=True,
            ),
        ),
        DesiredCapacity=Ref(params['instance_count']),
        MinSize=Ref(params['instance_count']),
        MaxSize=Ref(params['instance_count']),
        VPCZoneIdentifier=[Ref(subnet) for subnet in subnets],
        LaunchTemplate=LaunchTemplateSpecification(
            LaunchTemplateId=Ref(launch_template),
            Version=GetAtt(launch_template, 'LatestVersionNumber'),
        ),
        LifecycleHookSpecificationList=[
            LifecycleHookSpecification(
                LifecycleHookName='drain-ecs-tasks',
                LifecycleTransition='autoscaling:EC2_INSTANCE_TERMINATING',
                # Read back by the drain handler to find the cluster.
                NotificationMetadata=Sub('{"Cluster":"${Cluster}"}'),
                NotificationTargetARN=Ref(drain_hook_topic),
                RoleARN=GetAtt(lifecycle_notification_role, 'Arn'),
                HeartbeatTimeout=str(30 * 60),
            ),
        ],
        DependsOn=[route.title for route in routes] + [drain_hook_subscription.title],
    ))


def _add_service(t, params, cluster, target_group, http_listener):
    """Add the task roles, log group, task definition and ECS service behind the ALB."""
    agent_role = t.add_resource(Role(
        'ECSAgentRole',
        AssumeRolePolicyDocument=_service_trust_policy('ecs-tasks.amazonaws.com'),
        ManagedPolicyArns=[
            'arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy',
        ],
    ))
    task_role = t.add_resource(Role(
        'ECSTaskRole',
        AssumeRolePolicyDocument=_service_trust_policy('ecs-tasks.amazonaws.com'),
        Policies=[
            NamedPolicy(
                PolicyName='s3-bucket-access',
                PolicyDocument=Policy(
                    Version='2012-10-17',
                    Statement=[
                        # NOTE(review): grants every S3 action on every
                        # resource; consider scoping this to specific buckets.
                        Statement(
                            Effect=Allow,
                            Resource=['*'],
                            Action=[s3.Action('*')],
                        ),
                    ],
                ),
            ),
        ],
    ))
    log_group = t.add_resource(LogGroup(
        'TaskLogGroup',
        LogGroupName=Sub('/${AWS::StackName}/ecs-task'),
        RetentionInDays=Ref(params['log_retention']),
    ))
    task = t.add_resource(TaskDefinition(
        'ClusterTaskDefinition',
        Cpu='256',
        Memory='0.5GB',
        ContainerDefinitions=[
            ContainerDefinition(
                Name='service-container',
                Image=Ref(params['docker_image']),
                PortMappings=[
                    PortMapping(
                        ContainerPort=80,
                    ),
                ],
                LogConfiguration=LogConfiguration(
                    LogDriver='awslogs',
                    Options={
                        'awslogs-group': Ref(log_group),
                        'awslogs-region': Region,
                        'awslogs-stream-prefix': 'ecs',
                    },
                ),
            ),
        ],
        ExecutionRoleArn=GetAtt(agent_role, 'Arn'),
        TaskRoleArn=GetAtt(task_role, 'Arn'),
    ))
    elb_management_role = t.add_resource(Role(
        'ECSLoadBalancerManagementRole',
        AssumeRolePolicyDocument=_service_trust_policy('ecs.amazonaws.com'),
        Policies=[
            NamedPolicy(
                PolicyName='copied-AmazonECSServiceRolePolicy',
                PolicyDocument=Policy(
                    Version='2012-10-17',
                    Statement=[
                        Statement(
                            Effect=Allow,
                            Resource=['*'],
                            Action=[
                                ec2.AttachNetworkInterface,
                                ec2.CreateNetworkInterface,
                                ec2.Action('CreateNetworkInterfacePermission'),
                                ec2.DeleteNetworkInterface,
                                ec2.Action('DeleteNetworkInterfacePermission'),
                                ec2.Action('Describe*'),
                                ec2.DetachNetworkInterface,
                                elasticloadbalancing.DeregisterInstancesFromLoadBalancer,
                                elasticloadbalancing.DeregisterTargets,
                                elasticloadbalancing.Action('Describe*'),
                                elasticloadbalancing.RegisterInstancesWithLoadBalancer,
                                elasticloadbalancing.RegisterTargets,
                                route53.ChangeResourceRecordSets,
                                route53.CreateHealthCheck,
                                route53.DeleteHealthCheck,
                                route53.Action('Get*'),
                                route53.Action('List*'),
                                route53.UpdateHealthCheck,
                                Action('servicediscovery', 'DeregisterInstance'),
                                Action('servicediscovery', 'Get*'),
                                Action('servicediscovery', 'List*'),
                                Action('servicediscovery', 'RegisterInstance'),
                                Action('servicediscovery', 'UpdateInstanceCustomHealthStatus'),
                            ],
                        ),
                    ],
                ),
            ),
        ],
    ))
    t.add_resource(Service(
        'ClusterService',
        Cluster=Ref(cluster),
        DeploymentConfiguration=DeploymentConfiguration(
            MinimumHealthyPercent=50,
        ),
        LoadBalancers=[
            ServiceLoadBalancer(
                ContainerName=task.ContainerDefinitions[0].Name,
                ContainerPort=80,
                TargetGroupArn=Ref(target_group),
            ),
        ],
        DesiredCount=Ref(params['task_count']),
        TaskDefinition=Ref(task),
        # The target group must be attached to a listener before ECS can use it.
        DependsOn=[http_listener.title],
        Role=GetAtt(elb_management_role, 'Arn'),
    ))


def create_template(lambda_runtime='python3.12', az_suffixes=('a', 'b', 'c')):
    """Build the CloudFormation template for an ECS-on-EC2 cluster behind an ALB.

    The stack contains a VPC with one internet-routed subnet per availability
    zone, an application load balancer, an ECS cluster hosted on a fixed-size
    AutoScaling group, a service running ``TaskCount`` copies of
    ``DockerImage``, and an SNS-triggered Lambda that drains an instance's
    tasks before AutoScaling terminates it.

    Args:
        lambda_runtime: Lambda runtime identifier for the drain hook function.
            The previously hard-coded ``python3.6`` has been retired by AWS
            Lambda, so a currently supported runtime is the default.
        az_suffixes: Availability-zone letters appended to the stack's region;
            one subnet is created for each. An application load balancer
            needs subnets in at least two zones.

    Returns:
        The populated ``troposphere.Template``.
    """
    t = Template(
        Description='OMG, CFN 4 ECS on EC2 ASG w/ ALB',
    )
    params = _add_parameters(t)
    vpc, subnets, routes = _add_network(t, az_suffixes)
    target_group, load_balancer_security_group, load_balancer, http_listener = \
        _add_load_balancer(t, vpc, subnets, routes)
    cluster = t.add_resource(Cluster(
        'Cluster',
    ))
    drain_hook_topic, drain_hook_subscription = _add_drain_hook(
        t, params['log_retention'], lambda_runtime,
    )
    _add_container_instances(
        t, params, cluster, vpc, subnets, routes,
        load_balancer_security_group, drain_hook_topic, drain_hook_subscription,
    )
    _add_service(t, params, cluster, target_group, http_listener)
    t.add_output(Output(
        'LoadBalancerEndpoint',
        Value=GetAtt(load_balancer, 'DNSName'),
    ))
    return t
if __name__ == '__main__':
    # When run as a script, write the rendered template to stdout as JSON.
    rendered_template = create_template().to_json()
    print(rendered_template)
# End of gist.