This is an example architecture to connect an EC2 auto-scaling group with an ECS cluster such that all ECS tasks are drained from an EC2 instance before it is removed from the ASG.
Last active
July 11, 2018 21:52
-
-
Save mmdriley/aa8e2ff8dbb9603bc5825b3fdb59620e to your computer and use it in GitHub Desktop.
ECS+ASG in Pulumi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as aws from "@pulumi/aws"; | |
import * as cloud from "@pulumi/cloud"; | |
import * as pulumi from "pulumi"; | |
import { RoleBuilder } from "./roleBuilder"; | |
// Shape of the event a Lambda function receives from an SNS subscription.
// https://docs.aws.amazon.com/lambda/latest/dg/eventsources.html#eventsources-sns
// https://github.com/pulumi/pulumi-cloud/blob/0c6f1497efaf1148c70e9b9c89a0136791b6118e/aws/sns.ts#L17
interface LambdaSnsEvent {
    Records: SnsRecord[];
}

// One entry of LambdaSnsEvent.Records; the notification itself is carried in `Sns`.
interface SnsRecord {
    EventVersion: string;
    EventSubscriptionArn: string;
    EventSource: string;
    Sns: SnsItem;
}

// The SNS notification. `Message` is the published payload as a raw string;
// consumers in this file JSON.parse it themselves.
interface SnsItem {
    SignatureVersion: string;
    Timestamp: string;
    Signature: string;
    SigningCertUrl: string;
    MessageId: string;
    Message: string;
    MessageAttributes: { [key: string]: SnsMessageAttribute };
    Type: string;
    UnsubscribeUrl: string;
    TopicArn: string;
    Subject: string;
}

// A single entry of SnsItem.MessageAttributes.
interface SnsMessageAttribute {
    Type: string;
    Value: string;
}
// JSON payload that an auto-scaling lifecycle hook publishes to its SNS topic.
// It arrives as the string in SnsItem.Message and must be parsed by the receiver.
// https://docs.aws.amazon.com/autoscaling/ec2/userguide/lifecycle-hooks.html#sns-notifications
interface LifecycleHookMessage {
    LifecycleHookName: string;
    AccountId: string;
    RequestId: string;
    LifecycleTransition: string;
    AutoScalingGroupName: string;
    Service: string;
    Time: string;
    EC2InstanceId: string;
    LifecycleActionToken: string;
}
// Envelope of a CloudWatch Events delivery. `T` is the type of the
// source-specific payload carried in `detail`.
interface CloudwatchEvent<T> {
    version: string;
    id: string;
    "detail-type": string;
    source: string;
    account: string;
    time: string;
    region: string;
    resources: string[];
    detail: T;
}
// Resource entries reported for an ECS container instance.
// https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_cwe_events.html#ecs_container_instance_events
// https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerInstance.html

// CPU resource entry; the amount is in `integerValue`.
interface CpuResource {
    name: "CPU";
    type: "INTEGER";
    integerValue: number;
}

// Memory resource entry; the amount is in `integerValue`.
interface MemoryResource {
    name: "MEMORY";
    type: "INTEGER";
    integerValue: number;
}

// Port resource entry; the ports are listed as strings in `stringSetValue`.
interface PortsResource {
    name: "PORTS";
    type: "STRINGSET";
    stringSetValue: string[];
}

// Union of the resource entries this program models, discriminated by `name`.
type EcsResource = CpuResource | MemoryResource | PortsResource;
// `detail` payload of an "ECS Container Instance State Change" event.
interface ContainerInstanceEventDetails {
    agentConnected: boolean;
    attributes: {
        name: string;
        value?: string;
    }[];
    clusterArn: string;
    containerInstanceArn: string;
    ec2InstanceId: string;
    // Resources the instance registered with.
    registeredResources: EcsResource[];
    // Resources still available; this program compares them with
    // `registeredResources` to decide whether the instance is empty.
    remainingResources: EcsResource[];
    status: string;
    version: number;
    versionInfo: {
        agentHash: string;
        agentVersion: string;
        dockerVersion: string;
    };
    registeredAt: string;
    updatedAt: string;
}
(async () => {
// The program runs inside an async IIFE so resource outputs can be awaited.

// SNS topic that receives the auto-scaling group's lifecycle hook notifications.
const lifecycleTopic = new aws.sns.Topic("lifecycleTopic");

// Role the Auto Scaling service assumes in order to publish to that topic.
const lifecyclePublishRole = new RoleBuilder()
    .forService("autoscaling.amazonaws.com")
    .allowOn("sns:Publish", (await lifecycleTopic.arn)!)
    .build("lifecyclePublishRole");

const cluster = new aws.ecs.Cluster("cluster");
// Awaited up front so the Lambda handlers below capture a plain value.
const clusterArn = await cluster.arn;

// Shared by the ASG template (which declares the hook) and the Lambda that completes it.
const lifecycleHookName = "DrainEcsOnTerminate";
// Lambda subscribed to the lifecycle topic. When the ASG announces that an EC2
// instance is terminating, it puts the matching ECS container instance into
// DRAINING so ECS moves its tasks elsewhere.
//
// The handler never fails the invocation: every error is logged and the event is
// dropped, leaving the ASG lifecycle hook timeout as the fallback.
const lifecycleHook = new aws.serverless.Function("lifecycleHook", {
    policies: [
        aws.iam.AWSLambdaBasicExecutionRole,
        aws.iam.AmazonEC2ContainerServiceFullAccess,
    ],
}, async (event: LambdaSnsEvent, context, callback) => {
    const awssdk = await import("aws-sdk");
    const ecs = new awssdk.ECS();

    // https://aws.amazon.com/sns/faqs/#reliability
    // "... all notification messages will contain a single published message."
    const record = event.Records[0];
    console.log(`lifecycle message: ${record.Sns.Message}`);
    const lifecycleEvent = <LifecycleHookMessage>JSON.parse(record.Sns.Message);

    // Messages without an instance ID (e.g. the test notification Auto Scaling sends
    // when a hook is configured) have nothing to drain.
    if (!lifecycleEvent.EC2InstanceId) {
        console.log("Ignoring message without EC2InstanceId");
        callback(null, null);
        return;
    }

    // Get the ECS container instance corresponding to this EC2 instance.
    // `.promise()` rejects on failure, so errors must be caught rather than read
    // from `$response.error` on the resolved value.
    let arn: string | undefined;
    try {
        const listResult = await ecs.listContainerInstances({
            cluster: clusterArn,
            filter: `attribute:pulumi.ec2.instance-id == ${lifecycleEvent.EC2InstanceId}`,
        }).promise();
        arn = (listResult.containerInstanceArns || [])[0];
    } catch (err) {
        // Without the container instance ARN there's nothing we can do.
        // Let this event slip by and defer to the ASG lifecycle hook timeout.
        console.log(`ListContainerInstances: ${err instanceof Error ? err.message : String(err)}`);
        callback(null, null);
        return;
    }

    if (!arn) {
        // The instance never registered with the cluster, or has already deregistered.
        console.log(`No container instance found for EC2 instance ${lifecycleEvent.EC2InstanceId}`);
        callback(null, null);
        return;
    }
    console.log(`Mapped EC2 instance ${lifecycleEvent.EC2InstanceId} to container instance ${arn}`);

    // Set the ECS container instance state to DRAINING.
    try {
        const drainResult = await ecs.updateContainerInstancesState({
            cluster: clusterArn,
            containerInstances: [ arn ],
            status: "DRAINING",
        }).promise();
        const instance = (drainResult.containerInstances || [])[0];
        if (instance) {
            console.log(`Draining ${instance.containerInstanceArn}, ${instance.runningTasksCount} running tasks`);
        } else {
            console.log(`UpdateContainerInstancesState: no container instance returned for ${arn}`);
        }
    } catch (err) {
        console.log(`UpdateContainerInstancesState: ${err instanceof Error ? err.message : String(err)}`);
    }

    callback(null, null);
});
// Let SNS invoke the drain function, restricted to deliveries from the lifecycle topic.
const lifecycleHookPermission = new aws.lambda.Permission("lifecycleHookPermission", {
    action: "lambda:InvokeFunction",
    function: lifecycleHook.lambda,
    principal: "sns.amazonaws.com",
    sourceArn: lifecycleTopic.arn,
});

// Route lifecycle notifications from the topic to the drain function.
const lifecycleSubscription = new aws.sns.TopicSubscription("lifecycleSubscription", {
    topic: lifecycleTopic,
    endpoint: lifecycleHook.lambda.arn,
    protocol: "lambda",
});
// The CloudFormation stack is named after the id of a throwaway S3 bucket.
// NOTE(review): presumably a trick to obtain a unique auto-generated name that is
// known before the stack exists (the user data needs it for cfn-signal) -- confirm.
const stackName = await new aws.s3.Bucket("cluster-stack").id;

// CloudWatch Events pattern matching container instances of this cluster that are DRAINING.
// https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#ecs-event-types
// https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_cwe_events.html#ecs_container_instance_events
const ecsDrainingInstanceFilter = {
    source: [ "aws.ecs" ],
    "detail-type": [ "ECS Container Instance State Change" ],
    detail: {
        // The non-null assertion belongs on the awaited value, not on the output itself.
        clusterArn: [ (await cluster.arn)! ],
        status: [ "DRAINING" ],
        // Ignore a last state-change where this goes to "false".
        agentConnected: [ true ],
    },
};
// Create a canonical summary of resources of interest.
//
// Only CPU and MEMORY entries contribute; every other entry (e.g. PORTS) is ignored.
// The parts are sorted so the result does not depend on the order of `resources`,
// which makes two summaries comparable with plain string equality.
// Returns e.g. "cpu=1024,memory=993", or "" when nothing of interest is present.
function resourceSummary(resources: EcsResource[]): string {
    const lines: string[] = [];
    for (const r of resources) {
        if (r.name === "CPU") {
            lines.push(`cpu=${r.integerValue}`);
        } else if (r.name === "MEMORY") {
            lines.push(`memory=${r.integerValue}`);
        }
    }
    return lines.sort().join(",");
}
// Lambda triggered by container-instance state changes. Once a DRAINING instance has
// no tasks left, it completes the ASG lifecycle action so termination can proceed.
//
// The handler never fails the invocation: errors are logged and the event is dropped,
// leaving the ASG lifecycle hook timeout as the fallback.
const stateChangeListener = new aws.serverless.Function("stateChangeListener", {
    policies: [
        aws.iam.AWSLambdaBasicExecutionRole,
        aws.iam.AmazonEC2ReadOnlyAccess,
        aws.iam.AutoScalingFullAccess,
    ],
}, async (event: CloudwatchEvent<ContainerInstanceEventDetails>, context, callback) => {
    // Thanks to the Cloudwatch Events filter, we know the instance is draining.
    // Is this instance empty?
    // Since the event details don't include runningTasksCount, we rely on resources as a proxy --
    // if registered and available resources are the same, the instance is empty.
    const registered = resourceSummary(event.detail.registeredResources);
    const remaining = resourceSummary(event.detail.remainingResources);
    if (registered !== remaining) {
        console.log(`Ignoring event for ${event.detail.containerInstanceArn} because it isn't empty.`);
        console.log(`registered: ${registered} remaining: ${remaining}`);
        callback(null, null);
        return;
    }

    // The instance is draining and empty. Complete the lifecycle event.
    const instanceId = event.detail.ec2InstanceId;
    const awssdk = await import("aws-sdk");
    const ec2 = new awssdk.EC2();

    // Read the group name from the tag autoscaling puts on the EC2 instance.
    // Ideally we would retrieve this as an output from the CloudFormation stack and capture the value here. For that
    // to work, this function would have to be created *after* the CFN stack, which means it would be torn down
    // *before* the stack and wouldn't be in place to service final lifecycle events for the ASG.
    //
    // `.promise()` rejects on failure, so errors must be caught rather than read
    // from `$response.error` on the resolved value.
    let autoScalingGroupName: string | undefined;
    try {
        const tagsResult = await ec2.describeTags({
            Filters: [
                { Name: "resource-id", Values: [ instanceId ] },
                { Name: "key", Values: [ "aws:autoscaling:groupName" ] },
            ],
        }).promise();
        const tags = tagsResult.Tags || [];
        autoScalingGroupName = tags.length > 0 ? tags[0].Value : undefined;
    } catch (err) {
        console.log(`DescribeTags on ${instanceId} failed: ${err instanceof Error ? err.message : String(err)}`);
        callback(null, null);
        return;
    }

    if (!autoScalingGroupName) {
        console.log(`Couldn't get aws:autoscaling:groupName tag for EC2 instance ${instanceId}`);
        callback(null, null);
        return;
    }

    // We don't need the lifecycle action token; we can complete the lifecycle action with the EC2 instance ID.
    // https://docs.aws.amazon.com/autoscaling/ec2/userguide/lifecycle-hooks.html#completing-lifecycle-hooks
    console.log(`Completing lifecycle action for ${instanceId} in ${autoScalingGroupName}`);
    const autoscaling = new awssdk.AutoScaling();
    try {
        await autoscaling.completeLifecycleAction({
            AutoScalingGroupName: autoScalingGroupName,
            LifecycleHookName: lifecycleHookName,
            InstanceId: instanceId,
            LifecycleActionResult: "CONTINUE",
        }).promise();
    } catch (err) {
        // May already have been completed.
        console.log(`CompleteLifecycleAction failed: ${err instanceof Error ? err.message : String(err)}`);
    }

    callback(null, null);
});
// CloudWatch Events rule selecting the DRAINING container-instance events of this cluster.
const stateChangeRule = new aws.cloudwatch.EventRule("stateChangeRule", {
    eventPattern: JSON.stringify(ecsDrainingInstanceFilter),
});

// Send matching events to the state-change Lambda.
const stateChangeTarget = new aws.cloudwatch.EventTarget("stateChangeTarget", {
    rule: stateChangeRule.name,
    arn: stateChangeListener.lambda.arn,
});

// Let CloudWatch Events invoke the Lambda, restricted to this rule.
const stateChangePermission = new aws.lambda.Permission("stateChangePermission", {
    action: "lambda:InvokeFunction",
    function: stateChangeListener.lambda,
    principal: "events.amazonaws.com",
    sourceArn: stateChangeRule.arn,
});
// Most recent Amazon-owned AMI whose name ends in "amazon-ecs-optimized".
const ecsAmi = await aws.getAmi({
    filter: [
        { name: "name", values: [ "*amazon-ecs-optimized" ] },
        { name: "owner-alias", values: [ "amazon" ] },
    ],
    mostRecent: true,
});

// cloud-config user data for the cluster instances. It:
//   - installs aws-cfn-bootstrap (provides cfn-signal),
//   - points the ECS agent at this cluster and tags the container instance with
//     its EC2 instance ID,
//   - signals the CloudFormation "Instances" resource once the instance is up.
// "#cloud-config" must stay on the very first line of the string. A backslash at
// the end of a template-literal line is a line continuation, so the cfn-signal
// invocation is emitted as a single line.
// NOTE(review): $INSTANCE_ID is not set by this script; presumably cloud-init
// provides it to bootcmd entries -- confirm.
const userData = `#cloud-config
packages:
  - aws-cfn-bootstrap
bootcmd:
  - echo ECS_CLUSTER='${await cluster.name}' >> /etc/ecs/ecs.config
  # Include the EC2 instance ID as an attribute on the ECS container instance so we can search for it.
  # ECS makes it easy to get the EC2 instance ID for a container instance ARN, but not vice-versa.
  - echo ECS_INSTANCE_ATTRIBUTES='{"pulumi.ec2.instance-id":"'"$INSTANCE_ID"'"}' >> /etc/ecs/ecs.config
runcmd:
  - /opt/aws/bin/cfn-signal \
      --region '${(await aws.getRegion({ current: true })).name}' \
      --stack '${stackName}' \
      --resource Instances
# 4
`;
// Role for the cluster's EC2 instances: the managed ECS-for-EC2 policy plus SSM access.
const instanceRole = new RoleBuilder()
    .forService("ec2.amazonaws.com")
    .attachPolicy(aws.iam.AmazonEC2ContainerServiceforEC2Role)
    .attachPolicy(aws.iam.AmazonEC2RoleforSSM)
    .build("instanceRole");

// Instance profile wrapping the role so it can be attached to EC2 instances.
const instanceProfile = new aws.iam.InstanceProfile("instanceProfile", {
    role: instanceRole,
});

// Launch configuration the auto-scaling group uses to start instances.
const instanceLaunchConfiguration = new aws.ec2.LaunchConfiguration("launchConfiguration", {
    imageId: ecsAmi.imageId,
    instanceType: "t2.micro",
    iamInstanceProfile: instanceProfile.id,
    userData: userData,
});
// Spread the group across the first `azCount` availability zones.
const azCount = 2;
const azNames = (await aws.getAvailabilityZones()).names.slice(0, azCount);
const instanceCount = 2;

// CloudFormation template for the auto-scaling group.
//   - The lifecycle hook publishes EC2_INSTANCE_TERMINATING notifications to the
//     lifecycle topic using the publish role.
//   - CreationPolicy waits for `instanceCount` cfn-signal calls (sent from user data).
//   - UpdatePolicy rolls instances in batches while keeping `instanceCount` in service.
// CreationPolicy and UpdatePolicy are resource attributes, so they sit beside
// `Properties`, not inside it.
const template = `
AWSTemplateFormatVersion: '2010-09-09'
Outputs:
    Instances:
        Value: !Ref Instances
Resources:
    Instances:
        Type: AWS::AutoScaling::AutoScalingGroup
        Properties:
            MaxSize: ${instanceCount * 2}
            MinSize: ${instanceCount}
            DesiredCapacity: ${instanceCount}
            LaunchConfigurationName: "${await instanceLaunchConfiguration.id}"
            AvailabilityZones: [ ${azNames.join(",")} ]
            HealthCheckGracePeriod: 120
            HealthCheckType: EC2
            LifecycleHookSpecificationList:
              - LifecycleHookName: ${lifecycleHookName}
                LifecycleTransition: autoscaling:EC2_INSTANCE_TERMINATING
                NotificationTargetARN: ${await lifecycleTopic.arn}
                RoleARN: ${await lifecyclePublishRole.arn}
                DefaultResult: CONTINUE
        CreationPolicy:
            ResourceSignal:
                Count: ${instanceCount}
                Timeout: PT5M
        UpdatePolicy:
            AutoScalingRollingUpdate:
                MaxBatchSize: ${Math.ceil(instanceCount / 10)}
                MinInstancesInService: ${instanceCount}
                PauseTime: PT5M
                SuspendProcesses:
                  - ScheduledActions
                WaitOnResourceSignals: true
`;

// Deploy the template under the pre-computed stack name that the instances signal.
const stack = new aws.cloudformation.Stack("clusterStack", {
    name: stackName,
    templateBody: template,
});
})(); // close async IIFE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as crypto from "crypto"; | |
import * as aws from "@pulumi/aws"; | |
// Returns the first 8 hex characters of the SHA-256 digest of `s`.
// Used to derive short, stable suffixes for resource names.
function hash(s: string) {
    return crypto.createHash("sha256").update(s).digest("hex").substring(0, 8);
}
// Fluent builder for an IAM role.
//
// Collect trust statements (`forService`), inline policy statements (`addStatement`,
// `allow`, `allowOn`) and managed policy ARNs (`attachPolicy`), then call `build` to
// create the role and its policy resources. Every method except `build` returns the
// builder so calls can be chained.
export class RoleBuilder {
    private assumeRoleStatements: aws.iam.PolicyStatement[] = [];
    private policyStatements: aws.iam.PolicyStatement[] = [];
    private attachPolicyArns: aws.ARN[] = [];

    // Allow the given service principal (e.g. "ec2.amazonaws.com") to assume the role.
    forService(serviceName: string): RoleBuilder {
        this.assumeRoleStatements.push({
            Action: "sts:AssumeRole",
            Effect: "Allow",
            Principal: {
                Service: serviceName,
            },
        });
        return this;
    }

    // Add an arbitrary statement to the role's inline policy.
    addStatement(s: aws.iam.PolicyStatement): RoleBuilder {
        this.policyStatements.push(s);
        return this;
    }

    // Allow the given actions on all resources.
    allow(...actions: string[]): RoleBuilder {
        this.addStatement({
            Action: actions,
            Effect: "Allow",
            // Identity-based policy statements must name a resource; without this
            // IAM rejects the policy document as malformed.
            Resource: "*",
        });
        return this;
    }

    // Allow the given action(s) on the given resource(s).
    allowOn(action: string | string[], resource: string | string[]): RoleBuilder {
        this.addStatement({
            Action: action,
            Effect: "Allow",
            Resource: resource,
        });
        return this;
    }

    // Attach a managed policy, identified by ARN, to the role.
    attachPolicy(policyArn: aws.ARN): RoleBuilder {
        this.attachPolicyArns.push(policyArn);
        return this;
    }

    // Create the role named `name`, one RolePolicyAttachment per managed policy, and,
    // if any inline statements were added, a single RolePolicy holding all of them.
    // Returns the role.
    build(name: string): aws.iam.Role {
        const role = new aws.iam.Role(name, {
            assumeRolePolicy: JSON.stringify(<aws.iam.PolicyDocument>{
                Version: "2012-10-17",
                Statement: this.assumeRoleStatements,
            }),
        });

        for (const policy of this.attachPolicyArns) {
            // The ARN hash keeps attachment names unique per policy and stable across runs.
            new aws.iam.RolePolicyAttachment(name + "-" + hash(policy), {
                role: role,
                policyArn: policy,
            });
        }

        if (this.policyStatements.length > 0) {
            new aws.iam.RolePolicy(name + "-policies", {
                role: role.name,
                policy: JSON.stringify(<aws.iam.PolicyDocument>{
                    Version: "2012-10-17",
                    Statement: this.policyStatements,
                }),
            });
        }

        return role;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment