Skip to content

Instantly share code, notes, and snippets.

@mmdriley
Last active July 11, 2018 21:52
Show Gist options
  • Save mmdriley/aa8e2ff8dbb9603bc5825b3fdb59620e to your computer and use it in GitHub Desktop.
Save mmdriley/aa8e2ff8dbb9603bc5825b3fdb59620e to your computer and use it in GitHub Desktop.
ECS+ASG in Pulumi

This is an example architecture that connects an EC2 Auto Scaling group (ASG) to an ECS cluster so that all ECS tasks are drained from an EC2 instance before that instance is removed from the ASG.

import * as aws from "@pulumi/aws";
import * as cloud from "@pulumi/cloud";
import * as pulumi from "pulumi";
import { RoleBuilder } from "./roleBuilder";
// https://docs.aws.amazon.com/lambda/latest/dg/eventsources.html#eventsources-sns
// https://github.com/pulumi/pulumi-cloud/blob/0c6f1497efaf1148c70e9b9c89a0136791b6118e/aws/sns.ts#L17
/** Event payload handed to a Lambda function invoked by an SNS subscription (see links above). */
interface LambdaSnsEvent {
Records: SnsRecord[];
}
/** One entry of `LambdaSnsEvent.Records`; the SNS notification itself is in `Sns`. */
interface SnsRecord {
EventVersion: string;
EventSubscriptionArn: string;
EventSource: string;
Sns: SnsItem;
}
/** The SNS notification carried by an `SnsRecord`. */
interface SnsItem {
SignatureVersion: string;
Timestamp: string;
Signature: string;
SigningCertUrl: string;
MessageId: string;
// Message body as a string; the lifecycle hook handler in this file JSON-parses it
// into a `LifecycleHookMessage`.
Message: string;
MessageAttributes: { [key: string]: SnsMessageAttribute };
Type: string;
UnsubscribeUrl: string;
TopicArn: string;
Subject: string;
}
/** Typed value stored under a key of `SnsItem.MessageAttributes`. */
interface SnsMessageAttribute {
Type: string;
Value: string;
}
// https://docs.aws.amazon.com/autoscaling/ec2/userguide/lifecycle-hooks.html#sns-notifications
/**
 * Shape of the JSON document found in `SnsItem.Message` when an auto-scaling
 * lifecycle hook publishes to SNS (see link above).
 */
interface LifecycleHookMessage {
LifecycleHookName: string;
AccountId: string;
RequestId: string;
LifecycleTransition: string;
AutoScalingGroupName: string;
Service: string;
Time: string;
// Used by the lifecycle hook handler to look up the matching ECS container instance.
EC2InstanceId: string;
LifecycleActionToken: string;
}
/**
 * Envelope of a CloudWatch Events notification as received by a Lambda target.
 * `T` is the type of the event-specific payload in `detail`.
 */
interface CloudwatchEvent<T> {
version: string;
id: string;
"detail-type": string;
source: string;
account: string;
time: string;
region: string;
resources: string[];
detail: T;
}
// https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_cwe_events.html#ecs_container_instance_events
// https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerInstance.html
/** CPU entry of a container instance's resource list; the amount is in `integerValue`. */
interface CpuResource {
name: "CPU";
type: "INTEGER";
integerValue: number;
}
/** Memory entry of a container instance's resource list; the amount is in `integerValue`. */
interface MemoryResource {
name: "MEMORY";
type: "INTEGER";
integerValue: number;
}
/** Ports entry of a container instance's resource list; values are in `stringSetValue`. */
interface PortsResource {
name: "PORTS";
type: "STRINGSET";
stringSetValue: string[];
}
/** Union of the resource entries modelled here, discriminated by the literal `name` field. */
type EcsResource = CpuResource | MemoryResource | PortsResource;
/**
 * `detail` payload of an "ECS Container Instance State Change" event, i.e. the `T`
 * used with `CloudwatchEvent<T>` by the state-change listener in this file.
 */
interface ContainerInstanceEventDetails {
agentConnected: boolean;
attributes: {
name: string;
value?: string;
}[];
clusterArn: string;
containerInstanceArn: string;
ec2InstanceId: string;
// The state-change listener compares these two lists to decide whether the instance is empty.
registeredResources: EcsResource[];
remainingResources: EcsResource[];
status: string;
version: number;
versionInfo: {
agentHash: string;
agentVersion: string;
dockerVersion: string;
};
registeredAt: string;
updatedAt: string;
}
// The whole program runs inside an async IIFE so that resource outputs can be awaited.
(async () => {
//
// SNS topic used as the notification target of the auto-scaling lifecycle hook.
const lifecycleTopic = new aws.sns.Topic("lifecycleTopic");
// Role assumable by the autoscaling service, allowed to publish to the lifecycle topic;
// its ARN is passed as the lifecycle hook's RoleARN in the CloudFormation template below.
const lifecyclePublishRole = new RoleBuilder()
.forService("autoscaling.amazonaws.com")
.allowOn("sns:Publish", (await lifecycleTopic.arn)!)
.build("lifecyclePublishRole");
// ECS cluster the EC2 instances join; the awaited ARN is captured by the Lambda closures.
const cluster = new aws.ecs.Cluster("cluster");
const clusterArn = await cluster.arn;
// Shared between the CloudFormation template (hook definition) and the Lambda that completes the hook.
const lifecycleHookName = "DrainEcsOnTerminate";
// Lambda subscribed to the lifecycle SNS topic. For each lifecycle notification it maps the
// EC2 instance to its ECS container instance (via the `pulumi.ec2.instance-id` attribute set in
// the instance user data) and asks ECS to put that container instance into DRAINING.
//
// The handler is deliberately best-effort: on any failure it logs and reports success, leaving
// the ASG lifecycle hook timeout to finish the termination. Note that `.promise()` *rejects* on
// an API error, so failures are handled with try/catch rather than by inspecting `$response.error`.
const lifecycleHook = new aws.serverless.Function("lifecycleHook", {
    policies: [
        aws.iam.AWSLambdaBasicExecutionRole,
        aws.iam.AmazonEC2ContainerServiceFullAccess,
    ],
}, async (event: LambdaSnsEvent, context, callback) => {
    const awssdk = await import("aws-sdk");
    const ecs = new awssdk.ECS();

    // https://aws.amazon.com/sns/faqs/#reliability
    // "... all notification messages will contain a single published message."
    const record = event.Records[0];
    console.log(`lifecycle message: ${record.Sns.Message}`);
    const lifecycleEvent = <LifecycleHookMessage>JSON.parse(record.Sns.Message);

    // Get the ECS container instance corresponding to this EC2 instance.
    let arn: string | undefined;
    try {
        const listResult = await ecs.listContainerInstances({
            cluster: clusterArn,
            filter: `attribute:pulumi.ec2.instance-id == ${lifecycleEvent.EC2InstanceId}`
        }).promise();
        arn = (listResult.containerInstanceArns || [])[0];
    } catch (err) {
        // Without the container instance ARN there's nothing we can do.
        // Let this event slip by and defer to the ASG lifecycle hook timeout.
        console.log(`ListContainerInstances: ${err instanceof Error ? err.message : String(err)}`);
        callback(null, null);
        return;
    }

    if (!arn) {
        // No container instance carries this EC2 instance ID (e.g. the message had no instance ID,
        // or the instance never registered with the cluster). Nothing to drain.
        console.log(`No container instance found for EC2 instance ${lifecycleEvent.EC2InstanceId}`);
        callback(null, null);
        return;
    }
    console.log(`Mapped EC2 instance ${lifecycleEvent.EC2InstanceId} to container instance ${arn}`);

    // Set the ECS container instance state to DRAINING.
    try {
        const drainResult = await ecs.updateContainerInstancesState({
            cluster: clusterArn,
            containerInstances: [ arn ],
            status: "DRAINING",
        }).promise();
        const instance = (drainResult.containerInstances || [])[0];
        if (instance) {
            console.log(`Draining ${instance.containerInstanceArn}, ${instance.runningTasksCount} running tasks`);
        } else {
            console.log(`UpdateContainerInstancesState: no container instance returned for ${arn}`);
        }
    } catch (err) {
        console.log(`UpdateContainerInstancesState: ${err instanceof Error ? err.message : String(err)}`);
    }
    callback(null, null);
});
// Allow SNS, and only the lifecycle topic, to invoke the lifecycle hook Lambda.
const lifecycleHookPermission = new aws.lambda.Permission("lifecycleHookPermission", {
action: "lambda:InvokeFunction",
function: lifecycleHook.lambda,
principal: "sns.amazonaws.com",
sourceArn: lifecycleTopic.arn,
});
// Deliver messages published to the lifecycle topic to the Lambda.
const lifecycleSubscription = new aws.sns.TopicSubscription("lifecycleSubscription", {
topic: lifecycleTopic,
endpoint: lifecycleHook.lambda.arn,
protocol: "lambda",
});
// The ID of this S3 bucket is used as the CloudFormation stack name, both in the
// cfn-signal call in the user data and when the stack is created below.
// NOTE(review): presumably a way to obtain an auto-generated unique name before the stack exists — confirm.
const stackName = await new aws.s3.Bucket("cluster-stack").id;
// https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#ecs-event-types
// https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs_cwe_events.html#ecs_container_instance_events
// CloudWatch Events pattern (serialized into `stateChangeRule` below) selecting container-instance
// state-change events for this cluster whose status is DRAINING and whose agent is connected.
const ecsDrainingInstanceFilter = {
source: [ "aws.ecs" ],
"detail-type": [ "ECS Container Instance State Change" ],
detail: {
clusterArn: [ await cluster.arn! ],
status: [ "DRAINING" ],
// Ignore a last state-change where this goes to "false".
agentConnected: [ true ],
},
};
// Build an order-independent fingerprint of the CPU and MEMORY entries in a resource list.
// Every other resource kind is skipped. Entries are rendered as `cpu=<n>` / `memory=<n>`,
// sorted, and joined with commas, so two lists with equal CPU/MEMORY values compare equal.
function resourceSummary(resources: EcsResource[]): string {
    const parts: string[] = [];
    resources.forEach((resource) => {
        switch (resource.name) {
            case "CPU":
                parts.push(`cpu=${resource.integerValue}`);
                break;
            case "MEMORY":
                parts.push(`memory=${resource.integerValue}`);
                break;
            default:
                // Not a resource of interest.
                break;
        }
    });
    parts.sort();
    return parts.join(",");
}
// Lambda targeted by the CloudWatch Events rule for DRAINING container instances. Once a draining
// instance has no tasks left, it completes the ASG lifecycle action so termination can proceed.
//
// Like the lifecycle hook handler this is best-effort: failures are logged and the invocation
// still reports success. `.promise()` *rejects* on an API error, so failures are handled with
// try/catch rather than by inspecting `$response.error`.
const stateChangeListener = new aws.serverless.Function("stateChangeListener", {
    policies: [
        aws.iam.AWSLambdaBasicExecutionRole,
        aws.iam.AmazonEC2ReadOnlyAccess,
        aws.iam.AutoScalingFullAccess
    ],
}, async (event: CloudwatchEvent<ContainerInstanceEventDetails>, context, callback) => {
    // Thanks to the Cloudwatch Events filter, we know the instance is draining.
    // Is this instance empty?
    // Since the event details don't include runningTasksCount, we rely on resources as a proxy --
    // if registered and available resources are the same, the instance is empty.
    const r1 = resourceSummary(event.detail.registeredResources);
    const r2 = resourceSummary(event.detail.remainingResources);
    if (r1 !== r2) {
        console.log(`Ignoring event for ${event.detail.containerInstanceArn} because it isn't empty.`);
        console.log(`registered: ${r1} remaining: ${r2}`);
        callback(null, null);
        return;
    }

    // The instance is draining and empty. Complete the lifecycle event.
    const instanceId = event.detail.ec2InstanceId;
    const awssdk = await import("aws-sdk");
    const ec2 = new awssdk.EC2();

    // Read the group name from the tag autoscaling puts on the EC2 instance.
    // Ideally we would retrieve this as an output from the CloudFormation stack and capture the value here. For that
    // to work, this function would have to be created *after* the CFN stack, which means it would be torn down
    // *before* the stack and wouldn't be in place to service final lifecycle events for the ASG.
    let autoScalingGroupName: string | undefined;
    try {
        const tagsResult = await ec2.describeTags({
            Filters: [
                { Name: "resource-id", Values: [ instanceId ] },
                { Name: "key", Values: [ "aws:autoscaling:groupName" ] },
            ],
        }).promise();
        const tags = tagsResult.Tags || [];
        autoScalingGroupName = tags.length > 0 ? tags[0].Value : undefined;
    } catch (err) {
        console.log(`DescribeTags on ${instanceId} failed: ${err instanceof Error ? err.message : String(err)}`);
        callback(null, null);
        return;
    }

    if (!autoScalingGroupName) {
        console.log(`Couldn't get aws:autoscaling:groupName tag for EC2 instance ${instanceId}`);
        callback(null, null);
        return;
    }

    // We don't need the lifecycle action token; we can complete the lifecycle action with the EC2 instance ID.
    // https://docs.aws.amazon.com/autoscaling/ec2/userguide/lifecycle-hooks.html#completing-lifecycle-hooks
    console.log(`Completing lifecycle action for ${instanceId} in ${autoScalingGroupName}`);
    const autoscaling = new awssdk.AutoScaling();
    try {
        await autoscaling.completeLifecycleAction({
            AutoScalingGroupName: autoScalingGroupName,
            LifecycleHookName: lifecycleHookName,
            InstanceId: instanceId,
            LifecycleActionResult: "CONTINUE",
        }).promise();
    } catch (err) {
        // May already have been completed.
        console.log(`CompleteLifecycleAction failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    callback(null, null);
});
// CloudWatch Events rule matching the DRAINING container-instance filter defined above.
const stateChangeRule = new aws.cloudwatch.EventRule("stateChangeRule", {
eventPattern: JSON.stringify(ecsDrainingInstanceFilter),
});
// Route matching events to the state-change listener Lambda.
const stateChangeTarget = new aws.cloudwatch.EventTarget("stateChangeTarget", {
rule: stateChangeRule.name,
arn: stateChangeListener.lambda.arn,
});
// Allow CloudWatch Events, and only this rule, to invoke the listener.
const stateChangePermission = new aws.lambda.Permission("stateChangePermission", {
action: "lambda:InvokeFunction",
function: stateChangeListener.lambda,
principal: "events.amazonaws.com",
sourceArn: stateChangeRule.arn,
});
// Most recent Amazon-owned AMI whose name ends in "amazon-ecs-optimized".
const ecsAmi = await aws.getAmi({
filter: [
{ name: "name", values: [ "*amazon-ecs-optimized" ] },
{ name: "owner-alias", values: [ "amazon" ] },
],
mostRecent: true,
});
// cloud-config user data for the instances. It installs aws-cfn-bootstrap, writes the cluster
// name and an instance-ID attribute into /etc/ecs/ecs.config, and runs cfn-signal against the
// `Instances` resource of the CloudFormation stack named `stackName`.
// NOTE(review): `$INSTANCE_ID` is not set anywhere in this script — presumably cloud-init
// provides it to bootcmd; confirm.
// NOTE(review): everything between the backticks (including the `#` lines) is part of the
// string sent to the instance, not TypeScript comments.
const userData = `#cloud-config
packages:
- aws-cfn-bootstrap
bootcmd:
- echo ECS_CLUSTER='${await cluster.name}' >> /etc/ecs/ecs.config
# Include the EC2 instance ID as an attribute on the ECS container instance so we can search for it.
# ECS makes it easy to get the EC2 instance ID for a container instance ARN, but not vice-versa.
- echo ECS_INSTANCE_ATTRIBUTES='{"pulumi.ec2.instance-id":"'"$INSTANCE_ID"'"}' >> /etc/ecs/ecs.config
runcmd:
- /opt/aws/bin/cfn-signal \
--region '${(await aws.getRegion({ current: true })).name}' \
--stack '${stackName}' \
--resource Instances
# 4
`;
// Role assumed by the EC2 instances, with the managed ECS-for-EC2 and SSM policies attached.
const instanceRole = new RoleBuilder()
.forService("ec2.amazonaws.com")
.attachPolicy(aws.iam.AmazonEC2ContainerServiceforEC2Role)
.attachPolicy(aws.iam.AmazonEC2RoleforSSM)
.build("instanceRole");
// Instance profile wrapping that role so it can be referenced from the launch configuration.
const instanceProfile = new aws.iam.InstanceProfile("instanceProfile", {
role: instanceRole,
});
// Launch configuration used by the auto-scaling group declared in the CloudFormation template.
const instanceLaunchConfiguration = new aws.ec2.LaunchConfiguration("launchConfiguration", {
imageId: ecsAmi.imageId,
instanceType: "t2.micro",
iamInstanceProfile: instanceProfile.id,
userData: userData,
});
// Spread the group over the first `azCount` availability zones returned for the region.
const azCount = 2;
const azNames = (await aws.getAvailabilityZones()).names.slice(0, azCount);
// Baseline group size; the template derives min, desired, max and signal counts from it.
const instanceCount = 2;
// CloudFormation template (YAML) declaring the auto-scaling group `Instances`. The group is
// sized from `instanceCount`, uses the launch configuration above, and carries a terminating
// lifecycle hook that notifies `lifecycleTopic` using `lifecyclePublishRole`. The creation and
// update policies wait for resource signals, which the instances send via cfn-signal in their
// user data.
const template = `
AWSTemplateFormatVersion: '2010-09-09'
Outputs:
Instances:
Value: !Ref Instances
Resources:
Instances:
Type: AWS::AutoScaling::AutoScalingGroup
Properties:
MaxSize: ${instanceCount*2}
MinSize: ${instanceCount}
DesiredCapacity: ${instanceCount}
LaunchConfigurationName: "${await instanceLaunchConfiguration.id}"
AvailabilityZones: [ ${azNames.join(",")} ]
HealthCheckGracePeriod: 120
HealthCheckType: EC2
LifecycleHookSpecificationList:
- LifecycleHookName: ${lifecycleHookName}
LifecycleTransition: autoscaling:EC2_INSTANCE_TERMINATING
NotificationTargetARN: ${await lifecycleTopic.arn}
RoleARN: ${await lifecyclePublishRole.arn}
DefaultResult: CONTINUE
CreationPolicy:
ResourceSignal:
Count: ${instanceCount}
Timeout: PT5M
UpdatePolicy:
AutoScalingRollingUpdate:
MaxBatchSize: ${Math.ceil(instanceCount / 10)}
MinInstancesInService: ${instanceCount}
PauseTime: PT5M
SuspendProcesses:
- ScheduledActions
WaitOnResourceSignals: true
`;
// Create the CloudFormation stack under the same name the instances pass to cfn-signal.
const stack = new aws.cloudformation.Stack("clusterStack", {
name: stackName,
templateBody: template,
});
})(); // close async IIFE
import * as crypto from "crypto";
import * as aws from "@pulumi/aws";
/**
 * Short, deterministic fingerprint of a string: the first eight hex characters
 * of its SHA-256 digest.
 */
function hash(s: string) {
    const hasher = crypto.createHash("SHA256");
    hasher.update(s);
    const hexDigest = hasher.digest("hex");
    return hexDigest.slice(0, 8);
}
/**
 * Fluent builder for an IAM role together with its trust policy, an optional inline
 * permissions policy, and any number of attached managed policies.
 *
 * Configuration methods return the builder so calls can be chained; `build` creates the
 * Pulumi resources.
 */
export class RoleBuilder {
    /** Statements of the role's trust (assume-role) policy. */
    private readonly assumeRoleStatements: aws.iam.PolicyStatement[] = [];
    /** Statements of the role's inline permissions policy. */
    private readonly policyStatements: aws.iam.PolicyStatement[] = [];
    /** ARNs of managed policies to attach to the role. */
    private readonly attachPolicyArns: aws.ARN[] = [];

    constructor() {}

    /** Allow the given service principal (e.g. `ec2.amazonaws.com`) to assume the role. */
    forService(serviceName: string): RoleBuilder {
        this.assumeRoleStatements.push({
            Action: "sts:AssumeRole",
            Effect: "Allow",
            Principal: {
                Service: serviceName,
            },
        });
        return this;
    }

    /** Append a caller-supplied statement, unchanged, to the inline permissions policy. */
    addStatement(s: aws.iam.PolicyStatement): RoleBuilder {
        this.policyStatements.push(s);
        return this;
    }

    /**
     * Allow the given actions on all resources.
     *
     * The statement carries `Resource: "*"` explicitly: a permissions-policy statement
     * without a `Resource` element is rejected by IAM.
     */
    allow(...actions: string[]): RoleBuilder {
        return this.allowOn(actions, "*");
    }

    /** Allow the given action(s) on the given resource ARN(s). */
    allowOn(action: string | string[], resource: string | string[]): RoleBuilder {
        this.addStatement({
            Action: action,
            Effect: "Allow",
            Resource: resource,
        });
        return this;
    }

    /** Attach a managed policy, identified by ARN, to the role. */
    attachPolicy(policyArn: aws.ARN): RoleBuilder {
        this.attachPolicyArns.push(policyArn);
        return this;
    }

    /**
     * Create the role and its dependent resources.
     *
     * @param name Pulumi resource name of the role; also the prefix of the names given to
     *             the policy attachment and inline policy resources.
     * @returns The created role.
     */
    build(name: string): aws.iam.Role {
        const role = new aws.iam.Role(name, {
            assumeRolePolicy: JSON.stringify(<aws.iam.PolicyDocument>{
                Version: "2012-10-17",
                Statement: this.assumeRoleStatements,
            }),
        });

        // One attachment per managed policy. The ARN hash keeps resource names unique and
        // stable for a given policy.
        for (const policy of this.attachPolicyArns) {
            new aws.iam.RolePolicyAttachment(name + "-" + hash(policy), {
                role: role,
                policyArn: policy,
            });
        }

        // Only create the inline policy when there is something to put in it.
        if (this.policyStatements.length > 0) {
            new aws.iam.RolePolicy(name + "-policies", {
                role: role.name,
                policy: JSON.stringify(<aws.iam.PolicyDocument>{
                    Version: "2012-10-17",
                    Statement: this.policyStatements,
                }),
            });
        }
        return role;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment