Skip to content

Instantly share code, notes, and snippets.

@richardhboyd
Created July 8, 2020 19:04
Show Gist options
  • Save richardhboyd/3f3aef9b55583c90c8b3fae1b9841260 to your computer and use it in GitHub Desktop.
Save richardhboyd/3f3aef9b55583c90c8b3fae1b9841260 to your computer and use it in GitHub Desktop.
Step Functions state machine that bootstraps a Cloud9 environment
AWSTemplateFormatVersion: "2010-09-09"
Transform: AWS::Serverless-2016-10-31
# Deploy-time inputs. PolicyArn and UserDataScript accept the sentinel value
# "NONE", which the Conditions section tests to switch that feature off.
Parameters:
  # Bucket the optional user-data script is downloaded from.
  BucketName:
    Type: String
    Default: 'cf-templates-1xnac3rwgtxo7-us-west-2'
    Description: 'Region-specific assets S3 bucket name (e.g. ee-assets-prod-us-east-1)'
  # Passed to the send-command Lambda as the VolumeSize environment variable.
  EBSVolumeSize:
    Type: Number
    Default: 25
    Description: 'Size of EBS Volume (in GB)'
  # Extra managed policy attached to the role created for the instance.
  PolicyArn:
    Type: String
    Default: 'arn:aws:iam::aws:policy/AdministratorAccess'
    Description: 'Arn for IAM Policy to attach to Instance Profile'
  # Object key (inside BucketName) of a script appended to the SSM command.
  UserDataScript:
    Type: String
    Default: 'NONE'
    Description: 'File name for user-data script'
# Feature switches: each condition is true unless its parameter is exactly
# the string "NONE".
Conditions:
  AddPolicy:
    Fn::Not:
      - Fn::Equals:
          - !Ref PolicyArn
          - 'NONE'
  AddUserData:
    Fn::Not:
      - Fn::Equals:
          - !Ref UserDataScript
          - 'NONE'
Resources:
# Workflow started by an EventBridge rule on EC2 instance tag changes.
# Steps run strictly in sequence: health check -> IAM role/profile ->
# fixed 120 s wait -> SSM command -> poll until the command has finished.
# Every Task retries on States.TaskFailed, so the Lambdas signal "not yet"
# simply by raising.
BootStrapC9StateMachine:
  Type: AWS::Serverless::StateMachine
  Properties:
    Definition:
      StartAt: Environment Health Check
      States:
        Environment Health Check:
          Type: Task
          Resource: '${EnvironmentHealthCheckFunctionArn}'
          Next: Get or Create IAM Role
          Retry:
            - ErrorEquals: [States.TaskFailed]
              IntervalSeconds: 45
              MaxAttempts: 15
              BackoffRate: 1.5
        Get or Create IAM Role:
          Type: Task
          Resource: '${GetOrCreateIAMRoleFunctionArn}'
          Next: Wait Two Minutes
          Retry:
            - ErrorEquals: [States.TaskFailed]
              IntervalSeconds: 30
              MaxAttempts: 5
              BackoffRate: 1.5
        Wait Two Minutes:
          Type: Wait
          Seconds: 120
          Next: Send Command
        Send Command:
          Type: Task
          Resource: '${SendCommandFunctionArn}'
          Next: Wait To Stabilize
          Retry:
            - ErrorEquals: [States.TaskFailed]
              IntervalSeconds: 90
              MaxAttempts: 10
              BackoffRate: 1.5
        Wait To Stabilize:
          Type: Task
          Resource: '${WaitForStabilizeFunctionArn}'
          End: true
          Retry:
            - ErrorEquals: [States.TaskFailed]
              IntervalSeconds: 15
              MaxAttempts: 5
              BackoffRate: 1.5
    # Values substituted for the ${...} placeholders in Definition.
    DefinitionSubstitutions:
      EnvironmentHealthCheckFunctionArn: !GetAtt EnvironmentHealthCheckFunction.Arn
      GetOrCreateIAMRoleFunctionArn: !GetAtt GetOrCreateIAMRoleFunction.Arn
      SendCommandFunctionArn: !GetAtt SendCommandFunction.Arn
      WaitForStabilizeFunctionArn: !GetAtt WaitForStabilizeFunction.Arn
    Events:
      NewCloud9Event:
        Type: EventBridgeRule
        Properties:
          Pattern:
            source: [aws.tag]
            detail-type: ['Tag Change on Resource']
            detail:
              service: [ec2]
              resource-type: [instance]
              # NOTE(review): CustomFunction in this template rewrites
              # detail.version on the deployed rule to the numeric [1];
              # presumably the value does not survive deployment as a
              # number - confirm before removing either side.
              version: [1]
              # A change to any one of these tag keys matches.
              changed-tag-keys:
                - 'aws:cloudformation:stack-name'
                - 'aws:cloud9:environment'
                - 'aws:cloudformation:stack-id'
                - 'aws:cloudformation:logical-id'
                - 'aws:cloud9:owner'
    # One invoke permission per Lambda referenced by the definition.
    Policies:
      - LambdaInvokePolicy:
          FunctionName: !Ref EnvironmentHealthCheckFunction
      - LambdaInvokePolicy:
          FunctionName: !Ref GetOrCreateIAMRoleFunction
      - LambdaInvokePolicy:
          FunctionName: !Ref SendCommandFunction
      - LambdaInvokePolicy:
          FunctionName: !Ref WaitForStabilizeFunction
# First workflow step: blocks until the Cloud9 environment reports "ready"
# AND its EC2 instance is "running", then hands both ids to the next step.
EnvironmentHealthCheckFunction:
  Type: AWS::Serverless::Function
  Properties:
    Handler: index.lambda_handler
    # python3.6 is a retired Lambda runtime; the code needs nothing newer
    # than f-strings, so any supported Python 3 runtime works.
    Runtime: python3.12
    Timeout: 900
    InlineCode: |
      import time

      import boto3

      c9_client = boto3.client('cloud9')
      ec2_client = boto3.client('ec2')

      POLL_SECONDS = 10


      def _instance_state(instance_id):
          """Return the EC2 state name, or 'UNKNOWN' if the response has none.

          Errors raised by describe_instances itself are not caught; they
          fail the invocation and the state machine's Retry takes over.
          """
          response = ec2_client.describe_instances(InstanceIds=[instance_id])
          try:
              return response['Reservations'][0]['Instances'][0]['State']['Name']
          except (IndexError, KeyError) as e:
              print(e)
              return 'UNKNOWN'


      def lambda_handler(event, context):
          """Wait for the environment and its instance to come up.

          event: the EventBridge tag-change event. The instance id is the
              part after "/" in the first resource ARN; the environment id
              is the value of the aws:cloud9:environment tag. A KeyError is
              raised if that tag is absent.
          Returns {'instance_id': ..., 'environment_id': ...}.
          """
          instance_id = event['resources'][0].split("/")[1]
          environment_id = event['detail']['tags']['aws:cloud9:environment']

          while True:
              environment_status = c9_client.describe_environment_status(
                  environmentId=environment_id
              )
              state = _instance_state(instance_id)
              # Both conditions must hold. The previous `and`-joined loop
              # test stopped waiting as soon as either one was satisfied.
              if environment_status['status'] == 'ready' and state == 'running':
                  break
              print("sleeping")
              time.sleep(POLL_SECONDS)

          return {'instance_id': instance_id, 'environment_id': environment_id}
    Policies:
      - Statement:
          - Sid: Cloud9
            Effect: Allow
            Action:
              - 'cloud9:DescribeEnvironmentStatus'
            Resource: '*'
          - Sid: EC2
            Effect: Allow
            Action:
              - 'ec2:DescribeInstances'
            Resource: '*'
# Second workflow step: makes sure a role and instance profile named
# cloud9-profile-<environment id> exist, attaches them to the instance,
# waits for the association, then stops the instance.
GetOrCreateIAMRoleFunction:
  Type: AWS::Serverless::Function
  Properties:
    Handler: index.lambda_handler
    # python3.6 is a retired Lambda runtime.
    Runtime: python3.12
    Timeout: 900
    # PolicyArn is only exported when the AddPolicy condition holds.
    Environment: !If
      - AddPolicy
      - Variables:
          PolicyArn: !Ref PolicyArn
      - !Ref 'AWS::NoValue'
    InlineCode: |
      import json
      import os
      from time import sleep

      import boto3

      iam_client = boto3.client('iam')
      ec2_client = boto3.client('ec2')

      # Trust policy letting EC2 assume the role.
      ASSUME_ROLE_POLICY = {
          'Version': '2012-10-17',
          'Statement': {
              'Effect': 'Allow',
              'Principal': {'Service': 'ec2.amazonaws.com'},
              'Action': 'sts:AssumeRole'
          }
      }

      # Always attached; a failure here fails the invocation.
      REQUIRED_POLICY_ARNS = (
          'arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore',
          'arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy',
      )


      def _associations(instance_id, states):
          """List the instance's profile associations in the given states."""
          response = ec2_client.describe_iam_instance_profile_associations(
              Filters=[
                  {'Name': 'instance-id', 'Values': [instance_id]},
                  {'Name': 'state', 'Values': states},
              ]
          )
          print(response)
          return response['IamInstanceProfileAssociations']


      def lambda_handler(event, context):
          """Create-or-reuse the role/profile and bind it to the instance.

          event: {'instance_id': ..., 'environment_id': ...}.
          Returns the same two ids plus 'role_name' and
          'instance_profile_name'.
          Every step tolerates "already done", so a retried invocation
          picks up wherever the previous attempt stopped.
          """
          print(event)
          instance_id = event["instance_id"]
          environment_id = event["environment_id"]
          role_name = f'cloud9-profile-{environment_id}'
          instance_profile_name = f'cloud9-profile-{environment_id}'

          try:
              iam_client.create_role(
                  Path='/',
                  RoleName=role_name,
                  AssumeRolePolicyDocument=json.dumps(ASSUME_ROLE_POLICY),
                  Description='EC2 Instance Profile Role'
              )
              # Pause after creating the role before it is used.
              sleep(30)
          except iam_client.exceptions.EntityAlreadyExistsException:
              pass

          # Optional extra policy. Still best effort, but the variable is
          # looked up explicitly and failures are logged instead of being
          # silently discarded.
          policy_arn = os.environ.get('PolicyArn')
          if policy_arn:
              try:
                  iam_client.attach_role_policy(
                      RoleName=role_name,
                      PolicyArn=policy_arn
                  )
              except Exception as e:
                  print(f'Could not attach {policy_arn} to {role_name}: {e}')

          for required_arn in REQUIRED_POLICY_ARNS:
              iam_client.attach_role_policy(
                  RoleName=role_name,
                  PolicyArn=required_arn
              )
          # TODO Create Policy for resizing EBS

          try:
              create_instance_profile_response = iam_client.create_instance_profile(
                  InstanceProfileName=instance_profile_name
              )
              print(create_instance_profile_response)
              sleep(30)
          except iam_client.exceptions.EntityAlreadyExistsException:
              pass

          try:
              response = iam_client.add_role_to_instance_profile(
                  InstanceProfileName=instance_profile_name,
                  RoleName=role_name
              )
              print(response)
          except iam_client.exceptions.LimitExceededException as e:
              # The profile already holds its one role (earlier attempt).
              print(e)

          # Associate independently of the step above. Previously a retry
          # that hit LimitExceededException skipped the association and
          # then waited forever for one that was never requested.
          if not _associations(instance_id, ['associating', 'associated']):
              response = ec2_client.associate_iam_instance_profile(
                  IamInstanceProfile={'Name': instance_profile_name},
                  InstanceId=instance_id
              )
              print(response)

          while not _associations(instance_id, ['associated']):
              print("waiting for association to finish")
              sleep(30)

          # The Send Command step starts the instance again when it finds
          # it stopped.
          print("Stopping instance")
          response = ec2_client.stop_instances(InstanceIds=[instance_id])
          print(response)

          return {
              "instance_id": instance_id,
              "environment_id": environment_id,
              "role_name": role_name,
              "instance_profile_name": instance_profile_name
          }
    # Only the calls made above, instead of Action "*" on Resource "*".
    Policies:
      - Statement:
          - Sid: ManageCloud9Role
            Effect: Allow
            Action:
              - 'iam:CreateRole'
              - 'iam:AttachRolePolicy'
              - 'iam:PassRole'
            Resource: !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cloud9-profile-*'
          - Sid: ManageCloud9InstanceProfile
            Effect: Allow
            Action:
              - 'iam:CreateInstanceProfile'
              - 'iam:AddRoleToInstanceProfile'
            Resource: !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:instance-profile/cloud9-profile-*'
          - Sid: AssociateAndStop
            Effect: Allow
            Action:
              - 'ec2:AssociateIamInstanceProfile'
              - 'ec2:DescribeIamInstanceProfileAssociations'
              - 'ec2:StopInstances'
            Resource: '*'
# Third workflow step: once the instance is running and registered with
# SSM, sends a shell script that grows the root EBS volume and then runs
# the optional user-data script fetched from S3.
SendCommandFunction:
  Type: AWS::Serverless::Function
  Properties:
    Handler: index.lambda_handler
    # python3.6 is a retired Lambda runtime.
    Runtime: python3.12
    # Explicit timeout: the handler makes several AWS calls plus an
    # optional S3 download, which does not fit the short default.
    Timeout: 60
    Environment:
      Variables:
        VolumeSize: !Ref EBSVolumeSize
        S3Bucket: !If
          - AddUserData
          - !Ref BucketName
          - !Ref 'AWS::NoValue'
        # The value must be a plain string. The true-branch used to be a
        # nested "S3Object: ..." mapping, which is not a valid variable.
        S3Object: !If
          - AddUserData
          - !Ref UserDataScript
          - !Ref 'AWS::NoValue'
    InlineCode: |
      import os
      from io import BytesIO

      import boto3

      ssm_client = boto3.client('ssm')
      ec2_client = boto3.client('ec2')
      s3_resource = boto3.resource('s3')


      def get_preamble():
          """Build the shell script that resizes the root volume.

          The default CLI region is the region this function's SSM client
          talks to, which is where the instance lives. It used to be the
          second label of the instance hostname, which is "ec2" rather
          than a region name for ip-x.ec2.internal hosts (us-east-1).
          IN and arrIN are still set in case an appended user-data script
          reads them.
          """
          region = ssm_client.meta.region_name
          return f"""
          IN=$(curl http://169.254.169.254/latest/meta-data//hostname)
          arrIN=(${{IN//./ }})
          aws configure set profile.default.region {region}
          SIZE={int(os.environ.get('VolumeSize', '25'))}
          # Get the ID of the environment host Amazon EC2 instance.
          INSTANCEID=$(curl http://169.254.169.254/latest/meta-data//instance-id)
          # Get the ID of the Amazon EBS volume associated with the instance.
          VOLUMEID=$(aws ec2 describe-instances \
            --instance-id $INSTANCEID \
            --query "Reservations[0].Instances[0].BlockDeviceMappings[0].Ebs.VolumeId" \
            --output text)
          # Resize the EBS volume.
          aws ec2 modify-volume --volume-id $VOLUMEID --size $SIZE
          # Wait for the resize to finish.
          while [ \
            "$(aws ec2 describe-volumes-modifications \
              --volume-id $VOLUMEID \
              --filters Name=modification-state,Values="optimizing","completed" \
              --query "length(VolumesModifications)" \
              --output text)" != "1" ]; do
          sleep 1
          done
          if [ $(readlink -f /dev/xvda) = "/dev/xvda" ]
          then
            # Rewrite the partition table so that the partition takes up all the space that it can.
            sudo growpart /dev/xvda 1
            # Expand the size of the file system.
            sudo resize2fs /dev/xvda1
          else
            # Rewrite the partition table so that the partition takes up all the space that it can.
            sudo growpart /dev/nvme0n1 1
            # Expand the size of the file system.
            sudo resize2fs /dev/nvme0n1p1
          fi
          """


      def ssm_ready(ssm_client, instance_id):
          """True when SSM lists the instance as a managed instance."""
          try:
              response = ssm_client.describe_instance_information(
                  Filters=[{'Key': 'InstanceIds', 'Values': [instance_id]}]
              )
              return len(response['InstanceInformationList']) >= 1
          except ssm_client.exceptions.InvalidInstanceId:
              return False


      def instance_ready(instance_id):
          """True when the instance is running.

          A stopped instance is started and False is returned, so the
          retried invocation finds it booting or running. States other
          than stopped/stopping/pending/running raise.
          """
          response = ec2_client.describe_instances(InstanceIds=[instance_id])
          try:
              state = response['Reservations'][0]['Instances'][0]['State']['Name']
          except Exception as e:
              print(e)
              return False
          if state == 'running':
              return True
          if state == 'stopped':
              print("restarting instance")
              ec2_client.start_instances(InstanceIds=[instance_id])
              return False
          if state in ['stopping', 'pending']:
              return False
          raise Exception("IDK what's happening")


      def _user_data_script():
          """Return the user-data script text, or None when there is none.

          None is returned when the S3 variables are not set, or when the
          download fails. Failure stays best effort: only the preamble is
          sent, but the reason is logged.
          """
          bucket_name = os.environ.get("S3Bucket")
          object_key = os.environ.get("S3Object")
          if not bucket_name or not object_key:
              print("No user-data script configured")
              return None
          print(bucket_name)
          print(object_key)
          try:
              output = BytesIO()
              s3_resource.Bucket(bucket_name).Object(object_key).download_fileobj(output)
              return output.getvalue().decode('utf-8')
          except Exception as e:
              print(f'Could not load user-data script, sending preamble only: {e}')
              return None


      def lambda_handler(event, context):
          """Send the bootstrap script to the instance through SSM.

          event: must contain 'instance_id'; other keys are ignored.
          Returns {'instance_id': ..., 'command_id': ...}.
          Raises while the instance or SSM is not ready so that the state
          machine's Retry calls again later.
          """
          print(event)
          instance_id = event["instance_id"]

          if not instance_ready(instance_id):
              raise Exception("Instance not ready yet")
          if not ssm_ready(ssm_client, instance_id):
              raise Exception("We're not ready yet")

          commands = get_preamble()
          script = _user_data_script()
          if script is not None:
              commands = commands + '\n' + script + '\n'

          send_command_response = ssm_client.send_command(
              InstanceIds=[instance_id],
              DocumentName='AWS-RunShellScript',
              Parameters={'commands': commands.split('\n')},
              CloudWatchOutputConfig={
                  'CloudWatchLogGroupName': f'ssm-output-{instance_id}',
                  'CloudWatchOutputEnabled': True
              }
          )
          return {
              "instance_id": instance_id,
              "command_id": send_command_response['Command']['CommandId']
          }
    # Only the calls made above, instead of Action "*" on Resource "*".
    Policies:
      - Statement:
          - Sid: InstanceAndCommand
            Effect: Allow
            Action:
              - 'ec2:DescribeInstances'
              - 'ec2:StartInstances'
              - 'ssm:DescribeInstanceInformation'
              - 'ssm:SendCommand'
            Resource: '*'
          - Sid: ReadUserDataScript
            Effect: Allow
            Action:
              - 's3:GetObject'
            Resource: !Sub 'arn:${AWS::Partition}:s3:::${BucketName}/*'
# Final workflow step: raises while the SSM command is still in flight so
# the state machine's Retry polls again; succeeds once the command has
# reached any other status.
WaitForStabilizeFunction:
  Type: AWS::Serverless::Function
  Properties:
    Handler: index.lambda_handler
    # python3.6 is a retired Lambda runtime.
    Runtime: python3.12
    # Explicit timeout instead of relying on the short default.
    Timeout: 30
    InlineCode: |
      import boto3

      # Built once per execution environment rather than on every call.
      ssm_client = boto3.client('ssm')

      IN_FLIGHT_STATUSES = ('Pending', 'InProgress', 'Delayed')


      def lambda_handler(event, context):
          """Report whether the SSM command has finished.

          event: {'command_id': ..., 'instance_id': ...}.
          Raises Exception("Command in Progress") while the status is one
          of IN_FLIGHT_STATUSES.
          Returns the ids plus the final 'status'. Any status outside
          IN_FLIGHT_STATUSES, including unsuccessful ones, counts as
          finished; it is logged and returned so the execution output
          shows how the command ended.
          """
          command_id = event["command_id"]
          instance_id = event["instance_id"]
          response = ssm_client.get_command_invocation(
              CommandId=command_id,
              InstanceId=instance_id
          )
          status = response['Status']
          if status in IN_FLIGHT_STATUSES:
              raise Exception("Command in Progress")
          print(f'Command {command_id} on {instance_id} ended with status {status}')
          return {
              'instance_id': instance_id,
              'command_id': command_id,
              'status': status
          }
    Policies:
      - Statement:
          - Sid: ReadCommandStatus
            Effect: Allow
            Action:
              - 'ssm:GetCommandInvocation'
            Resource: '*'
# Backs the custom resource: rewrites the deployed EventBridge rule so that
# detail.version in its pattern is the numeric list [1].
# NOTE(review): presumably needed because the value does not reach the rule
# as a number when deployed from the template - confirm.
CustomFunction:
  Type: AWS::Serverless::Function
  Properties:
    Handler: index.lambda_handler
    # python3.6 is a retired Lambda runtime.
    Runtime: python3.12
    # Explicit timeout: if the function is killed before it answers,
    # CloudFormation is left waiting for a response that never comes.
    Timeout: 30
    Environment:
      Variables:
        RuleName: !Ref BootStrapC9StateMachineNewCloud9Event
    InlineCode: |
      import json
      import os

      import boto3
      import cfnresponse


      def lambda_handler(event, context):
          """Patch the rule's event pattern and always answer SUCCESS.

          event: the CloudFormation custom-resource request.
          The rule is patched on every request type except Delete, where
          there is nothing worth patching.
          Failures are still reported as SUCCESS so they never block the
          stack, but are now logged instead of discarded.
          """
          # Reuse the existing physical id on Update/Delete so the resource
          # is not treated as replaced; Create falls back to the request id.
          physical_id = event.get('PhysicalResourceId') or event["RequestId"]
          response_data = {}
          try:
              if event.get('RequestType') != 'Delete':
                  client = boto3.client('events')
                  rule = client.describe_rule(Name=os.environ.get('RuleName'))
                  json_event = json.loads(rule['EventPattern'])
                  json_event['detail']['version'] = [1]
                  response_data = client.put_rule(
                      Name=rule['Name'],
                      EventPattern=json.dumps(json_event),
                      State=rule['State']
                  )
          except Exception as e:
              print(f'Could not patch the event rule: {e}')
              response_data = {}
          cfnresponse.send(event, context, cfnresponse.SUCCESS, response_data, physical_id)
    Policies:
      - Statement:
          - Sid: EBPolicy
            Effect: Allow
            Action:
              - 'events:DescribeRule'
              - 'events:PutRule'
            Resource: !GetAtt BootStrapC9StateMachineNewCloud9Event.Arn
# Invokes CustomFunction during stack operations. No other properties are
# passed, so the function receives only the standard request fields.
CustomResource:
  Type: Custom::CustomResource
  Properties:
    ServiceToken:
      Fn::GetAtt:
        - CustomFunction
        - Arn
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment