Skip to content

Instantly share code, notes, and snippets.

@gnilchee
Last active January 24, 2018 07:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gnilchee/cc852bd8519b4c4e656201ffa171f00a to your computer and use it in GitHub Desktop.
Save gnilchee/cc852bd8519b4c4e656201ffa171f00a to your computer and use it in GitHub Desktop.
Rotate out instances by bringing up new instances, making sure they are healthy, and then removing the old ones
#!/usr/bin/env python
import boto3
from time import sleep
from concurrent.futures import ProcessPoolExecutor, wait, as_completed
def get_asg_instances(asg_name):
    '''
    Look up one Auto Scaling group by name and return its description.

    Despite the function name, the return value is the whole group
    dictionary from describe_auto_scaling_groups, not just the instances.
    Callers in this file read these keys from it:
    :MinSize, DesiredCapacity, MaxSize - current sizing of the group
    :Instances - list of instance dictionaries (InstanceId,
                 LifecycleState, ...)

    Raises SystemExit if the API call fails or no group has that name.
    '''
    try:
        asg_client = boto3.client('autoscaling')
        response = asg_client.describe_auto_scaling_groups(
            AutoScalingGroupNames=[
                asg_name,
            ],
        )
        groups = response['AutoScalingGroups']
    except Exception as err:
        raise SystemExit("ASG {asg} - get_asg_instances failed due to: {err}"
                         .format(asg=asg_name, err=err))
    # An empty result means the name matched nothing. Report that plainly
    # instead of letting groups[0] fail with "list index out of range".
    if not groups:
        raise SystemExit("ASG {asg} - get_asg_instances failed due to: "
                         "no Auto Scaling group with that name was found"
                         .format(asg=asg_name))
    return groups[0]
def get_instance_health(elb_name):
    '''
    Ask the ELB for the health of its instances and return it flattened
    into a single dictionary of the form
    {InstanceId: State,...}
    Any failure along the way is converted into SystemExit.
    '''
    try:
        client = boto3.client('elb')
        described = client.describe_instance_health(LoadBalancerName=elb_name)
        return {
            entry['InstanceId']: entry['State']
            for entry in described['InstanceStates']
        }
    except Exception as err:
        message = "ELB {elb} - get_instance_health failed due to: {err}".format(elb=elb_name, err=err)
        raise SystemExit(message)
def scale_asg(asg_name, asg_min=2, asg_desired=2, asg_max=8):
    '''
    Apply new minimum, desired and maximum sizes to the ASG in a single
    update call; used for both scale out and scale in.
    Hands back the HTTP status code of that call so the caller can
    confirm success. Any failure is converted into SystemExit.
    '''
    sizing = {
        'MinSize': asg_min,
        'DesiredCapacity': asg_desired,
        'MaxSize': asg_max,
    }
    try:
        client = boto3.client('autoscaling')
        result = client.update_auto_scaling_group(
            AutoScalingGroupName=asg_name, **sizing)
        status_code = result['ResponseMetadata']['HTTPStatusCode']
    except Exception as err:
        message = "ASG {asg} - scale_asg failed due to: {err}".format(
            asg=asg_name, err=err)
        raise SystemExit(message)
    return status_code
def wait_for_elb_health(elb_name, timeout=10):
    '''
    Poll get_instance_health every 15 seconds until every instance the
    ELB reports on is InService, for up to the timeout.

    :elb_name - name of the load balancer to poll
    :timeout - maximum time to wait, in minutes

    Returns None once all instances are healthy.
    Raises SystemExit if polling fails, or if the instances are still not
    healthy when the timeout expires, so that running out of time is
    never mistaken for success by the caller.
    '''
    try:
        retries = timeout * 4  # four 15 second polls per minute
        print("ELB {elb}: Waiting up to {tmout}min for Instances to become healthy"
              .format(elb=elb_name, tmout=timeout))
        while retries > 0:
            health = get_instance_health(elb_name)
            # Only InService counts as healthy; OutOfService and Unknown
            # both mean the instance is not confirmed to take traffic.
            if all(state == 'InService' for state in health.values()):
                print("ELB {} - Instances are healthy".format(elb_name))
                return
            print("ELB {}: Sleeping 15 seconds".format(elb_name))
            sleep(15)
            retries -= 1
    except Exception as err:
        raise SystemExit("ELB {elb} - wait_for_elb_health failed due to: {err}"
                         .format(elb=elb_name, err=err))
    # Reaching this point means every retry was used up without success.
    raise SystemExit("ELB {elb} - wait_for_elb_health timed out after {tmout}min "
                     "waiting for Instances to become healthy"
                     .format(elb=elb_name, tmout=timeout))
def wait_for_asg_health(asg_name, expected_instances, timeout=5):
    '''
    Poll the ASG every 15 seconds after scale_asg has run, until it holds
    exactly the expected number of instances and all of them are
    InService. While waiting, prints which condition is not yet met.

    :asg_name - name of the Auto Scaling group to poll
    :expected_instances - instance count the group should settle at
    :timeout - maximum time to wait, in minutes

    Returns None once the group has settled.
    Raises SystemExit if polling fails, or if the group has not settled
    when the timeout expires, so that running out of time is never
    mistaken for success by the caller.
    '''
    try:
        retries = timeout * 4  # four 15 second polls per minute
        print("ASG {asg}: Waiting up to {tmout}min for Instances to scale."
              .format(asg=asg_name, tmout=timeout))
        while retries > 0:
            health = get_asg_instances(asg_name)
            states = [server['LifecycleState'] for server in health['Instances']]
            if len(states) != expected_instances:
                print("ASG {} NotExpectedLength: Sleeping 15 seconds".format(asg_name))
            elif any(state != 'InService' for state in states):
                # Covers every transitional state (Pending, Pending:Wait,
                # Terminating, ...) rather than the literal 'Pending' only.
                print("ASG {} NotInService: Sleeping 15 seconds".format(asg_name))
            else:
                print("ASG {} scaled successfully".format(asg_name))
                return
            sleep(15)
            retries -= 1
    except Exception as err:
        raise SystemExit("ASG {asg}: wait_for_asg_health failed due to: {err}".format(asg=asg_name, err=err))
    # Reaching this point means every retry was used up without success.
    raise SystemExit("ASG {asg}: wait_for_asg_health timed out after {tmout}min "
                     "waiting for {num} InService instances"
                     .format(asg=asg_name, tmout=timeout, num=expected_instances))
def _set_scale_in_protection(asg_name, instance_ids, protected):
    '''
    Turn scale-in protection on or off for the given instances of an ASG.
    API errors propagate so the caller can word its own failure message.
    '''
    asg_client = boto3.client('autoscaling')
    asg_client.set_instance_protection(
        InstanceIds=instance_ids,
        AutoScalingGroupName=asg_name,
        ProtectedFromScaleIn=protected
    )


def rotate_asg_now(asg_name, elb_name):
    '''
    Replace every instance of an ASG with a freshly launched one.

    Steps:
    1. Double the desired capacity (raising MaxSize if needed) and wait
       for the new instances in the ASG and in the ELB.
    2. Protect the new instances from scale-in, restore the original
       sizing so only the old instances are removed, and wait again.
    3. Remove scale-in protection from the instances that remain.

    :asg_name - name of the Auto Scaling group to rotate
    :elb_name - name of the load balancer used for the health check

    Returns None. Raises SystemExit on any failure, including new
    instances that are not InService in the ELB before scale-in.
    '''
    # Scale out
    response = get_asg_instances(asg_name)
    orig_min = response['MinSize']
    orig_desired = response['DesiredCapacity']
    orig_max = response['MaxSize']
    if orig_desired == 0:
        # Doubling zero launches nothing, so there would be no new
        # instances to protect and no old ones to rotate out.
        print("ASG {}: Desired capacity is 0, nothing to rotate".format(asg_name))
        return
    target_val = orig_desired * 2
    target_max = max(target_val, orig_max)
    orig_instances = {instance['InstanceId'] for instance in response['Instances']}
    grow_asg_resp = scale_asg(asg_name, asg_min=target_val, asg_desired=target_val, asg_max=target_max)
    if grow_asg_resp != 200:
        raise SystemExit("Unexpected Response Code growing ASG")
    wait_for_asg_health(asg_name, expected_instances=target_val, timeout=5)
    wait_for_elb_health(elb_name, timeout=10)
    # Scale in
    response = get_asg_instances(asg_name)
    instances_to_protect = [
        instance['InstanceId'] for instance in response['Instances']
        if instance['InstanceId'] not in orig_instances
    ]
    # The wait calls above return no status, so confirm directly that the
    # replacements exist and serve traffic before any old instance goes.
    if not instances_to_protect:
        raise SystemExit("ASG {asg}: No new instances were launched, refusing to scale in"
                         .format(asg=asg_name))
    elb_health = get_instance_health(elb_name)
    not_in_service = [instance_id for instance_id in instances_to_protect
                      if elb_health.get(instance_id) != 'InService']
    if not_in_service:
        raise SystemExit("ASG {asg}: New instances not InService in ELB {elb}, "
                         "refusing to scale in: {ids}"
                         .format(asg=asg_name, elb=elb_name,
                                 ids=', '.join(not_in_service)))
    try:
        _set_scale_in_protection(asg_name, instances_to_protect, True)
    except Exception as err:
        raise SystemExit("Issue setting instance protection due to: {}".format(err))
    shrink_asg_resp = scale_asg(asg_name, asg_min=orig_min, asg_desired=orig_desired, asg_max=orig_max)
    if shrink_asg_resp != 200:
        raise SystemExit("Unexpected Response Code shrinking ASG")
    wait_for_asg_health(asg_name, expected_instances=orig_desired, timeout=5)
    # Unprotect whatever is left so later scale-in events work normally.
    response = get_asg_instances(asg_name)
    instances_to_unprotect = [instance['InstanceId'] for instance in response['Instances']]
    try:
        _set_scale_in_protection(asg_name, instances_to_unprotect, False)
    except Exception as err:
        raise SystemExit("ASG {asg}: Issue unsetting instance protection due to: {err}"
                         .format(asg=asg_name, err=err))
if __name__ == '__main__':
    # Rotate each ASG in its own worker process so the groups are
    # processed in parallel. Keys are ASG names, values the ELB to check.
    try:
        print("Autoscale Rotation Beginning")
        asg_dict = {'autoscale-group-1': 'autoscale-group-1-lb', 'autoscale-group-2': 'autoscale-group-2-lb'}
        # The with block shuts the pool down even when a rotation fails.
        with ProcessPoolExecutor(max_workers=2) as pool:
            # items() exists on both Python 2 and 3; iteritems() is
            # Python 2 only.
            futures = [pool.submit(rotate_asg_now, asg_name=key, elb_name=value)
                       for key, value in asg_dict.items()]
            # result() re-raises whatever the worker raised, so a failed
            # rotation stops the run here.
            for future in as_completed(futures):
                future.result()
        print("Autoscale Rotation Successful")
    except Exception as err:
        raise SystemExit("There was an issue due to: {}".format(err))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment