Skip to content

Instantly share code, notes, and snippets.

@homingli
Created January 14, 2017 00:54
Show Gist options
  • Save homingli/764fad2263947a0d1243cee12b3f9c22 to your computer and use it in GitHub Desktop.
Save homingli/764fad2263947a0d1243cee12b3f9c22 to your computer and use it in GitHub Desktop.
Simple Opt-In Chaos Function
# chaosfcn (python 2.7)
# purpose: test resiliency of the EC2 environment (opt-in with tag)
# implementation:
# 0) use CloudWatch (CW) Events to schedule the trigger for this function
# 1) retrieve the list of participating instances
# 2) terminate a random sample of the participating instances
# TODO
# error handling
# no safeguard (e.g. against deleting all instances related to the same application)
# if desired, modify trigger for a less predictable run schedule
import boto3,random,json,logging
# Module-level logger shared by the handler; getLogger() with no name
# returns the root logger.
logger = logging.getLogger()
# Record INFO and above so the handler's progress messages are emitted.
logger.setLevel(logging.INFO)
def lambda_handler(event, context):
    """Terminate a random sample of participating EC2 instances.

    Participation is decided by the filters built from the in-function
    configuration: instances must be in the ``running`` state, carry the
    ``TAG_KEY`` tag (when one is configured) and live in one of ``VPC_IDs``
    (when the list is non-empty).

    Args:
        event: The triggering event; it is only logged (as JSON).
        context: The Lambda context object; unused.

    Returns:
        The value returned by the collection ``terminate()`` call when
        instances were terminated, otherwise an empty dict.
    """
    logger.info('got event {}'.format(json.dumps(event)))
    ec2 = boto3.resource('ec2')

    # config
    NUM_TO_KILL = 1  # can be fixed, or calculated (% of total fleet)
    VPC_IDs = ['vpc-700cf415']  # list of VPC ids, empty list for all VPCs
    TAG_KEY = 'chaos-me'  # opt-in with tag key, unset (comment line) to include all instances

    # default participating criteria (running)
    criteria = [{'Name': 'instance-state-name', 'Values': ['running']}]

    # additional criteria
    # opt-in with tag key: the documented way to disable this filter is to
    # comment out the TAG_KEY assignment above, which makes the lookup raise
    # NameError. An empty/None TAG_KEY is treated the same way instead of
    # producing a filter with an empty tag key.
    try:
        tag_key = TAG_KEY
    except NameError:
        tag_key = None
    if tag_key:
        criteria.append({'Name': 'tag-key', 'Values': [tag_key]})

    # limit to specific vpc
    if VPC_IDs:
        criteria.append({'Name': 'vpc-id', 'Values': VPC_IDs})

    # see filters: https://boto3.readthedocs.io/en/latest/reference/services/ec2.html#EC2.Client.describe_instances

    ### 1) retrieves list of instances that is participating ###
    logger.info('Criteria: {}'.format(criteria))
    instances = ec2.instances.filter(Filters=criteria)
    participants = [ins.id for ins in instances]
    count = len(participants)

    ### 2) terminate random sample of the participating instances ###
    # Only act when strictly more instances participate than would be killed,
    # so at least one participant always survives a run.
    # Also refuse a non-positive NUM_TO_KILL: it would yield an empty id list,
    # and filtering the collection by an empty InstanceIds list may not
    # restrict it at all (NOTE(review): confirm against boto3 behaviour) --
    # never worth risking on a terminate call.
    if NUM_TO_KILL < 1 or count <= NUM_TO_KILL:
        logger.info('Nothing to do!')
        return {}

    # Sample the ids directly; this works on both Python 2 and 3
    # (the previous xrange-based index sampling was Python 2 only).
    chosen = random.sample(participants, NUM_TO_KILL)

    # print ID of lucky instances
    logger.info("Terminating Instances " + ','.join(chosen))
    res = ec2.instances.filter(InstanceIds=chosen).terminate()
    logger.info(json.dumps(res))
    return res
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment