Skip to content

Instantly share code, notes, and snippets.

@jonathanwcrane
Last active March 27, 2024 09:42
Show Gist options
  • Star 10 You must be signed in to star a gist
  • Fork 8 You must be signed in to fork a gist
  • Save jonathanwcrane/5a00812201af9ea1222e to your computer and use it in GitHub Desktop.
Save jonathanwcrane/5a00812201af9ea1222e to your computer and use it in GitHub Desktop.
Purge CloudWatch alarms for instances that no longer exist
#Step 1: Get a list of all alarms in INSUFFICIENT_DATA status
#Step 2: Get a list of all instances (stopped and started)
#Step 3: Find all alarms on instances that don't exist, and delete them
###################################################
#Step 1: Get alarms in INSUFFICIENT_DATA state
###################################################
#All alarms for nonexistent instances will be in INSUFFICIENT_DATA state, so
#only those are fetched. Each request asks for at most 100 alarms, so the
#client's paginator is used to follow NextToken until every page has been read.
#NOTE(review): `cw` is not defined in this part of the file; it is assumed to
#be a boto3 CloudWatch client created earlier -- confirm.
insuff_alarms = []
#Number of result pages fetched; reported below for visibility only.
loops = 0
alarm_pages = cw.get_paginator('describe_alarms').paginate(
    StateValue='INSUFFICIENT_DATA',
    PaginationConfig={'PageSize': 100},
)
for alarm_page in alarm_pages:
    insuff_alarms.extend(alarm_page['MetricAlarms'])
    loops += 1
print('Looped',loops,'times to generate list of ',len(insuff_alarms),'alarms in state INSUFFICIENT_DATA.')
####################################################
#Step 2: Get all instances
###################################################
#In this case we want all instances. If an instance is stopped, so be it, we don't delete the alarm.
#But if the instance is gone, then....
#Enumerate the instances exactly once and derive both the ID list and the
#per-state tally from that single snapshot, so the two cannot disagree and the
#collection is not fetched twice.
#NOTE(review): `ec2` is not defined in this part of the file; it is assumed to
#be a boto3 EC2 service resource created earlier -- confirm.
instances = list(ec2.instances.all())
instance_ids = [instance.id for instance in instances]
print('We have',len(instance_ids),'instances in our account right now.')
#Count how many instances are in each state name, for the operator's information.
state_dict = {}
for inst in instances:
    state = inst.state['Name']
    state_dict[state] = state_dict.get(state, 0) + 1
print(state_dict)
###################################################
#Step 3: Find and delete orphan alarms
###################################################
#Reads `insuff_alarms` (step 1) and `instance_ids` (step 2), and deletes every
#alarm whose InstanceId dimension names an instance that is not in `instance_ids`.
our_dim = 'InstanceId'
num_orphan_alarms = 0
#Build the lookup set once so each membership test below is O(1) instead of a
#scan over the whole ID list.
known_instance_ids = set(instance_ids)
for insuff_alarm in insuff_alarms:
    #Dimensions is a list of dicts. Fall back to an empty list so that an alarm
    #record without the key cannot abort a half-finished purge.
    #NOTE(review): whether the key can actually be absent (e.g. for
    #metric-math alarms) should be confirmed against the DescribeAlarms docs.
    dims = insuff_alarm.get('Dimensions', [])
    inst_id = ''
    for dim in dims:
        #dim is a dict with two key/values: Name and Value.
        if dim['Name'] == our_dim:
            inst_id = dim['Value']
    if not inst_id:
        #Not an instance-level alarm: report it and leave it alone.
        #NOTE(review): the original indentation was lost; this report is
        #assumed to belong to the "no InstanceId dimension" case -- confirm.
        print(insuff_alarm['AlarmName'],'has dimensions',dims)
        continue
    if inst_id in known_instance_ids:
        #The instance still exists (in whatever state), so keep its alarm.
        continue
    #This is an alarm for an instance that doesn't exist
    name = insuff_alarm['AlarmName']
    print('Alarm',name,"is for an instance that doesn't exist:",inst_id)
    cw.delete_alarms(AlarmNames=[name])
    num_orphan_alarms += 1
print(num_orphan_alarms,'orphan alarms found and deleted.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment