Skip to content

Instantly share code, notes, and snippets.

@nirbhabbarat
Created February 12, 2018 17:34
Show Gist options
  • Save nirbhabbarat/6ec53599e1d7fadfc0203c1cb1b43f9f to your computer and use it in GitHub Desktop.
Save nirbhabbarat/6ec53599e1d7fadfc0203c1cb1b43f9f to your computer and use it in GitHub Desktop.
Create status check and 90% CPU alert on all EC2 machines in AWS
#!/usr/bin/env python
# Before running: replace every occurrence of REPLACE_WITH_ARN in this file
# with the ARN of the SNS topic that should receive the alarm notifications.
import boto3
from dateutil.parser import parse
import datetime
# Name of the credentials profile handed to boto3.setup_default_session()
# further down in this script.
aws_profile_name = 'prod'
def get_instance_name(fid):
    """Return the value of an instance's ``Name`` tag, or ``''`` if it has none.

    Args:
        fid: An instance description dict (the script passes entries taken
            from a ``describe_instances`` response). Its optional ``'Tags'``
            entry is a list of ``{'Key': ..., 'Value': ...}`` dicts.

    Returns:
        The ``Value`` of the tag whose ``Key`` is ``'Name'``, or an empty
        string when the instance carries no tags or no ``Name`` tag.
    """
    instancename = ''
    # The 'Tags' entry may be missing (or empty) for untagged instances;
    # treat that as "no tags" instead of raising KeyError.
    for tag in fid.get('Tags') or []:
        if tag["Key"] == 'Name':
            instancename = tag["Value"]
    return instancename
def delete_cloudwatch_alarm(instance_id, instance_name):
    """Delete the status-check and 90% CPU alarms that belong to one instance.

    The two alarm names are rebuilt as
    ``<instance_id>-<instance_name>-status-alarm`` and
    ``<instance_id>-<instance_name>-90%-CPU_Utilization``, i.e. the same
    names ``create_status_alarm`` assigns, and both are removed with a
    single ``delete_alarms`` call.

    Args:
        instance_id: EC2 instance id used as the first part of the alarm name.
        instance_name: Instance name used as the second part of the alarm
            name (may be an empty string).
    """
    cloudwatch_conn = boto3.client('cloudwatch')
    alarm_prefix = instance_id + '-' + instance_name
    status_name = alarm_prefix + "-status-alarm"
    cpu_name = alarm_prefix + "-90%-CPU_Utilization"
    cloudwatch_conn.delete_alarms(AlarmNames=[status_name, cpu_name])
    # Single-argument print(...) produces the same output on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("Deleting - " + status_name)
    print("Deleting - " + cpu_name)
def create_status_alarm(instance_id, instance_name, sns_topic_arn='REPLACE_WITH_ARN'):
    """Put the status-check alarm and the 90% CPU alarm for one instance.

    Two CloudWatch alarms are written with ``put_metric_alarm``, both scoped
    to the instance through the ``InstanceId`` dimension:

    * ``<instance_id>-<instance_name>-status-alarm`` fires when the maximum
      of ``StatusCheckFailed`` is >= 1 for 2 consecutive 60-second periods.
    * ``<instance_id>-<instance_name>-90%-CPU_Utilization`` fires when the
      maximum of ``CPUUtilization`` is >= 90 percent in one 300-second period.

    Args:
        instance_id: EC2 instance id to monitor; also the alarm-name prefix.
        instance_name: Instance name, used in alarm names and descriptions.
        sns_topic_arn: ARN notified on both the OK and ALARM transitions of
            both alarms. Defaults to the ``REPLACE_WITH_ARN`` placeholder,
            which must be replaced with a real SNS topic ARN before running.
    """
    cloudwatch_conn = boto3.client('cloudwatch')
    alarm_prefix = instance_id + '-' + instance_name
    status_name = alarm_prefix + "-status-alarm"
    cpu_name = alarm_prefix + "-90%-CPU_Utilization"
    dimensions = [{'Name': 'InstanceId', 'Value': instance_id}]
    # Both alarms notify the same topic. The CPU alarm previously carried a
    # hard-coded ARN that the documented REPLACE_WITH_ARN substitution missed.
    actions = [sns_topic_arn]

    cloudwatch_conn.put_metric_alarm(
        AlarmName=status_name,
        ComparisonOperator='GreaterThanOrEqualToThreshold',
        EvaluationPeriods=2,
        MetricName='StatusCheckFailed',
        Namespace='AWS/EC2',
        Period=60,
        Statistic='Maximum',
        Threshold=1.0,
        AlarmDescription='Status check for ' + alarm_prefix,
        OKActions=actions,
        AlarmActions=actions,
        Dimensions=dimensions,
    )
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print("Creating - " + status_name)

    cloudwatch_conn.put_metric_alarm(
        AlarmName=cpu_name,
        ComparisonOperator='GreaterThanOrEqualToThreshold',
        EvaluationPeriods=1,
        MetricName='CPUUtilization',
        Namespace='AWS/EC2',
        Period=300,
        Statistic='Maximum',
        Threshold=90.0,
        OKActions=actions,
        AlarmActions=actions,
        AlarmDescription='Alarm when server CPU exceeds 90% for ' + alarm_prefix,
        Dimensions=dimensions,
        Unit='Percent',
    )
    print("Creating - " + cpu_name)
# Driver: ensure alarms exist for every running instance and remove the
# alarms of every instance that is in any other state.
boto3.setup_default_session(profile_name=aws_profile_name)
ec2 = boto3.client('ec2')
# Walk every page of DescribeInstances so that instances beyond the first
# response page are not silently skipped.
for page in ec2.get_paginator('describe_instances').paginate():
    for reservation in page["Reservations"]:
        for instance in reservation["Instances"]:
            instance_name = get_instance_name(instance)
            if instance["State"]["Name"] == "running":
                create_status_alarm(instance["InstanceId"], instance_name)
            else:
                # Every non-running state takes this branch.
                delete_cloudwatch_alarm(instance["InstanceId"], instance_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment