Skip to content

Instantly share code, notes, and snippets.

# Agent configuration: server logging plus metrics scraping and remote write.
# NOTE(review): the nesting below was reconstructed from key order (the
# original had no indentation) and follows the Grafana Agent static-mode
# layout — confirm against the consuming tool's schema.
server:
  log_level: info

metrics:
  global:
    scrape_interval: 1m
    # ${URL} / ${USERNAME} are environment placeholders; they are quoted so an
    # empty or special-character expansion cannot change the YAML structure.
    remote_write:
      - url: "${URL}"
        basic_auth:
          username: "${USERNAME}"
...
/aws/lambda/drain-ecs-lambda 2019/03/14/[$LATEST]fbb61cba26b245de85f0d2c7a59b118f [INFO] 2019-03-14T23:50:23.638Z ad7258ee-7bc7-40f2-8721-105d99001367 Starting execution
/aws/lambda/drain-ecs-lambda 2019/03/14/[$LATEST]fbb61cba26b245de85f0d2c7a59b118f [INFO] 2019-03-14T23:50:23.638Z ad7258ee-7bc7-40f2-8721-105d99001367 Found container instance id arn:aws:ecs:ap-southeast-2:677050795231:container-instance/d48a81e1-3d92-4803-b2d5-5c45a1cc4bd6 in cluster arn:aws:ecs:ap-southeast-2:677050795231:cluster/demo-cluster
/aws/lambda/drain-ecs-lambda 2019/03/14/[$LATEST]fbb61cba26b245de85f0d2c7a59b118f [INFO] 2019-03-14T23:50:23.638Z ad7258ee-7bc7-40f2-8721-105d99001367 Getting tasks running on arn:aws:ecs:ap-southeast-2:677050795231:container-instance/d48a81e1-3d92-4803-b2d5-5c45a1cc4bd6...
/aws/lambda/drain-ecs-lambda 2019/03/14/[$LATEST]fbb61cba26b245de85f0d2c7a59b118f [INFO] 2019-03-14T23:50:23.639Z ad7258ee-7bc7-40f2-8721-105d99001367 Resetting dropped connection: ecs.ap-southeast-2.amazonaws.com
/aws/lambda/dr
# Instantiates the ecs-cluster-update-lambda module straight from its Git
# repository (the //src sub-directory), passing the deployment region through
# from var.region.
module "ecs_update_lambdas" {
  source = "git::https://github.com/xero-oss/ecs-cluster-update-lambda.git//src"
  region = "${var.region}"
}
# Send notifications to the SNS topic created by the ecs_update_lambdas module for all important Auto Scaling events
resource "aws_autoscaling_notification" "asg-terminate" {
group_names = [
"${aws_autoscaling_group.ecs-autoscaling-group.name}"
]
# ECS cluster for the demo; its name is taken from var.ecs_cluster.
resource "aws_ecs_cluster" "demo-ecs-cluster" {
  name = "${var.ecs_cluster}"
}
# TODO convert to aws_launch_template https://www.terraform.io/docs/providers/aws/r/launch_template.html
resource "aws_launch_configuration" "ecs-launch-configuration" {
# Note the create_before_destroy lifecycle setting and name_prefix. Because we can’t create a new resource
# with the same name as the old one, we don’t hard-code the name and only specify the prefix. Terraform
# appends a unique suffix, so the new configuration doesn’t clash with the old one before it is destroyed.
name_prefix = "demo-cluster-lc"
# image_id = "${data.aws_ami.latest_ecs.id}"
# Slack notification settings: posts to #devops, including resolved alerts.
# The title and text are Go templates rendered against the alert group.
slack_configs:
  - api_url: "<slack_auth url here>"
    channel: '#devops'
    send_resolved: true
    title: "{{ .GroupLabels.alertname }} alert is {{ .Status|toUpper }} in {{ .CommonLabels.env }}"
    # Literal block scalar: the body must be indented deeper than `text` so
    # that all three lines stay part of the message.
    text: |
      Affected instances {{ range .Alerts }}{{ .Labels.instance }}/{{ .Labels.job }} {{ .Labels.target }} {{ end }}
      {{ .CommonAnnotations.description }}
      See http://docs.airtame.cloud/alerts/{{ .GroupLabels.alertname }}
curl -XPOST -d"$alerts" http://<alertmanager address here>:9093/api/v1/alerts
@eskp
eskp / send_alert.sh
Last active December 4, 2017 13:32
alerts='[
{
"status": "resolved",
"labels": {
"alertname": "InstanceHighCpu",
"instance": "dev-foo",
"env": "dev",
"job": "ec2_instances"
},
"annotations": {
# Fires when, per instance, 100 minus the averaged idle-mode CPU irate (5m
# window, scaled to a percentage) has stayed above 90 for 20 minutes.
ALERT InstanceHighCpu
  IF 100 - (avg by (instance) (irate(node_cpu{mode="idle"}[5m])) * 100) > 90
  FOR 20m
  ANNOTATIONS {
    summary = "High CPU Usage on {{ $labels.instance }}",
    description = "CPU usage exceeds threshold (currently {{ $value|humanize }}% in use)",
  }
# Relabelling rules keyed on the EC2 Name tag.
# NOTE(review): the original declared `relabel_configs` twice at the same
# level (a duplicate key, where most parsers silently keep only the last);
# both rules are merged into one list here — confirm they belong to the same
# scrape config.
relabel_configs:
  # Keep only targets whose Name tag matches the regex (i.e. starts with
  # "prod-instance"); everything else is dropped.
  - source_labels: [__meta_ec2_tag_Name]
    regex: prod-instance.*
    action: keep
  # Use the Name tag as the instance label.
  - source_labels: [__meta_ec2_tag_Name]
    target_label: instance