ronardcaktus/sample_cloudwatch.yml

## sample_cloudwatch.yml
---
# Single play for hosts matching the `cluster` pattern. Fact gathering is
# disabled, and the connection is local, so modules execute on the control
# node using the same Python interpreter that runs ansible-playbook.
- name: kubernetes cluster management
  hosts: cluster
  gather_facts: false
  vars:
    ansible_python_interpreter: "{{ ansible_playbook_python }}"
    ansible_connection: local
  roles:
    - role: caktus-role
  # Every task below carries the `cloudwatch` tag.
  tasks:
    # Ensure the `amazon-cloudwatch` Namespace exists; the Helm release in
    # this play is installed into that namespace.
    - name: Create Amazon CloudWatch Metrics namespace
      community.kubernetes.k8s:
        state: present
        api_version: v1
        kind: Namespace
        name: amazon-cloudwatch
        kubeconfig: "{{ k8s_kubeconfig }}"
        # `mandatory` makes the task fail if k8s_context is undefined.
        context: "{{ k8s_context | mandatory }}"
      tags: cloudwatch
    # Install the aws-cloudwatch-metrics chart from the EKS charts repository
    # as release `aws-cloudwatch-metrics` in the amazon-cloudwatch namespace.
    - name: Add AWS CloudWatch Metrics helm chart (monitoring)
      tags: cloudwatch
      community.kubernetes.helm:
        # `mandatory` makes the task fail if k8s_context is undefined.
        context: "{{ k8s_context|mandatory }}"
        kubeconfig: "{{ k8s_kubeconfig }}"
        chart_repo_url: "https://aws.github.io/eks-charts"
        chart_ref: aws-cloudwatch-metrics
        # Pinned chart version (quoted so it stays a string). Chart page:
        # https://artifacthub.io/packages/helm/aws/aws-cloudwatch-metrics
        chart_version: "0.0.9"
        release_name: aws-cloudwatch-metrics
        release_namespace: amazon-cloudwatch
        release_values:
          # NOTE(review): presumably this is the value reported as the
          # ClusterName dimension that the alarms task filters on - confirm.
          clusterName: ronard-cluster
        # Canonical boolean rather than the YAML 1.1 truthy value `yes`.
        wait: true
    # Create one CloudWatch alarm per entry in `loop`. All alarms read the
    # Average statistic of a ContainerInsights metric filtered by
    # ClusterName=ronard-cluster and notify the same SNS topic.
    - name: Create alarms
      tags: cloudwatch
      amazon.aws.cloudwatch_metric_alarm:
        state: present
        region: us-east-1
        name: "{{ item.name }}"
        description: "{{ item.description }}"
        metric: "{{ item.metric }}"
        namespace: "ContainerInsights"
        dimensions:
          ClusterName: ronard-cluster
        statistic: Average
        comparison: "{{ item.comparison }}"
        threshold: "{{ item.threshold }}"
        period: "{{ item.period }}"
        evaluation_periods: "{{ item.evaluation_periods }}"
        alarm_actions:
          # NOTE(review): the account id in this ARN looks like a
          # placeholder - confirm the real topic ARN before use.
          - arn:aws:sns:us-east-1:0000fake_nums0000:MyApplication_Name_Errors_CloudWatch_Alarms_Topic
      # Each entry supplies the item.* values referenced above.
      # period * evaluation_periods = 300 s * 3 = 15 minutes for both alarms.
      loop:
        - name: node-cpu-high
          # Wording matches the comparison below: the alarm fires at >= 50,
          # not only above 50.
          description: This will alarm when a instance's CPU usage average is greater than or equal to 50% for 15 minutes.
          metric: node_cpu_utilization
          comparison: GreaterThanOrEqualToThreshold
          threshold: 50
          period: 300
          evaluation_periods: 3
        - name: node-count-low
          description: This will alarm when a cluster's node count drops below 2 for 15 minutes.
          metric: cluster_node_count
          comparison: LessThanThreshold
          threshold: 2
          period: 300
          evaluation_periods: 3
	---
	- name: kubernetes cluster management
	hosts: cluster
	vars:
	ansible_connection: local
	ansible_python_interpreter: "{{ ansible_playbook_python }}"
	gather_facts: false
	roles:
	- role: caktus-role
	tasks:
	- name: Create Amazon CloudWatch Metrics namespace
	tags: cloudwatch
	community.kubernetes.k8s:
	context: "{{ k8s_context\|mandatory }}"
	kubeconfig: "{{ k8s_kubeconfig }}"
	name: amazon-cloudwatch
	api_version: v1
	kind: Namespace
	state: present
	- name: Add AWS CloudWatch Metrics helm chart (monitoring)
	tags: cloudwatch
	community.kubernetes.helm:
	context: "{{ k8s_context\|mandatory }}"
	kubeconfig: "{{ k8s_kubeconfig }}"
	chart_repo_url: "https://aws.github.io/eks-charts"
	chart_ref: aws-cloudwatch-metrics
	# https://artifacthub.io/packages/helm/aws/aws-cloudwatch-metrics
	chart_version: "0.0.9"
	release_name: aws-cloudwatch-metrics
	release_namespace: amazon-cloudwatch
	release_values:
	clusterName: ronard-cluster
	wait: yes
	- name: Create alarms
	tags: cloudwatch
	amazon.aws.cloudwatch_metric_alarm:
	state: present
	region: us-east-1
	name: "{{ item.name }}"
	description: "{{ item.description }}"
	metric: "{{ item.metric }}"
	namespace: "ContainerInsights"
	dimensions:
	ClusterName: ronard-cluster
	statistic: Average
	comparison: "{{ item.comparison }}"
	threshold: "{{ item.threshold }}"
	period: "{{ item.period }}"
	evaluation_periods: "{{ item.evaluation_periods }}"
	alarm_actions:
	- arn:aws:sns:us-east-1:0000fake_nums0000:MyApplication_Name_Errors_CloudWatch_Alarms_Topic
	loop:
	- name: node-cpu-high
	description: This will alarm when a instance's CPU usage average is greater than 50% for 15 minutes.
	metric: node_cpu_utilization
	comparison: GreaterThanOrEqualToThreshold
	threshold: 50
	period: 300
	evaluation_periods: 3
	- name: node-count-low
	description: This will alarm when a cluster's node count drops below 2 for 15 minutes.
	metric: cluster_node_count
	comparison: LessThanThreshold
	threshold: 2
	period: 300
	evaluation_periods: 3