# jaycdave88/Datadog + Ansible + Applications running in Docker

## Datadog + Ansible + Applications running in Docker
# Play: apply the Datadog.datadog role, with privilege escalation (`become`),
# to the hosts matched by the `servers` pattern.
- hosts: servers
  roles:
    - role: Datadog.datadog
      become: true
  vars:
    # Placeholder API key -- substitute a real key before use (preferably
    # supplied from Ansible Vault instead of being committed in plain text).
    datadog_api_key: "123456"
    # Agent package version to install. The `1:6.0.0-1` format is for apt-based
    # platforms; use a `6.0.0-1` format on yum-based platforms.
    datadog_agent_version: "1:6.0.0-1"
    # Agent-level settings handed to the role.
    # NOTE(review): presumably rendered into the Agent's main configuration
    # file by the role -- confirm against the role documentation.
    datadog_config:
      # NOTE(review): confirm the installed Agent version accepts a
      # comma-separated tag string here rather than a YAML list.
      tags: "mytag0, mytag1"
      log_level: INFO
      apm_enabled: "true"  # has to be set as a string
      logs_enabled: true  # log collection is available on agent 6
    # Extra configuration sections for tracing (`trace.config`,
    # `trace.concentrator`).
    # NOTE(review): verify that the role still honours `datadog_config_ex`
    # for the Agent version pinned above.
    datadog_config_ex:
      trace.config:
        env: dev
      trace.concentrator:
        extra_aggregators: version
    # Check configurations: each key below names a check and carries its
    # `init_config` and `instances` sections.
    datadog_checks:
      # Process check with two instances, `ssh` and `syslog`.
      # No check-wide settings are supplied (`init_config` is left empty).
      process:
        init_config:
        instances:
          # Instance `ssh`: search strings 'ssh' and 'sshd'.
          - name: ssh
            search_string:
              - ssh
              - sshd
          # Instance `syslog`: search string 'rsyslog', plus per-instance
          # tuning options (see the Datadog process check documentation for
          # their exact semantics).
          - name: syslog
            search_string:
              - rsyslog
            cpu_check_interval: 0.2
            exact_match: true
            ignore_denied_access: true
      # Docker check: a single instance that reaches the Docker API through a
      # unix socket URL. Every other option is kept below as a commented-out
      # reference; uncomment an option to override its default.
      # NOTE(review): the option reference below mentions `docker_daemon` metrics
      # (see collect_labels_as_tags); confirm these options apply to the check
      # named `docker` on the Agent version you install.
      docker:
        init_config:
          # Root directory to inspect for cgroup statistics. Useful when running inside a
          # container that has host directories mounted under a different folder. Default: /.
          # Example for the docker-dd-agent container:
          # docker_root: /host

          # Timeout, in seconds, for the connection to the Docker daemon.
          # Default: 5 seconds.
          #
          # timeout: 10

          # Version of the API the client will use. Specify 'auto' to use the API version
          # provided by the server.
          # api_version: auto

          # Use TLS encryption while communicating with the Docker API.
          #
          # tls: False
          # tls_client_cert: /path/to/client-cert.pem
          # tls_client_key: /path/to/client-key.pem
          # tls_cacert: /path/to/ca.pem
          # tls_verify: True

          # Initialization retries
          #
          # If the agent is expected to start before Docker,
          # use these settings to configure the retry policy.
          #
          # init_retry_interval defines how long (in seconds) the Docker client
          # waits before retrying initialization.
          # Defaults to 0.
          #
          # init_retry_interval: 20
          #
          # init_retries configures how many retries are made before failing permanently.
          # Defaults to 0.
          #
          # init_retries: 5

        instances:
          ## Daemon and system configuration
          ##

          # URL of the Docker daemon socket used to reach the Docker API. HTTP/HTTPS also works.
          # Warning: with a non-local daemon, performance metrics cannot be collected.
          #
          - url: 'unix://var/run/docker.sock'
            ## NOTE: This URL might need to change for Alpine or Ubuntu

            ## Data collection
            ##

            # Create events whenever a container's status changes.
            # Defaults to true.
            #
            # collect_events: false

            # By default, events with a status in ['top', 'exec_start', 'exec_create', 'exec_die']
            # are not collected. Additional statuses to filter out can be listed here.
            # The list of available statuses can be found at
            # https://docs.docker.com/engine/reference/commandline/events/#object-types
            # filtered_event_types:
            #    - 'top'
            #    - 'exec_start'
            #    - 'exec_create'
            #    - 'exec_die'

            # Collect disk usage per container with the docker.container.size_rw and
            # docker.container.size_rootfs metrics.
            # Warning: this might take time for the Docker daemon to generate;
            # ensure that `docker ps -a -q` runs fast before enabling it.
            # Defaults to false.
            #
            # collect_container_size: true

            # Do you use custom cgroups for this particular instance?
            # Note: enabling this option modifies the way in which containers are inspected and
            #       causes some overhead - with a high volume of containers the check may time out.
            #
            # custom_cgroups: false

            # Report Docker container healthcheck events as service checks.
            # Note: enabling this option modifies the way in which containers are inspected and
            #       causes some overhead - with a high volume of containers the check may time out.
            #       Container healthchecks are available starting with Docker 1.12; enabling this
            #       with older versions results in an UNKNOWN state for the service check.
            #
            # You must whitelist the containers you wish to submit health service checks for.
            # Use the same mechanism as the tagging system (see performance_tags in the Tagging section).
            # Example: ["docker_image:tomcat", "container_name:web_front_nginx"]
            #
            # health_service_check_whitelist: []

            # Collect the container count tagged by state (running, paused, exited, dead).
            # Defaults to false.
            #
            # collect_container_count: true

            # Collect the volume count for attached and dangling volumes.
            # Defaults to false.
            #
            # collect_volume_count: true

            # Collect image stats:
            # the number of available active images and intermediate images, as gauges.
            # Defaults to false.
            #
            # collect_images_stats: true

            # Collect disk usage per image with the docker.image.size and
            # docker.image.virtual_size metrics.
            # The check gets this size with the `docker images` command.
            # Requires collect_images_stats to be enabled.
            # Defaults to false.
            #
            # collect_image_size: true

            # Collect disk metrics (total, used, free) through the `docker info` command for
            # data and metadata.
            # This is useful when these values can't be obtained by the disk check.
            # Example: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-optimized_AMI.html
            # Note that it only works when the storage driver is devicemapper.
            # An explanation of these metrics can be found here:
            # https://github.com/docker/docker/blob/v1.11.1/daemon/graphdriver/devmapper/README.md
            # Defaults to false.
            #
            # collect_disk_stats: true

            # Collect container exit codes and send a critical service check when the
            # exit code is not 0.
            # Defaults to false.
            #
            # collect_exit_codes: true


            # Exclude containers based on their tags.
            # An excluded container does not get any individual container metric reported for it.
            # However, it still appears in the container count, since ignoring it there would
            # give a wrong impression of the Docker daemon load.
            #
            # Each rule is a regex on the tags.
            #
            # How it works: exclude first.
            # If a tag matches an exclude rule, it won't be included unless it also matches an
            # include rule.
            # Examples:
            # Exclude all, except ubuntu and debian:
            # exclude: ["docker_image:.*"]
            # include: ["docker_image:ubuntu", "docker_image:debian"]
            #
            # Include all, except ubuntu and Kubernetes pause containers:
            # exclude: ["docker_image:ubuntu", "image_name:gcr.io/google_containers/pause.*", "image_name:openshift/origin-pod"]
            # include: []
            #
            # Default: include all containers except for Kubernetes pause containers.
            # Warning: pause container exclusion works only if you deploy the agent the
            # recommended way (in a pod).
            # To customize this default behavior, override exclude.
            # If you do so, the default exclusion patterns no longer apply and need to be
            # added explicitly.


            ## Tagging
            ##

            # You can add extra tags to your Docker metrics and service checks with the
            # tags list option.
            # Example: ["extra_tag", "env:testing"]
            #
            # tags: []

            # If the agent is running in an Amazon ECS task, tag container metrics with the
            # ECS task name and version.
            # Default: true
            #
            # ecs_tags: false

            # Custom metrics tagging
            # Define which Docker tags to apply to metrics.
            # Since this impacts aggregation, modify it carefully (only if you really need to).
            #
            # Tags for performance metrics.
            # Available:
            #   - image_name: Name of the image (example: "nginx")
            #   - image_tag: Tag of the image (example: "latest")
            #   - docker_image: LEGACY. The full image name:tag string (example: "nginx:latest")
            #   - container_name: Name of the container (example: "boring_euclid")
            #   - container_command: Command run by the container (example: "echo 1")
            #   - container_id: Id of the container
            #
            # performance_tags: ["container_name", "image_name", "image_tag", "docker_image"]

            # Tags for container count metrics.
            # Available: ["image_name", "image_tag", "docker_image", "container_command"]
            #
            # container_tags: ["image_name", "image_tag", "docker_image"]

            # Tag Docker metrics with the listed container label names.
            # Takes precedence over docker_labels_as_tags for Docker metrics.
            # Only use this if you want different labels tagged for autodiscovery and
            # docker_daemon metrics.
            # Defaults to None.
            # Example:
            # collect_labels_as_tags: ["com.docker.compose.service", "com.docker.compose.project"]

            # List of Docker event attributes to add as tags on the Datadog events.
            # Defaults to None.
            #
            # event_attributes_as_tags: ["signal"]

            ## Rate Filtering
            ##

            # Allows ad-hoc spike filtering if the system reports incorrect metrics.
            # Points are dropped if the computed rate is higher than the cap value.
            # capped_metrics:
            #   docker.cpu.user: 1000
            #   docker.cpu.system: 100
	- hosts: servers
	roles:
	- { role: Datadog.datadog, become: yes }
	vars:
	datadog_api_key: "123456"
	datadog_agent_version: "1:6.0.0-1" # for apt-based platforms, use a `6.0.0-1` format on yum-based platforms
	datadog_config:
	tags: "mytag0, mytag1"
	log_level: INFO
	apm_enabled: "true" # has to be set as a string
	logs_enabled: true # log collection is available on agent 6
	datadog_config_ex:
	trace.config:
	env: dev
	trace.concentrator:
	extra_aggregators: version
	datadog_checks:
	process:
	init_config:
	instances:
	- name: ssh
	search_string: ['ssh', 'sshd' ]
	- name: syslog
	search_string: ['rsyslog' ]
	cpu_check_interval: 0.2
	exact_match: true
	ignore_denied_access: true
	docker:
	init_config:
	# Change the root directory to look at to get cgroup statistics. Useful when running inside a
	# container with host directories mounted on a different folder. Default: /.
	# Example for the docker-dd-agent container:
	# docker_root: /host

	# Timeout in seconds for the connection to the docker daemon
	# Default: 5 seconds
	#
	# timeout: 10

	# The version of the API the client will use. Specify 'auto' to use the API version provided by the server.
	# api_version: auto

	# Use TLS encryption while communicating with the Docker API
	#
	# tls: False
	# tls_client_cert: /path/to/client-cert.pem
	# tls_client_key: /path/to/client-key.pem
	# tls_cacert: /path/to/ca.pem
	# tls_verify: True

	# Initialization retries
	#
	# if the agent is expected to start before Docker,
	# use these settings to configure the retry policy.
	#
	# init_retry_interval defines how long (in seconds) the docker client
	# will wait before retrying initialization.
	# Defaults to 0.
	#
	# init_retry_interval: 20
	#
	# init_retries configures how many retries are made before failing permanently.
	# Defaults to 0.
	#
	# init_retries: 5

	instances:
	- ## Daemon and system configuration
	##

	# URL of the Docker daemon socket to reach the Docker API. HTTP/HTTPS also works.
	# Warning: if that's a non-local daemon, we won't be able to collect performance metrics.
	#
	url: "unix://var/run/docker.sock"
	##NOTE: This URL might need to change for Alpine or Ubuntu

	## Data collection
	##

	# Create events whenever a container status change.
	# Defaults to true.
	#
	# collect_events: false

	# By default we do not collect events with a status ['top', 'exec_start', 'exec_create', 'exec_die'].
	# Here can be added additional statuses to be filtered.
	# List of available statuses can be found here https://docs.docker.com/engine/reference/commandline/events/#object-types
	# filtered_event_types:
	# - 'top'
	# - 'exec_start'
	# - 'exec_create'
	# - 'exec_die'

	# Collect disk usage per container with docker.container.size_rw and
	# docker.container.size_rootfs metrics.
	# Warning: This might take time for Docker daemon to generate,
	# ensure that `docker ps -a -q` run fast before enabling it.
	# Defaults to false.
	#
	# collect_container_size: true

	# Do you use custom cgroups for this particular instance?
	# Note: enabling this option modifies the way in which we inspect the containers and causes
	# some overhead - if you run a high volume of containers we may timeout.
	#
	# custom_cgroups: false

	# Report docker container healthcheck events as service checks
	# Note: enabling this option modifies the way in which we inspect the containers and causes
	# some overhead - if you run a high volume of containers we may timeout.
	# Container Healthchecks are available starting with docker 1.12, enabling with older
	# versions will result in an UNKNOWN state for the service check.
	#
	# You must whitelist the containers you wish to submit health service checks for.
	# Use the same mechanism as the tagging system (see Tag:performance_tags section).
	# Example: ["docker_image:tomcat", "container_name:web_front_nginx"]
	#
	# health_service_check_whitelist: []

	# Collect the container count tagged by state (running, paused, exited, dead)
	# Defaults to false.
	#
	# collect_container_count: true

	# Collect the volume count for attached and dangling volumes.
	# Defaults to false.
	#
	# collect_volume_count: true

	# Collect images stats
	# Number of available active images and intermediate images as gauges.
	# Defaults to false.
	#
	# collect_images_stats: true

	# Collect disk usage per image with docker.image.size and docker.image.virtual_size metrics.
	# The check gets this size with the `docker images` command.
	# Requires collect_images_stats to be enabled.
	# Defaults to false.
	#
	# collect_image_size: true

	# Collect disk metrics (total, used, free) through the docker info command for data and metadata.
	# This is useful when these values can't be obtained by the disk check.
	# Example: https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-optimized_AMI.html
	# Note that it only works when the storage driver is devicemapper.
	# Explanation of these metrics can be found here:
	# https://github.com/docker/docker/blob/v1.11.1/daemon/graphdriver/devmapper/README.md
	# Defaults to false.
	#
	# collect_disk_stats: true

	# Collect containers exit codes and send service checks critical when exit code is not 0
	# Defaults to false.
	#
	# collect_exit_codes: true


	# Exclude containers based on their tags
	# An excluded container will not get any individual container metric reported for it.
	# However it will still appear in the container count since ignoring it here would give
	# a wrong impression about the docker daemon load.
	#
	# The rule is a regex on the tags.
	#
	# How it works: exclude first.
	# If a tag matches an exclude rule, it won't be included unless it also matches an include rule.
	# Examples:
	# exclude all, except ubuntu and debian.
	# exclude: ["docker_image:.*"]
	# include: ["docker_image:ubuntu", "docker_image:debian"]
	#
	# include all, except ubuntu and Kubernetes pause containers.
	# exclude: ["docker_image:ubuntu", "image_name:gcr.io/google_containers/pause.*", "image_name:openshift/origin-pod"]
	# include: []
	#
	# Default: include all containers except for Kubernetes pause containers.
	# Warning: pause containers exclusion works only if you deploy the agent the recommended way (in a pod).
	# To customize this default behavior, override exclude.
	# If you do so, default exclusion patterns won't apply anymore and will need to be added explicitly.



	## Tagging
	##

	# You can add extra tags to your Docker metrics and ServiceCheck with the tags list option.
	# Example: ["extra_tag", "env:testing"]
	#
	# tags: []

	# If the agent is running in an Amazon ECS task, tags container metrics with the ECS task name and version.
	# Default: true
	#
	# ecs_tags: false

	# Custom metrics tagging
	# Define which Docker tags to apply on metrics.
	# Since it impacts the aggregation, modify it carefully (only if you really need it).
	#
	# Tags for performance metrics.
	# Available:
	# - image_name: Name of the image (example: "nginx")
	# - image_tag: Tag of the image (example: "latest")
	# - docker_image: LEGACY. The full image name:tag string (example: "nginx:latest")
	# - container_name: Name of the container (example: "boring_euclid")
	# - container_command: Command ran by the container (example: "echo 1")
	# - container_id: Id of the container
	#
	# performance_tags: ["container_name", "image_name", "image_tag", "docker_image"]

	# Tags for containers count metrics.
	# Available: ["image_name", "image_tag", "docker_image", "container_command"]
	#
	# container_tags: ["image_name", "image_tag", "docker_image"]

	# Option to tag docker metrics with container label names listed.
	# Takes precedence over docker_labels_as_tags for docker metrics.
	# Only use if you want different labels tagged for autodiscovery and docker_daemon metrics.
	# Default to None
	# Example:
	# collect_labels_as_tags: ["com.docker.compose.service", "com.docker.compose.project"]
	# List of docker event attributes to add as tags of the datadog events
	# Defaults to None.
	#
	# event_attributes_as_tags: ["signal"]

	## Rate Filtering
	##

	# Allows ad-hoc spike filtering if the system reports incorrect metrics.
	# This will drop points if the computed rate is higher than the cap value
	# capped_metrics:
	# docker.cpu.user: 1000
	# docker.cpu.system: 100