Skip to content

Instantly share code, notes, and snippets.

@sonnysideup
Created November 27, 2017 19:27
Show Gist options
  • Save sonnysideup/7e35b40d534100e346559e4687913eb7 to your computer and use it in GitHub Desktop.
Save sonnysideup/7e35b40d534100e346559e4687913eb7 to your computer and use it in GitHub Desktop.
Datadog Kafka integration configuration using Kubernetes Pod Annotations
# You can apply these annotations to your Kafka Pod/Deployment/StatefulSet in order to correctly
# enable the Datadog core monitoring integration for Kafka using Agent v5. When Agent v6 is released,
# you can use `"collect_default_metrics": true` to apply the same config.
#
# NOTE: Make sure you replace "mykafka" (see below) with the container name in your PodSpec.
#
annotations:
  # Name of the Datadog check to run against the "mykafka" container.
  service-discovery.datadoghq.com/mykafka.check_names: '["kafka"]'
  # JMX bean-to-metric mapping for the check, stored as a JSON string.
  # The folded scalar (>-) joins the lines below with single spaces, which JSON
  # ignores between tokens. Keep every line at the same indentation and break
  # only between JSON elements, never inside a quoted JSON string: a break
  # inside a string would insert a space into the bean or metric name.
  service-discovery.datadoghq.com/mykafka.init_configs: >-
    [{"is_jmx":true,"collect_default_metrics":true,"conf":[
    {"include":{"domain":"kafka.producer","bean_regex":"kafka\\.producer:type=ProducerRequestMetrics,name=ProducerRequestRateAndTimeMs,clientId=.*","attribute":{"Count":{"metric_type":"rate","alias":"kafka.producer.request_rate"}}}},
    {"include":{"domain":"kafka.producer","bean_regex":"kafka\\.producer:type=ProducerRequestMetrics,name=ProducerRequestRateAndTimeMs,clientId=.*","attribute":{"Mean":{"metric_type":"gauge","alias":"kafka.producer.request_latency_avg"}}}},
    {"include":{"domain":"kafka.producer","bean_regex":"kafka\\.producer:type=ProducerTopicMetrics,name=BytesPerSec,clientId=.*","attribute":{"Count":{"metric_type":"rate","alias":"kafka.producer.bytes_out"}}}},
    {"include":{"domain":"kafka.producer","bean_regex":"kafka\\.producer:type=ProducerTopicMetrics,name=MessagesPerSec,clientId=.*","attribute":{"Count":{"metric_type":"rate","alias":"kafka.producer.message_rate"}}}},
    {"include":{"domain":"kafka.producer","bean_regex":"kafka\\.producer:type=producer-metrics,client-id=.*","attribute":{"response-rate":{"metric_type":"gauge","alias":"kafka.producer.response_rate"}}}},
    {"include":{"domain":"kafka.producer","bean_regex":"kafka\\.producer:type=producer-metrics,client-id=.*","attribute":{"request-rate":{"metric_type":"gauge","alias":"kafka.producer.request_rate"}}}},
    {"include":{"domain":"kafka.producer","bean_regex":"kafka\\.producer:type=producer-metrics,client-id=.*","attribute":{"request-latency-avg":{"metric_type":"gauge","alias":"kafka.producer.request_latency_avg"}}}},
    {"include":{"domain":"kafka.producer","bean_regex":"kafka\\.producer:type=producer-metrics,client-id=.*","attribute":{"outgoing-byte-rate":{"metric_type":"gauge","alias":"kafka.producer.bytes_out"}}}},
    {"include":{"domain":"kafka.producer","bean_regex":"kafka\\.producer:type=producer-metrics,client-id=.*","attribute":{"io-wait-time-ns-avg":{"metric_type":"gauge","alias":"kafka.producer.io_wait"}}}},
    {"include":{"domain":"kafka.consumer","bean_regex":"kafka\\.consumer:type=ConsumerFetcherManager,name=MaxLag,clientId=.*","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.consumer.max_lag"}}}},
    {"include":{"domain":"kafka.consumer","bean_regex":"kafka\\.consumer:type=ConsumerFetcherManager,name=MinFetchRate,clientId=.*","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.consumer.fetch_rate"}}}},
    {"include":{"domain":"kafka.consumer","bean_regex":"kafka\\.consumer:type=ConsumerTopicMetrics,name=BytesPerSec,clientId=.*","attribute":{"Count":{"metric_type":"rate","alias":"kafka.consumer.bytes_in"}}}},
    {"include":{"domain":"kafka.consumer","bean_regex":"kafka\\.consumer:type=ConsumerTopicMetrics,name=MessagesPerSec,clientId=.*","attribute":{"Count":{"metric_type":"rate","alias":"kafka.consumer.messages_in"}}}},
    {"include":{"domain":"kafka.consumer","bean_regex":"kafka\\.consumer:type=ZookeeperConsumerConnector,name=ZooKeeperCommitsPerSec,clientId=.*","attribute":{"Count":{"metric_type":"rate","alias":"kafka.consumer.zookeeper_commits"}}}},
    {"include":{"domain":"kafka.consumer","bean_regex":"kafka\\.consumer:type=ZookeeperConsumerConnector,name=KafkaCommitsPerSec,clientId=.*","attribute":{"Count":{"metric_type":"rate","alias":"kafka.consumer.kafka_commits"}}}},
    {"include":{"domain":"kafka.consumer","bean_regex":"kafka\\.consumer:type=consumer-fetch-manager-metrics,client-id=.*","attribute":{"bytes-consumed-rate":{"metric_type":"gauge","alias":"kafka.consumer.bytes_in"}}}},
    {"include":{"domain":"kafka.consumer","bean_regex":"kafka\\.consumer:type=consumer-fetch-manager-metrics,client-id=.*","attribute":{"records-consumed-rate":{"metric_type":"gauge","alias":"kafka.consumer.messages_in"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec","attribute":{"Count":{"metric_type":"rate","alias":"kafka.net.bytes_out.rate"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec","attribute":{"Count":{"metric_type":"rate","alias":"kafka.net.bytes_in.rate"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=BrokerTopicMetrics,name=MessagesInPerSec","attribute":{"Count":{"metric_type":"rate","alias":"kafka.messages_in.rate"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=BrokerTopicMetrics,name=BytesRejectedPerSec","attribute":{"Count":{"metric_type":"rate","alias":"kafka.net.bytes_rejected.rate"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=BrokerTopicMetrics,name=FailedFetchRequestsPerSec","attribute":{"Count":{"metric_type":"rate","alias":"kafka.request.fetch.failed.rate"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=BrokerTopicMetrics,name=FailedProduceRequestsPerSec","attribute":{"Count":{"metric_type":"rate","alias":"kafka.request.produce.failed.rate"}}}},
    {"include":{"domain":"kafka.network","bean":"kafka.network:type=RequestMetrics,name=RequestsPerSec,request=Produce","attribute":{"Count":{"metric_type":"rate","alias":"kafka.request.produce.rate"}}}},
    {"include":{"domain":"kafka.network","bean":"kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Produce","attribute":{"Mean":{"metric_type":"gauge","alias":"kafka.request.produce.time.avg"},"99thPercentile":{"metric_type":"gauge","alias":"kafka.request.produce.time.99percentile"}}}},
    {"include":{"domain":"kafka.network","bean":"kafka.network:type=RequestMetrics,name=RequestsPerSec,request=FetchConsumer","attribute":{"Count":{"metric_type":"rate","alias":"kafka.request.fetch_consumer.rate"}}}},
    {"include":{"domain":"kafka.network","bean":"kafka.network:type=RequestMetrics,name=RequestsPerSec,request=FetchFollower","attribute":{"Count":{"metric_type":"rate","alias":"kafka.request.fetch_follower.rate"}}}},
    {"include":{"domain":"kafka.network","bean":"kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchConsumer","attribute":{"Mean":{"metric_type":"gauge","alias":"kafka.request.fetch_consumer.time.avg"},"99thPercentile":{"metric_type":"gauge","alias":"kafka.request.fetch_consumer.time.99percentile"}}}},
    {"include":{"domain":"kafka.network","bean":"kafka.network:type=RequestMetrics,name=TotalTimeMs,request=FetchFollower","attribute":{"Mean":{"metric_type":"gauge","alias":"kafka.request.fetch_follower.time.avg"},"99thPercentile":{"metric_type":"gauge","alias":"kafka.request.fetch_follower.time.99percentile"}}}},
    {"include":{"domain":"kafka.network","bean":"kafka.network:type=RequestMetrics,name=TotalTimeMs,request=UpdateMetadata","attribute":{"Mean":{"metric_type":"gauge","alias":"kafka.request.update_metadata.time.avg"},"99thPercentile":{"metric_type":"gauge","alias":"kafka.request.update_metadata.time.99percentile"}}}},
    {"include":{"domain":"kafka.network","bean":"kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Metadata","attribute":{"Mean":{"metric_type":"gauge","alias":"kafka.request.metadata.time.avg"},"99thPercentile":{"metric_type":"gauge","alias":"kafka.request.metadata.time.99percentile"}}}},
    {"include":{"domain":"kafka.network","bean":"kafka.network:type=RequestMetrics,name=TotalTimeMs,request=Offsets","attribute":{"Mean":{"metric_type":"gauge","alias":"kafka.request.offsets.time.avg"},"99thPercentile":{"metric_type":"gauge","alias":"kafka.request.offsets.time.99percentile"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=KafkaRequestHandlerPool,name=RequestHandlerAvgIdlePercent","attribute":{"OneMinuteRate":{"metric_type":"gauge","alias":"kafka.request.handler.avg.idle.pct.rate"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=ProducerRequestPurgatory,name=PurgatorySize","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.request.producer_request_purgatory.size"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=FetchRequestPurgatory,name=PurgatorySize","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.request.fetch_request_purgatory.size"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=ReplicaManager,name=UnderReplicatedPartitions","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.replication.under_replicated_partitions"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=ReplicaManager,name=IsrShrinksPerSec","attribute":{"Count":{"metric_type":"rate","alias":"kafka.replication.isr_shrinks.rate"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=ReplicaManager,name=IsrExpandsPerSec","attribute":{"Count":{"metric_type":"rate","alias":"kafka.replication.isr_expands.rate"}}}},
    {"include":{"domain":"kafka.controller","bean":"kafka.controller:type=ControllerStats,name=LeaderElectionRateAndTimeMs","attribute":{"Count":{"metric_type":"rate","alias":"kafka.replication.leader_elections.rate"}}}},
    {"include":{"domain":"kafka.controller","bean":"kafka.controller:type=ControllerStats,name=UncleanLeaderElectionsPerSec","attribute":{"Count":{"metric_type":"rate","alias":"kafka.replication.unclean_leader_elections.rate"}}}},
    {"include":{"domain":"kafka.controller","bean":"kafka.controller:type=KafkaController,name=OfflinePartitionsCount","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.replication.offline_partitions_count"}}}},
    {"include":{"domain":"kafka.controller","bean":"kafka.controller:type=KafkaController,name=ActiveControllerCount","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.replication.active_controller_count"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=ReplicaManager,name=PartitionCount","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.replication.partition_count"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=ReplicaManager,name=LeaderCount","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.replication.leader_count"}}}},
    {"include":{"domain":"kafka.server","bean":"kafka.server:type=ReplicaFetcherManager,name=MaxLag,clientId=Replica","attribute":{"Value":{"metric_type":"gauge","alias":"kafka.replication.max_lag"}}}},
    {"include":{"domain":"kafka.log","bean":"kafka.log:type=LogFlushStats,name=LogFlushRateAndTimeMs","attribute":{"Count":{"metric_type":"rate","alias":"kafka.log.flush_rate.rate"}}}}
    ]}]
  # Per-instance connection settings. %%host%% and %%port_1%% are template
  # placeholders (expected to be substituted by the Datadog Agent at runtime).
  service-discovery.datadoghq.com/mykafka.instances: '[{"host": "%%host%%", "port": "%%port_1%%", "tags": {"kafka": "broker"}}]'
@nutant-h
Copy link

Did you have to configure anything else apart from these annotations? This does not seem to work out of the box.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment