# sourabh-agrawal/elastalert-kafka-cpu-critical.yml

## elastalert-kafka-cpu-critical.yml
# ElastAlert rule: average-CPU alert for the central Kafka brokers, sent to Slack.
name: "central-kafka-cpu-critical"
description: >-
  This alert is fired when avg normalized cpu over past 15 minutes
  has crossed 90% mark
# Rule type: compares an aggregated metric value against a threshold.
type: "metric_aggregation"
# Index pattern that is searched.
index: "metricbeat-live*"
# Alerter used to deliver matches.
alert: "slack"

# Scheduling. Each value is a time span written as unit: amount.
# Query Elasticsearch once an hour.
run_every: {hours: 1}
# Each query covers 15 minutes of data.
buffer_time: {minutes: 15}
# Wait one hour before alerting again.
realert: {hours: 1}

# Elasticsearch document type that is queried.
doc_type: "_doc"

# Metric being watched: the average (avg) of system.cpu.total.norm.pct.
metric_agg_type: "avg"
metric_agg_key: "system.cpu.total.norm.pct"
# Alert once that average goes above 0.9, i.e. the 90% mark.
max_threshold: 0.9

# Evaluate per broker: tags_hash.id holds the BrokerId (e.g. 1, 2, 3).
query_key: "tags_hash.id"
# Events are usually generated every 20s, so a 15-minute window holds about
# 45 of them; the bar is set at 30.
min_doc_count: 30

# metric_aggregation doesn't provide access to keys other than query_key and
# the aggregation result for use in alert_text. So if we really want to see
# other fields in Slack, the following is a workaround.
# With top_count_keys, elastalert will do a term query to Elasticsearch to get the top
# top_count_number values of each field defined in top_count_keys.
#top_count_keys:
#  - tags_hash.id
#  - tags_hash.application
#  - tags_hash.cluster
#  - host.name

# By default elastalert appends ".raw/.keyword" to the fields defined in
# top_count_keys. Disable this behaviour with raw_count_keys.
#raw_count_keys: false
#top_count_number: 1

# Only consider cpu metricset documents tagged with cluster "central" and
# application "kafka". The folded scalar below is a single-line query string.
filter:
  - query:
      query_string:
        query: >-
          tags_hash.cluster: central
          AND tags_hash.application: kafka
          AND metricset.name: cpu

# Slack delivery settings.
# Webhook URL; replace the placeholder with a real one before use.
slack_webhook_url: '<paste_your_webhook_url_here>'
# Message color. Available colors: good, warning, danger.
slack_msg_color: "danger"
# Emoji override for the posted message.
slack_emoji_override: ':robot_face:'
# Text line sent along with the alert.
# NOTE(review): Slack emoji shortcodes are normally lowercase (":warning:");
# confirm that ":WARNING:" renders as intended.
slack_text_string: ':WARNING::mega: CPU Alert :boom::fire:'

# Use only the custom alert_text below as the alert body.
alert_text_type: "alert_text_only"

# Values substituted into alert_text, in order: {0} is the calculated CPU
# average, stored in metric_system.cpu.total.norm.pct_avg, and {1} is the
# broker id.
alert_text_args:
  - "metric_system.cpu.total.norm.pct_avg"
  - "tags_hash.id"

# Every source line ends in an escaped line break, so the value is exactly the
# characters written here: each "\n" is followed by a single space.
alert_text: "\n \
  *CPU*\t\t\t\t  :\t{0}\n \
  *Cluster*\t\t\t:\tCentral\n \
  *Application*\t:\tKafka\n \
  *BrokerId*\t\t :\t{1}\n\n \
  _This alert is fired when avg normalized cpu over past 15 minutes has crossed 90% mark_"
	index: metricbeat-live*
	description: "This alert is fired when avg normalized cpu over past 15 minutes has crossed 90% mark"
	name: central-kafka-cpu-critical
	type: metric_aggregation
	alert: slack

	# Query elasticsearch every 1hr
	run_every:
	hours: 1
	# Poll last 15minutes data
	buffer_time:
	minutes: 15
	# Realert after 1hr
	realert:
	hours: 1

	doc_type: _doc
	# Here tags_hash.id is BrokerId. e.g (1,2,3)
	query_key: tags_hash.id
	metric_agg_key: system.cpu.total.norm.pct
	metric_agg_type: avg
	max_threshold: 0.90
	# Usually i am generating events every 20s, so there will be 45 events in 15min. Set bar as 30.
	min_doc_count: 30

	# metric_aggregation doesn't provide access to keys other then query_key and
	# aggregation result, to be used in alert_text. So if we really want to see
	# other fields in slack then following is a work around.
	# With top_count_keys elastalert will do a term query to elasticsearch to get top
	# top_count_number values of each field defined in top_count_keys.
	#top_count_keys:
	# - tags_hash.id
	# - tags_hash.application
	# - tags_hash.cluster
	# - host.name

	# By default elastalert append ".raw/.keyword" in the fields defined in
	# top_count_keys. Disable this dehaviour with raw_count_keys.
	#raw_count_keys: false
	#top_count_number: 1

	filter:
	- query:
	query_string:
	query: "tags_hash.cluster: central AND tags_hash.application: kafka AND metricset.name: cpu"

	# Slack alert configuration
	slack_webhook_url: "<paste_your_webhook_url_here>"
	slack_text_string: ":WARNING::mega: CPU Alert :boom::fire:"

	slack_emoji_override: ":robot_face:"
	# Available colors: good, warning, danger
	slack_msg_color: 'danger'

	alert_text_type: alert_text_only
	alert_text: "\n
	CPU\t\t\t\t :\t{0}\n
	Cluster\t\t\t:\tCentral\n
	Application\t:\tKafka\n
	BrokerId\t\t :\t{1}\n\n
	_This alert is fired when avg normalized cpu over past 15 minutes has crossed 90% mark_"

	# Calculated aggregated cpu avg is stored in metric_system.cpu.total.norm.pct_avg. We don't need to include these.
	alert_text_args: ["metric_system.cpu.total.norm.pct_avg", "tags_hash.id"]