Watcher Webinar - 2016-07-28 - Example Watches
# This is the Sense buffer used in the Getting Started with Watcher webinar. Follow along with the video here:
# https://www.elastic.co/webinars/getting-started-with-alerting-for-elasticsearch
# Step 1: Craft a query that retrieves the data you would like to alert on.
# Here, we're grabbing the most recent cluster state document, which contains cluster_state.status.
# Sorting on "timestamp" in descending order with "size": 1 returns only the newest cluster_state document.
GET /.marvel-es-1*/cluster_state/_search
{
  "query": {
    "match_all": {}
  },
  "sort": [
    {
      "timestamp": {
        "order": "desc"
      }
    }
  ],
  "size": 1
}
# Now, let's create the Watch. Note the 4 top-level objects: Trigger, Input, Condition, and Actions.
# The Trigger defines when the input should be retrieved. Today, these are schedule-based.
# The Input defines what data to gather. This is often an Elasticsearch query, but could be an HTTP input as well.
# The Condition looks at the data gathered from the Input, and decides whether the actions should be fired.
# The Actions are a set of actions that should be run when the condition is met. We support Logging, Email, Slack, Hipchat, Pagerduty, and Webhook actions.
#
# Note that here, the Input is the same as the query above.
# Watch "cluster_health": every 10s, fetch the newest cluster_state document and
# write a log message when its cluster_state.status equals "yellow".
PUT _watcher/watch/cluster_health
{
  "trigger": {
    "schedule": {
      "interval": "10s"
    }
  },
  "input": {
    "search": {
      "request": {
        "indices": ".marvel-es-1*",
        "types": "cluster_state",
        "body": {
          "query": {
            "match_all": {}
          },
          "sort": [
            {
              "timestamp": {
                "order": "desc"
              }
            }
          ],
          "size": 1
        }
      }
    }
  },
  "condition": {
    "compare": {
      "ctx.payload.hits.hits.0._source.cluster_state.status": {
        "eq": "yellow"
      }
    }
  },
  "actions": {
    "logging": {
      "logging": {
        "text": "Cluster State is Yellow!"
      }
    }
  }
}
# Once we create the watch, we can start looking at the watch history. Here, we retrieve the
# 1 most recent history entry. The most interesting section is "result", as this section
# contains the results of the input (this is the data available to the watch condition), the
# condition output, and information about the success/failure of the actions.
# Return the single most recent history record for the "cluster_health" watch,
# ordered by result.execution_time (newest first).
GET .watch_history*/_search
{
  "query": {
    "query_string": {
      "query": "watch_id: cluster_health"
    }
  },
  "size": 1,
  "sort": [
    {
      "result.execution_time": {
        "order": "desc"
      }
    }
  ]
}
# The watch history entries have a lot of information. You can filter it down by appending this to the request URL:
# ?filter_path=hits.hits._source.result.condition,hits.hits._source.result.execution_time
# Now, let's make the cluster status yellow. To do this, let's create an index, and then
# request more replicas than we have nodes. The cluster won't be able to make the requested
# number of copies, so the index will have unassigned replicas, which puts the cluster in a yellow state.
# Index one document with id 1 and type "test" into the "test" index.
POST test/test/1
{
  "foo": "bar baz"
}
# Request 10 replicas for the "test" index.
PUT /test/_settings
{
  "index": {
    "number_of_replicas": 10
  }
}
# Now re-check the watch history, and you'll see that the condition is being met. When you're
# done inspecting the watch history, you can delete this test index, and the cluster will be
# green again (assuming it was green before).
DELETE test
# For our second example, let's use Topbeat (note: Topbeat will become Metricbeat in 5.0).
# Prerequisite: install Topbeat on at least one machine. In this example, we want to be notified
# when a specific process is running during business hours - the gaming platform Steam.
# First, create a query that will pull back records that show when the "steam_osx" process was
# running within the last 5 minutes.
# Both clauses must match: proc.name is "steam_osx" AND @timestamp falls between now-5m and now.
GET /topbeat-*/process/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "query_string": {
            "query": "proc.name:steam_osx"
          }
        },
        {
          "range": {
            "@timestamp": {
              "gte": "now-5m",
              "lte": "now"
            }
          }
        }
      ]
    }
  }
}
# Now, let's use this query to create a watch!
# Trigger: Run every 10s.
# Input: The query we defined above.
# Condition: Use a script condition to check if the number of results > 0 (have we seen the "steam_osx"
# process running within the last 5 minutes?), and the current time is during working hours (8am - 6pm GMT).
# Action: Post a message to Slack. Note that a Slack incoming webhook was configured in the
# elasticsearch.yml file. The action contains a throttle_period, so it won't flood the Slack channel
# with messages!
# Watch "no_mo_steam": every 10s, search topbeat-* for "steam_osx" process records from the
# last 5 minutes. The script condition is true only when the execution hour is 8..17
# (i.e. 08:00-17:59, the 8am - 6pm window) and the search returned at least one hit.
# The Slack action is throttled to at most one message per 15 minutes.
PUT _watcher/watch/no_mo_steam
{
  "trigger": {
    "schedule": {
      "interval": "10s"
    }
  },
  "input": {
    "search": {
      "request": {
        "indices": "topbeat-*",
        "types": "process",
        "body": {
          "query": {
            "bool": {
              "must": [
                {
                  "query_string": {
                    "query": "proc.name:steam_osx"
                  }
                },
                {
                  "range": {
                    "@timestamp": {
                      "gte": "now-5m",
                      "lte": "now"
                    }
                  }
                }
              ]
            }
          }
        }
      }
    }
  },
  "condition": {
    "script": {
      "inline": "def hour = ctx.execution_time.hourOfDay().get(); return hour >= 8 && hour < 18 && ctx.payload.hits.total > 0;"
    }
  },
  "actions": {
    "notify-slack": {
      "throttle_period": "15m",
      "slack": {
        "message": {
          "to": [
            "@steve"
          ],
          "text": ":charmander: No gaming during work hours! Put your :gameface: on and get back to work! "
        }
      }
    }
  }
}
# Now we can look at the watch history to see the result status of the watch we created. Note the condition
# (and whether it is in the "met" state) and the status of the actions.
# Return the 3 most recent history records for the "no_mo_steam" watch,
# ordered by result.execution_time (newest first).
GET .watch_history*/_search
{
  "query": {
    "query_string": {
      "query": "watch_id: no_mo_steam"
    }
  },
  "size": 3,
  "sort": [
    {
      "result.execution_time": {
        "order": "desc"
      }
    }
  ]
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment