Created
August 3, 2016 17:32
-
-
Save skearns64/773dfd64c51d3007baf489be83549e0c to your computer and use it in GitHub Desktop.
Watcher Webinar - 2016-07-28 - Example Watches
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is the sense buffer used in the Getting Started with Watcher Webinar. Follow along with the video here: | |
# https://www.elastic.co/webinars/getting-started-with-alerting-for-elasticsearch | |
# Step 1: Craft a query that can retrieve the data you would like to alert on. | |
# Here, we're grabbing the most recent cluster state, which contains the cluster_state.status | |
# Return a single cluster_state document: the one with the newest timestamp.
GET /.marvel-es-1*/cluster_state/_search
{
  "size": 1,
  "sort": [
    { "timestamp": { "order": "desc" } }
  ],
  "query": {
    "match_all": {}
  }
}
# Now, let's create the Watch. Note the 4 top-level objects: Trigger, Input, Condition, and Actions
# The Trigger defines when the input should be retrieved. Today, these are schedule-based. | |
# The Input defines what data to gather. This is often an Elasticsearch query, but could be an HTTP input as well | |
# The Condition looks at the data gathered from the Input, and decides whether the actions should be fired. | |
# The Actions are a set of actions that should be run when the condition is met. We support Logging, Email, Slack, Hipchat, Pagerduty, and Webhook actions. | |
# | |
# Note that here, the Input is the same as the query above.
# Watch "cluster_health": every 10s, fetch the newest cluster_state document and
# write a log message when its cluster_state.status equals "yellow".
PUT _watcher/watch/cluster_health
{
  "trigger": {
    "schedule": { "interval": "10s" }
  },
  "input": {
    "search": {
      "request": {
        "indices": ".marvel-es-1*",
        "types": "cluster_state",
        "body": {
          "size": 1,
          "sort": [
            { "timestamp": { "order": "desc" } }
          ],
          "query": {
            "match_all": {}
          }
        }
      }
    }
  },
  "condition": {
    "compare": {
      "ctx.payload.hits.hits.0._source.cluster_state.status": { "eq": "yellow" }
    }
  },
  "actions": {
    "logging": {
      "logging": { "text": "Cluster State is Yellow!" }
    }
  }
}
# Once we create the watch, we can start looking at the watch history. Here, we retrieve the
# 1 most recent history entry. The most interesting section is "result", as this section
# contains the results of the input (this is the data available to the watch condition), the
# condition output, and information about the success/failure of the actions.
# Most recent history entry for the cluster_health watch (newest execution first).
GET .watch_history*/_search
{
  "size": 1,
  "sort": [
    { "result.execution_time": { "order": "desc" } }
  ],
  "query": {
    "query_string": { "query": "watch_id: cluster_health" }
  }
}
# The watch history entries have a lot of information. You can filter it down by adding this: | |
# ?filter_path=hits.hits._source.result.condition,hits.hits._source.result.execution_time | |
# Now, let's make the cluster status yellow. To do this, let's create an index, and then | |
# request more replicas than we have nodes. The cluster won't be able to make the requested | |
# number of copies, so the index will have unassigned replicas, which puts the cluster in a yellow state. | |
# Index one document so that the "test" index exists.
POST test/test/1
{
  "foo": "bar baz"
}

# Request 10 replicas for the "test" index.
PUT /test/_settings
{
  "index": {
    "number_of_replicas": 10
  }
}

# Re-check the watch history at this point and you'll see that the condition is being met.
# Once you have finished inspecting the watch history, delete this test index; the cluster
# will be green again (assuming it was green before).
DELETE test
# For our second example, let's use topbeat (note: topbeat will become metricbeat in 5.0). | |
# Prerequisite: install Topbeat on at least one machine. In this example, we want to be notified | |
# when a specific process is running during business hours - the gaming platform Steam.
# First, create a query that will pull back records that show when the "steam_osx" process was | |
# running within the last 5 minutes | |
# Process records whose proc.name is steam_osx and whose @timestamp lies in the last 5 minutes.
GET /topbeat-*/process/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "@timestamp": { "lte": "now", "gte": "now-5m" }
          }
        },
        {
          "query_string": { "query": "proc.name:steam_osx" }
        }
      ]
    }
  }
}
# Now, let's use this query to create a watch! | |
# Trigger: Run every 10s | |
# Input: The query we defined above. | |
# Condition: Use a script condition to check if the number of results > 0 (have we seen the "steam_osx" | |
# process running within the last 5 minutes?), and the current time is during working hours (8am - 6pm GMT).
# Action: Post a message to slack. Note that a slack incoming webhook was configured in the | |
# elasticsearch.yml file. The action contains a throttle_period, so it won't flood the slack channel | |
# with messages! | |
# Watch "no_mo_steam": every 10s, search topbeat-* process records for steam_osx seen in the
# last 5 minutes. The script condition is met only when the search returned at least one hit
# AND the hour of ctx.execution_time is in the range 8..17, i.e. 08:00 up to (but not
# including) 18:00. The upper bound is exclusive so that 18:00-18:59 is not treated as
# working hours. NOTE(review): the time zone of ctx.execution_time is assumed to be GMT/UTC,
# as the description of this watch states - confirm for your deployment.
# The Slack action is throttled to at most one message per 15 minutes.
PUT _watcher/watch/no_mo_steam
{
  "trigger": {
    "schedule": {
      "interval": "10s"
    }
  },
  "input": {
    "search": {
      "request": {
        "indices": "topbeat-*",
        "types": "process",
        "body": {
          "query": {
            "bool": {
              "must": [
                {
                  "query_string": {
                    "query": "proc.name:steam_osx"
                  }
                },
                {
                  "range": {
                    "@timestamp": {
                      "gte": "now-5m",
                      "lte": "now"
                    }
                  }
                }
              ]
            }
          }
        }
      }
    }
  },
  "condition": {
    "script": {
      "inline": "def hour = ctx.execution_time.hourOfDay().get(); return hour >= 8 && hour < 18 && ctx.payload.hits.total > 0;"
    }
  },
  "actions": {
    "notify-slack": {
      "throttle_period": "15m",
      "slack": {
        "message": {
          "to": [
            "@steve"
          ],
          "text": ":charmander: No gaming during work hours! Put your :gameface: on and get back to work! "
        }
      }
    }
  }
}
# Now we can look at the watch history to see the result status of the watch we created. Note the condition, | |
# and whether it's in state "met" and the status of the actions. | |
# The 3 most recent history entries for the no_mo_steam watch (newest execution first).
GET .watch_history*/_search
{
  "size": 3,
  "sort": [
    { "result.execution_time": { "order": "desc" } }
  ],
  "query": {
    "query_string": { "query": "watch_id: no_mo_steam" }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment