Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save patcon/6126490 to your computer and use it in GitHub Desktop.
Save patcon/6126490 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
#
# This is a special meta-check. It runs ping checks against all hosts in
# the /endpoints API and sends individual results directly to sensu-client via
# the udp/3030 client socket. This differs from the normal sensu check model,
# where individual scripts run and their exit status and output are used to
# create a single event.
#
# The reason for this check is to be able to dynamically ping a list of hosts
# without the race conditions and timing issues involved in creating individual
# sensu check definitions using chef.
#
require 'rubygems'
require 'json'
require 'shellwords'
require 'popen4'
require 'forkmanager' # gem install parallel-forkmanager
require 'rest-client'
require 'sensu-plugin/check/cli'
class PantheonCheckPingEndpoints < Sensu::Plugin::Check::CLI
# Command-line options. Values are read elsewhere through config[:name].
# option :host, :short => '-h HOST', :long => "--host HOST", :required => true

# RTT thresholds in milliseconds; nil (the default) disables the comparison.
option :critical_rtt, :short => '-c MS', :long => "--critical-rtt MS", :default => nil, :proc => Proc.new { |c| c.to_f }
option :warning_rtt, :short => '-w MS', :long => "--warning-rtt MS", :default => nil, :proc => Proc.new { |w| w.to_f }
# Number of lost packets tolerated before a critical/warning event is sent.
option :critical_loss, :long => "--critical-loss COUNT", :default => 0, :proc => Proc.new { |c| c.to_f }
option :warning_loss, :long => "--warning-loss COUNT", :default => 0, :proc => Proc.new { |w| w.to_f }
# Extra arguments appended verbatim to the fping command line.
option :options, :short => '-f OPTIONS', :long => "--fping-args OPTIONS", :default => nil
option :verbose, :short => '-v', :long => "--verbose", :boolean => true, :default => false
# Handler name placed in every event sent to the sensu-client socket.
option :handler, :short => '-l HANDLER', :long => '--handler HANDLER', :default => 'default'
# Private IPs are only pinged for endpoints whose zone equals this value.
option :zone, :short => '-z ZONE', :long => '--zone ZONE', :required => true
# Upper bound handed to Parallel::ForkManager.
option :procs, :short => '-p NUM_PROCS', :long => '--procs NUM_PROCS', :default => 50, :proc => Proc.new { |p| p.to_i }
# pantheon api
option :api, :long => '--api API_URL', :default => 'https://redacted:443'
# Coerced to a number so a value given on the command line is not passed to
# the HTTP client as a String.
option :timeout, :long => '--api-timeout SECONDS', :default => 30, :proc => Proc.new { |t| t.to_f }
option :client_cert, :long => '--client-cert FILE', :default => 'cert.pem'
option :ca_file, :long => '--ca-file FILE', :default => 'ca.pem'
# Performs a GET against the Pantheon API and returns the response.
#
# resource - path (plus query string) appended to config[:api].
# jsonify  - when true (the default) the body is parsed as JSON with
#            symbolized keys; when false the raw response is returned.
#
# The client certificate and the RSA private key are both loaded from the
# file named by config[:client_cert]. Each handled failure is reported through
# `warning` (provided by the plugin base class, not defined in this file).
def pantheon_api(resource, jsonify=true)
  # Read the PEM file once; it supplies both the certificate and the key.
  pem = File.read(config[:client_cert])
  request = RestClient::Resource.new(config[:api] + resource, {
    :timeout => config[:timeout],
    :ssl_client_cert => OpenSSL::X509::Certificate.new(pem),
    :ssl_client_key => OpenSSL::PKey::RSA.new(pem),
    :ssl_ca_file => config[:ca_file],
    # NOTE(review): peer verification is disabled even though a CA file is
    # supplied, so the server certificate is never checked. Left unchanged to
    # preserve behavior; confirm whether VERIFY_PEER can be enabled.
    :verify_ssl => OpenSSL::SSL::VERIFY_NONE
  })
  response = request.get
  jsonify ? JSON.parse(response, :symbolize_names => true) : response
rescue Errno::ECONNREFUSED
  warning "Connection refused"
# The specific RestClient errors must be listed before RequestFailed: in newer
# rest-client releases they are subclasses of it and would otherwise never be
# reached.
rescue RestClient::RequestTimeout
  warning "Connection timed out"
rescue RestClient::Unauthorized
  warning "Missing or incorrect Pantheon API credentials"
rescue RestClient::RequestFailed
  warning "Request failed"
rescue JSON::ParserError
  warning "Pantheon API returned invalid JSON"
end
# Fetches the endpoint listing from the Pantheon API (parsed JSON, since
# pantheon_api is called with its default jsonify setting).
def endpoints
  listing_path = '/endpoints?extended=0&source=check_ping'
  pantheon_api(listing_path)
end
# Sends one newline-terminated message to the local sensu-client UDP socket
# (127.0.0.1:3030).
#
# msg - String payload; the send_* helpers pass a JSON document.
#
# Returns whatever UDPSocket#send returns. The socket is always closed, even
# when the send raises, so repeated calls do not accumulate descriptors.
def sensu_client_socket(msg)
  socket = UDPSocket.new
  socket.send(msg + "\n", 0, '127.0.0.1', 3030)
ensure
  socket.close if socket
end
# Reports an OK result (status 0) for check_name through the sensu-client
# socket, using the handler from config[:handler].
def send_ok(check_name, msg)
  event = {
    'name'    => check_name,
    'status'  => 0,
    'output'  => 'OK: ' + msg,
    'handler' => config[:handler]
  }
  sensu_client_socket(event.to_json)
end
# Reports a WARNING result (status 1) for check_name through the sensu-client
# socket, using the handler from config[:handler].
def send_warning(check_name, msg)
  event = {
    'name'    => check_name,
    'status'  => 1,
    'output'  => 'WARNING: ' + msg,
    'handler' => config[:handler]
  }
  sensu_client_socket(event.to_json)
end
# Reports a CRITICAL result (status 2) for check_name through the sensu-client
# socket, using the handler from config[:handler].
def send_critical(check_name, msg)
  event = {
    'name'    => check_name,
    'status'  => 2,
    'output'  => 'CRITICAL: ' + msg,
    'handler' => config[:handler]
  }
  sensu_client_socket(event.to_json)
end
# Runs `fping -s` against a single host and captures its output.
#
# host - address or hostname to ping. It is shell-escaped before being placed
#        on the command line because it comes from the remote API response;
#        ordinary hostnames and IP addresses are left unchanged by escaping.
#
# config[:options] is appended verbatim (it is operator-supplied and may hold
# several arguments).
#
# Returns [cmd, exit_status, stats, errors] where stats is fping's stdout,
# errors its stderr (nil if it could not be read) and exit_status is nil when
# popen4 returned no status object.
def run_fping(host)
  cmd = "fping -s #{Shellwords.escape(host.to_s)} #{config[:options]}"
  puts "Command:\n#{cmd}" if config[:verbose]
  stats = nil
  errors = nil
  result = POpen4::popen4(cmd) do |_stdin, stdout, stderr, _pid|
    stats = stdout.read
    begin
      errors = stderr.read
    rescue
      # stderr is not always open for reading.
    end
  end
  exit_status = result ? result.exitstatus : nil
  puts "Output:\n#{stats}" if config[:verbose]
  puts "Exit Status:\n#{exit_status}" if config[:verbose]
  [cmd, exit_status, stats, errors]
end
# Extracts the "max round trip time" figure from fping statistics text and
# returns it as a Float. Fails with NoMethodError when the line is absent.
def get_max_rtt(stats)
  rtt_match = stats.match(/(\d+[\.\d+]*) ms \(max round trip time\)/)
  rtt_match[1].to_f
end
# Computes how many ICMP echo requests went unanswered, from fping statistics
# text containing "<n> ICMP Echos sent" and "<n> ICMP Echo Replies received".
#
# stats - String holding the fping summary.
#
# Returns sent minus received as an Integer.
# Raises ArgumentError when either counter line is missing, so the caller's
# rescue reports a parsing problem instead of a bogus zero loss.
def get_lost_packet_count(stats)
  # Each pattern needs a capture group; without one MatchData#[1] is nil and
  # nil.to_i silently yields 0.
  sent = stats.match(/(\d+) ICMP Echos sent/)
  received = stats.match(/(\d+) ICMP Echo Replies received/)
  raise ArgumentError, "fping packet counters not found in output" unless sent && received
  sent[1].to_i - received[1].to_i
end
# Pings one host via run_fping and reports the outcome to sensu-client under
# check_name.
#
# check_name - name of the check the event is sent under.
# hostname   - endpoint hostname; only used in verbose output.
# host       - address handed to run_fping.
#
# Exit status 0 is graded against the configured RTT and packet-loss
# thresholds; every other status is mapped to a fixed message. Every outcome
# is delivered through send_ok / send_warning / send_critical so it is
# attributed to check_name.
def ping_host(check_name, hostname, host)
  cmd, exit_status, stats, errors = run_fping(host)
  puts "results from #{host} #{hostname}: #{exit_status}, #{stats}" if config[:verbose]
  case exit_status
  when 0
    begin
      # fping is documented to print its -s summary on stderr, so search both
      # streams; this works with or without a "2>&1" in --fping-args.
      # NOTE(review): confirm against the installed fping version.
      summary = [stats, errors].compact.join("\n")
      max_rtt = get_max_rtt(summary)
      lost_packets = get_lost_packet_count(summary)
      if config[:critical_rtt] && max_rtt > config[:critical_rtt]
        send_critical check_name, "Host '#{host}' reached in #{max_rtt} ms, which is greater than specified RTT of #{config[:critical_rtt]} ms"
      elsif config[:warning_rtt] && max_rtt > config[:warning_rtt]
        # Must go through send_warning like every other branch: the bare
        # `warning` helper is not tied to check_name.
        send_warning check_name, "Host '#{host}' reached in #{max_rtt} ms, which is greater than specified RTT of #{config[:warning_rtt]} ms"
      elsif config[:critical_loss] && lost_packets > config[:critical_loss]
        send_critical check_name, "Host '#{host}' dropped #{lost_packets}, which is greater than allowed loss of #{config[:critical_loss]} packet"
      elsif config[:warning_loss] && lost_packets > config[:warning_loss]
        send_warning check_name, "Host '#{host}' dropped #{lost_packets}, which is greater than allowed loss of #{config[:warning_loss]} packet"
      else
        send_ok check_name, "Host '#{host}' reached in #{max_rtt} ms dropping #{lost_packets} packets"
      end
    rescue
      # Any parsing failure is surfaced with the raw command results attached.
      send_critical check_name, "Error extracting results: [#{cmd}, #{exit_status}, #{stats}, #{errors}]"
    end
  when 1
    send_critical check_name, "Host '#{host}' is unreachable"
  when 2
    send_warning check_name, "Invalid IP address: #{host}"
  when 3
    send_warning check_name, "Invalid fping command: #{cmd}"
  when 4
    send_warning check_name, "Fping system call error: #{cmd}"
  when nil
    send_warning check_name, "Cannot locate 'fping', please add to your system path."
  else
    # Previously any other status produced no event at all.
    send_warning check_name, "Unexpected fping exit status #{exit_status}: #{cmd}"
  end
end
# this is the main method executed in the child processes
# Handles a single endpoint: derives its two check names and either clears
# them (endpoint marked down) or pings the endpoint's addresses.
#
# uuid - endpoint identifier, used only in log output.
# meta - endpoint attributes; :host, :hostname, :pool, :zone and :private_ip
#        are read.
def process_endpoint(uuid, meta)
  puts "in child process: pid: #{$$}, endpoint: #{uuid}" if config[:verbose]

  # Both attributes are needed to build the check names and the ping target.
  if [meta[:host], meta[:hostname]].any? { |attribute| attribute.nil? }
    puts "skipping endpoint #{uuid}, missing 'host' or 'hostname' attributes."
    return
  end

  name = meta[:hostname]
  public_check = "#{name}_ping_check"
  private_check = "#{name}_private_ip_ping_check"

  if meta[:pool] != 'down'
    ping_host(public_check, name, meta[:host])
    # the private address is only pinged when the endpoint lives in the zone
    # given with '-z'
    ping_host(private_check, name, meta[:private_ip]) if config[:zone] == meta[:zone]
  else
    # a down endpoint gets an 'OK' event on both checks so open alerts clear
    send_ok(public_check, "host is marked down. no ping necessary.")
    send_ok(private_check, "host is marked down. no ping necessary.")
  end
end
# Check entry point: hands every endpoint to a forked worker (at most
# config[:procs] at a time), waits for all of them, then reports OK for the
# meta-check itself.
def run
  manager = Parallel::ForkManager.new(config[:procs])
  endpoints.each do |uuid, meta|
    # start blocks until a new process slot is available; a truthy result
    # means this process should move on to the next endpoint.
    next if manager.start(uuid)
    process_endpoint(uuid, meta)
    manager.finish(0)
  end
  manager.wait_all_children
  ok "Finished ping checks."
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment