Skip to content

Instantly share code, notes, and snippets.

@asquelt
Created January 30, 2014 14:11
Show Gist options
  • Save asquelt/8709174 to your computer and use it in GitHub Desktop.
Save asquelt/8709174 to your computer and use it in GitHub Desktop.
# puppet managed file, for more info 'puppet-find-resources $filename'
# BEFORE YOU MAKE ANY CHANGES, READ https://stonka.non.3dart.com/wiki/wiki/Puppet#Zarz.C4.85dzanie_konfiguracjami
# Hostname where NSCA (Nagios Service Check Adaptor) reports should be sent.
# Kept as a global so the report processor below can read it; the value is
# interpolated verbatim into the send_nsca command line (its -H option), so
# keep it a plain, shell-safe host name.
$nsca_host="nsca"
require 'puppet/reports'
require 'open3'
Puppet::Reports.register_report(:nsca) do
desc "Sends reports summary to Nagios NSCA (Nagios Service Check Adaptor) and syslog."
# http://projects.puppetlabs.com/projects/1/wiki/Reports_And_Reporting
# http://projects.puppetlabs.com/projects/puppet/wiki/Report_Format_2
# http://projects.puppetlabs.com/projects/puppet/wiki/Report_Format_1
# Report entry point: condenses this run report into one passive check
# result for the "MANIFEST" service and pipes it to send_nsca.
#
# The submitted line has the form
#   <short host>:MANIFEST:<status>:<status text>: <message>
# where <status> is a Nagios state (0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN):
# * 2 when at least one resource failed (error logs are prepended),
# * 3 when the node is flagged "noauto" and has pending changes/noops,
# * 3 (or 1 for compile/prerun errors) when the report has no resources,
# * 0 otherwise.
# The outcome of the send_nsca call is logged through Puppet.notice.
def process
  service = "MANIFEST"
  status = 3 # better be safe than sorry
  status_str = nil
  hostname = self.host.gsub(/\..*/, "")
  manifest = nsca_catalog_version || "unknown"
  failures, resources, noops, changes, time = nsca_metric_counts

  # Log entries tagged 'x-dont-count-as-change' are subtracted from the
  # change/noop counters (never driving a counter below zero).
  ignores = 0
  self.logs.each do |log|
    ignores += 1 if log.tagged?('x-dont-count-as-change')
  end
  changes -= ignores if ignores <= changes
  noops -= ignores if ignores <= noops

  noauto_source = nsca_noauto_source(changes, noops)

  message = sprintf("%s change%s, %s noop%s, %s failure%s, %s ignore%s, catalog %s%s",
    changes == 0 ? 'no' : changes,
    changes == 1 ? '' : 's',
    noops == 0 ? 'no' : noops,
    noops == 1 ? '' : 's',
    failures == 0 ? 'no' : failures,
    failures == 1 ? '' : 's',
    ignores == 0 ? 'no' : ignores,
    ignores == 1 ? '' : 's',
    manifest,
    noauto_source ? ", noauto/#{noauto_source}" : ''
  )

  if failures > 0
    # problems, return CRIT + error logs
    status = 2
    details = []
    self.logs.each do |log|
      next unless log.level.to_s == 'err'
      details << nsca_log_origin(log) + " " + nsca_sanitize(log.message)[0..80] + "; "
    end
    message = details.join + message
  elsif noauto_source && (changes > 0 || noops > 0)
    # noauto, return UNKN + change logs
    status = 3
    status_str = sprintf("%s PENDING NOAUTO - do code review and apply/revoke changes ie. 'puppetd --reviewed --test'", changes + noops)
    details = []
    self.logs.each do |log|
      next unless log.source !~ /Puppet$/ && log.level.to_s != 'info'
      details << nsca_log_origin(log) + " "
    end
    message = details.join + message
  elsif resources == 0
    # no resources? suspicious, return UNKN
    status = 3
    message = sprintf("No resources - %s", manifest == 'unknown' ? "Couldn't get catalog" : "Node is not set, catalog #{manifest}")
    details = []
    self.logs.each do |log|
      next unless log.level.to_s == 'err'
      # if prerun_command failed on excessive load, return UNKN (notifications not sent from nagios)
      if log.message =~ /prerun_command:.*Machine is overloaded/m
        status = 3
      # if catalog compilation has failed or prerun_command failed, return WARN
      elsif log.message =~ /Error \d+ on SERVER|prerun_command/
        status = 1
      end
      details << nsca_log_origin(log) + " " + nsca_sanitize(log.message) + "; "
    end
    message = details.join + message
  elsif failures == 0
    # no failures (with or without changes), return OK
    status = 0
  end
  # any other case keeps the initial UNKN status

  if time > 0
    message = sprintf("%s (%s)", message, Time.at(time).gmtime.strftime('%M:%S'))
  end

  unless status_str
    status_str = case status
                 when 0 then sprintf("%s OK", resources)
                 when 1 then sprintf("%s WARNING", failures)
                 when 2 then sprintf("%s CRITICAL", failures)
                 when 3 then sprintf("%s UNKNOWN", changes + noops)
                 else "UNDEFINED"
                 end
  end
  # drop a leading zero count, e.g. "0 UNKNOWN" -> "UNKNOWN"
  status_str = status_str.sub(/^0\s+/, '')

  nsca_send(sprintf("%s:%s:%s:%s: %s", hostname, service, status, status_str, message))
end

# Returns the catalog version of this run, or nil when it cannot be found.
def nsca_catalog_version
  # 2.6+ only
  if self.respond_to?('configuration_version') && self.configuration_version
    return self.configuration_version
  end
  # 0.25 only: dig the version out of the log entries, first hit wins
  version = nil
  self.logs.each do |log|
    if log.message =~ /Applying.configuration.version.'(.*)'/
      version = $1
    elsif log.respond_to?('version') && log.version
      # :version method has been removed from Puppet::Util::Log class in 2.6
      version = log.version
    end
    break if version
  end
  version
end

# Extracts the counters used by #process from the report metrics.
# Returns [failures, resources, noops, changes, time]; each defaults to 0.
def nsca_metric_counts
  failures = resources = noops = changes = time = 0
  self.metrics.each do |_key, metric|
    metric.values.each do |name, _label, value|
      key = name.to_s
      case metric.name
      when 'resources'
        failures = value if key == 'failed'
        resources = value if key == 'total'
      when 'events'
        noops = value if key == 'noop'
      when 'time'
        time = value.to_i if key == 'total'
      when 'changes'
        # every entry overwrites the previous one, the last value wins
        changes = value
      end
    end
  end
  [failures, resources, noops, changes, time]
end

# Tells whether the node is flagged "puppetnoauto" and where the flag was
# found: returns 'report', 'facts' or nil.
def nsca_noauto_source(changes, noops)
  if self.respond_to?('resource_statuses') && self.resource_statuses.respond_to?('each')
    # unwrap puppetnoauto status from reports
    self.resource_statuses.each do |_name, resource|
      resource.tags.each do |tag|
        return 'report' if tag == 'nsca::puppetnoauto'
      end
    end
    return nil
  end

  # unwrap puppetnoauto status from facts - this is expensive, so it is only
  # done when there is something pending
  return nil unless changes > 0 || noops > 0
  facts_file = File.join(Puppet[:yamldir], 'facts', self.host + '.yaml')
  return nil unless File.exist?(facts_file)
  # NOTE(review): YAML.load_file deserializes arbitrary objects; the facts
  # cache is presumably written by the master itself - confirm it is trusted.
  facts = YAML.load_file(facts_file)
  facts.values.each do |name, value|
    return 'facts' if name == "puppetnoauto" && value == "true"
  end
  nil
end

# Returns the log source without the "//<host>" prefix and leading slashes.
def nsca_log_origin(log)
  log.source.sub(/\/\/#{Regexp.escape(self.host)}/, '').sub(/^\/+/, '')
end

# Makes a log message safe for the single-line NSCA record: braces and angle
# brackets become parentheses, newlines are removed (send_nsca treats every
# input line as a separate check result).
def nsca_sanitize(text)
  text.tr('{<', '(').tr('}>', ')').gsub(/\n/, '')
end

# Pipes one check-result line to send_nsca and logs the outcome.
def nsca_send(line)
  nsca_exe = "/usr/sbin/send_nsca -H #{$nsca_host} -to 5 -d :"
  nsca_out = ''
  nsca_exit = nil
  # the block form closes all three pipes even if reading/writing raises
  Open3.popen3(nsca_exe) do |stdin, stdout, stderr, wait_thr|
    stdin.puts line
    stdin.close
    nsca_out = stdout.gets.to_s.chomp
    nsca_out += stderr.gets.to_s.chomp
    if wait_thr
      # Ruby 1.9+: the child's status is only available from the wait thread
      child = wait_thr.value
      nsca_exit = child.exitstatus || child.to_i
    end
  end
  # older Rubies yield no wait thread; fall back to the last child status
  nsca_exit = $?.to_i if nsca_exit.nil?
  Puppet.notice "NSCA sent: \"#{line}\" to: \"#{nsca_exe}\" result: \"#{nsca_out}\" exit: #{nsca_exit}"
end
end
@asquelt
Copy link
Author

asquelt commented Jan 30, 2014

The Nagios passive service MANIFEST (and the dummy command it uses as a freshness fallback) should look like this:

define command {
        command_name                   check_dummy_comment
        command_line                   $USER1$/check_dummy $ARG1$ $ARG2$
}

define service {
        use                            generic-service
        service_description            MANIFEST
        host_name                      foo
        contact_groups                 foo
        check_command                  check_dummy_comment!3!'Not reporting for last 1 hour'
        check_period                   24x7
        normal_check_interval          60
        retry_check_interval           5
        max_check_attempts             2
        notification_period            24x7
        notification_interval          1440
        notification_options           c,r,w ; without UNKNOWNs!
        active_checks_enabled          0
        passive_checks_enabled         1
        is_volatile                    0
        check_freshness                1
        freshness_threshold            4500
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment