Skip to content

Instantly share code, notes, and snippets.

@yudhiwidyatama
Created August 8, 2020 05:00
Show Gist options
  • Save yudhiwidyatama/8df476c42206dfac2203f70af3aab939 to your computer and use it in GitHub Desktop.
Save yudhiwidyatama/8df476c42206dfac2203f70af3aab939 to your computer and use it in GitHub Desktop.
Check infra nodes for anomalies or spikes in open file, process, and TCP socket counts
require "yaml"
# Short names of the infra nodes to probe.
hosts = %w[infra1 infra2 infra3]

# Working directories of the alert tooling. WORKDIR is not referenced by the
# visible part of this script; WORKDIR2 is where telegram_send.sh is looked up.
WORKDIR = "/home/telegram/ALERT_TELEGRAM"
WORKDIR2 = "/root/ALERT_TELEGRAM"

# First argument handed to telegram_send.sh.
TELEGRAM_USER = "OPENSHIFT_ALERT"

# Command prefix for sending an alert; the quoted message text is appended to it.
TELEGRAM_BASE2 = WORKDIR2 + "/telegram_send.sh " + TELEGRAM_USER

# Fully qualified host names, in the same order as +hosts+.
hostsFull = hosts.map { |short_name| "#{short_name}.paas.telkom.co.id" }
# Number of most-recent samples kept per metric in each host's state file.
HISTORY_SIZE = 5

# A sample raises an alert when it exceeds the rolling average times this factor.
SPIKE_FACTOR = 1.1

# State-file key => label used in the alert text, listed in alerting order.
METRICS = {
  "history" => "open files count",
  "historypscount" => "process count",
  "historytcpcount" => "tcp socket count"
}.freeze

# Prints the Telegram command line built from TELEGRAM_BASE2 and +message+,
# then runs it. Returns whatever Kernel#system returns.
def send_alert(message)
  cmd = "#{TELEGRAM_BASE2} \"#{message}\""
  print cmd + "\n"
  system(cmd)
end

# Runs +remote_cmd+ on +host+ over ssh (10 s timeout) with local stdout
# redirected to checkinfra.<host>. Any pipe inside +remote_cmd+ is interpreted
# by the LOCAL shell, so for "... | wc -l" the returned status is that of wc,
# not of ssh.
def run_remote(host, remote_cmd)
  system("timeout 10 ssh -l root -o ConnectTimeout=10 -o BatchMode=yes #{host} #{remote_cmd} > checkinfra.#{host}")
end

# Loads the per-host state hash from +path+. Returns an empty hash when the
# file does not exist or does not contain a YAML mapping.
# NOTE(review): YAML.load is kept from the original; the state file is written
# by this script itself and is assumed to be trusted.
def load_state(path)
  return {} unless File.exist?(path)

  loaded = YAML.load(File.read(path))
  loaded.is_a?(Hash) ? loaded : {}
end

# Integer average of +samples+ (integer division, as in the original script),
# or nil when there is nothing to average.
def integer_average(samples)
  return nil if samples.empty?

  samples.reduce(:+) / samples.size
end

# For every host: sample open files, process count and TCP socket count over
# ssh, alert via Telegram when a sample exceeds the rolling average by more
# than 10%, then append the samples to the host's memfile-<host> state file.
hostsFull.each do |h|
  unless run_remote(h, "sysctl fs.file-nr")
    print "Failure ssh to "+h+"\n"
    send_alert("AlertSystem/Openshift: unable to contact #{h}")
    next
  end
  sample_file = "checkinfra.#{h}"
  # Third whitespace-separated token of the sysctl output; presumably the
  # allocated file-handle count ("fs.file-nr = <allocated> <unused> <max>").
  openfiles = File.read(sample_file).split[2].to_i

  # Line count of the ps output; the selection flags presumably exclude
  # kernel threads (children of PID 2) -- TODO confirm.
  run_remote(h, "ps --ppid 2 -p 2 --deselect | wc -l")
  pscount = File.read(sample_file).to_i

  # Line count of the netstat output for inet sockets.
  run_remote(h, "netstat -nA inet | wc -l")
  tcpcount = File.read(sample_file).to_i

  samples = {
    "history" => openfiles,
    "historypscount" => pscount,
    "historytcpcount" => tcpcount
  }

  # A failed ssh in a "... | wc -l" pipeline still exits 0 and yields a count
  # of 0. Recording that would drag the average down and cause false alerts
  # later, so treat a non-positive sample as a failed probe.
  if samples.values.any? { |value| value <= 0 }
    print "Failure collecting metrics from "+h+"\n"
    send_alert("AlertSystem/Openshift: unable to contact #{h}")
    next
  end

  memfilename = "memfile-" + h
  mem = load_state(memfilename)

  METRICS.each do |key, label|
    value = samples[key]
    # Tolerate state files that lack this key (or hold a non-list value).
    history = mem[key].is_a?(Array) ? mem[key] : []
    avg = integer_average(history)
    if avg && value > avg * SPIKE_FACTOR
      send_alert("AlertSystem/Openshift: #{label} on #{h} = #{value} , average #{avg} ")
    end
    history.push(value)
    history.shift while history.size > HISTORY_SIZE
    mem[key] = history
  end

  # Block form closes (and therefore flushes) the state file deterministically.
  File.open(memfilename, "w") { |state_file| YAML.dump(mem, state_file) }
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment