Skip to content

Instantly share code, notes, and snippets.

@rodjek
Created January 13, 2011 08:40
Show Gist options
  • Save rodjek/777588 to your computer and use it in GitHub Desktop.
Save rodjek/777588 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import errno
import math
import os
import signal
import subprocess
import sys
import syslog
import time
# Path to the Silverline `lmc` executable that this script runs to list
# tagged processes and to read the system-wide load figures.
LMC = "/sbin/lmc"
# Identifier handed to syslog.openlog(), so every message logged by this
# script is tagged with it.
SYSLOG_IDENT = "silverline-oom"
class SilverlineUtil:
    """Helpers shared by the Silverline classes: size conversion and syslog output."""

    # Translate a size expressed in some unit into a number of bytes.
    #
    # val  - The size to convert; anything float() accepts (a number or a
    #        numeric String).
    # unit - The unit that val is expressed in, as a String. Must be exactly
    #        "GB", "MB", "KB" or "B"; any other value raises KeyError.
    #
    # Returns the size in bytes as an Integer (fractions are truncated).
    def to_bytes(self, val, unit):
        bytes_per_unit = {
            "GB": 1 << 30,
            "MB": 1 << 20,
            "KB": 1 << 10,
            "B": 1,
        }
        # Resolve the unit before converting val, so an unknown unit is
        # reported even when val itself is not numeric.
        factor = bytes_per_unit[unit]
        return int(float(val) * factor)

    # Write a message to syslog.
    #
    # message - The String to log.
    # error   - A Boolean; True logs at LOG_ERR priority, False (the default)
    #           logs at LOG_NOTICE priority.
    #
    # Returns nothing.
    def log(self, message, error=False):
        priority = syslog.LOG_ERR if error else syslog.LOG_NOTICE
        syslog.syslog(priority, message)
class SilverlineProcess(SilverlineUtil):
    """One process reported by `lmc -xm -stm <tag>`, with helpers to inspect and kill it."""

    # Names of the stdin, stdout and stderr descriptors inside /proc/<pid>/fd;
    # these are left out of the open-file listing.
    _STDIO_FDS = ("0", "1", "2")

    # Instantiate a new SilverlineProcess object.
    #
    # data - An Array of four Strings which contain the output of
    #        `lmc -xm -stm <tag>` for a single process.
    #
    # Returns a SilverlineProcess instance.
    def __init__(self, data):
        self.pid = None
        self.name = None
        self.mem_total = None
        self.mem_rss = None
        self.mem_shared = None
        self.mem_code = None
        self.__parse(data)

    # Read one of the NUL-delimited files in this process's /proc directory.
    #
    # name - The String file name below /proc/<pid> (e.g. "cmdline").
    #
    # Returns the contents as a String with every NUL replaced by a space.
    # Raises IOError or OSError if the file cannot be read, for instance
    # because the process has already exited.
    def __read_proc_file(self, name):
        # Read raw bytes so content that is not valid text cannot raise a
        # decoding error on Python 3; the `with` closes the file on any exit.
        with open("/proc/%s/%s" % (self.pid, name), "rb") as fd:
            raw = fd.read()
        text = raw.replace(b"\0", b" ")
        if not isinstance(text, str):
            # Python 3 only: bytes must become text before being logged.
            text = text.decode("utf-8", "replace")
        return text

    # Retrieve the command line for the process from /proc.
    #
    # Returns the command line as a String.
    def __get_cmdline(self):
        return self.__read_proc_file("cmdline")

    # Retrieve the environment for the process from /proc.
    #
    # Returns the environment as a space delimited String.
    def __get_environ(self):
        return self.__read_proc_file("environ")

    # Retrieve a list of paths to files that the process currently holds
    # file descriptors for. Result does not include stdin, stdout or stderr.
    #
    # Returns an Array of path Strings.
    def __get_open_files(self):
        fd_dir = "/proc/%s/fd" % self.pid
        # os.listdir() returns entries in arbitrary order, so stdio has to be
        # excluded by descriptor name rather than by position in the list.
        return [
            os.path.realpath(os.path.join(fd_dir, fd))
            for fd in os.listdir(fd_dir)
            if fd not in self._STDIO_FDS
        ]

    # Parse the output from `lmc -xm -stm <tag>` for an individual process.
    #
    # data - An Array of four Strings containing the process information from
    #        Silverline. Only the first and the fourth line are used.
    #
    # Returns nothing.
    # Raises IndexError if a line has fewer fields than expected and KeyError
    # if a size unit is not one that to_bytes understands.
    def __parse(self, data):
        # A real list (not map()) so it can be indexed on Python 3 as well.
        lines = [line.strip() for line in data]
        proc_fields = lines[0].split()
        mem_fields = lines[3].split()

        self.pid = proc_fields[1]
        self.name = proc_fields[2]

        # The memory line is read as (value, unit) token pairs. Each unit
        # token loses its last character and the first value loses its first
        # character - presumably separators/brackets in lmc's output format;
        # confirm against real `lmc -xm` output.
        self.mem_total = self.to_bytes(mem_fields[2][1:], mem_fields[3][:-1])
        self.mem_rss = self.to_bytes(mem_fields[4], mem_fields[5][:-1])
        self.mem_shared = self.to_bytes(mem_fields[6], mem_fields[7][:-1])
        self.mem_code = self.to_bytes(mem_fields[8], mem_fields[9][:-1])

    # Kill the process described by this object and log what was killed.
    #
    # force - A Boolean controlling the signal that is sent to the process.
    #         SIGKILL if True, SIGTERM if False.
    #
    # Returns nothing. A process that no longer exists is ignored silently;
    # any other failure to deliver the signal is logged as an error.
    def kill(self, force=False):
        if force:
            signum, signame = signal.SIGKILL, "SIGKILL"
        else:
            signum, signame = signal.SIGTERM, "SIGTERM"

        # Collect the diagnostics before signalling, because /proc/<pid>
        # disappears once the process exits. This is best effort: a failure
        # here must not stop the signal from being sent.
        details = []
        try:
            details.append(self.__get_cmdline())
            details.append(self.__get_environ())
            details.extend(self.__get_open_files())
        except (IOError, OSError):
            pass

        try:
            os.kill(int(self.pid), signum)
        except ValueError:
            self.log("[%s] Invalid PID, unable to signal %s"
                     % (self.pid, self.name), error=True)
            return
        except OSError as err:
            # ESRCH: the process has already died of natural causes.
            if err.errno != errno.ESRCH:
                self.log("[%s] Failed to signal %s with %s: %s"
                         % (self.pid, self.name, signame, err), error=True)
            return

        self.log("[%s] Signalling %s with %s" % (self.pid, self.name, signame))
        for detail in details:
            self.log("[%s] %s" % (self.pid, detail))

    # Check if the process is running.
    #
    # Returns a Boolean which is True if /proc still has an entry for the PID.
    def is_alive(self):
        return os.path.exists("/proc/%s" % self.pid)
class SilverlineOOMKiller(SilverlineUtil):
    """Kills the largest processes of a Silverline tag until system RSS drops below a threshold."""

    # Number of `lmc -xm -stm <tag>` output lines that describe one process.
    _LINES_PER_PROC = 4

    # Instantiate a new SilverlineOOMKiller object.
    #
    # argv[1] - The hostname String of the server.
    # argv[2] - The current UNIX timestamp as a String.
    # argv[3] - The String Silverline tag name.
    # argv[4] - The String name of the monitor that triggered the alert.
    # argv[5] - The String value of the trigger threshold.
    # argv[6] - The String value of the monitor at the time of the alert.
    # argv[7] - The String action of the script ('trigger' or 'cancel').
    # argv[8] - The String value of the cancel threshold.
    #
    # Returns a SilverlineOOMKiller instance.
    # Raises IndexError if fewer than nine arguments are supplied.
    def __init__(self, argv):
        self.hostname = argv[1]
        self.timestamp = time.localtime(float(argv[2]))
        self.tag = argv[3]
        self.monitor = argv[4]
        self.trigger_threshold = argv[5]
        self.value = argv[6]
        self.action = argv[7]
        # The last two characters are dropped and the rest is read as
        # megabytes - presumably the value always carries an "MB" suffix;
        # confirm against the monitor configuration.
        self.cancel_threshold = self.to_bytes(argv[8][:-2], "MB")
        syslog.openlog(SYSLOG_IDENT)

    # Retrieve a list of potentially killable processes and kill as many of
    # the largest processes as required until the total RSS of all processes
    # on the system is less than the cancel threshold value.
    #
    # Returns nothing. Exits the interpreter with status 0 as soon as the
    # system RSS is at or below the cancel threshold.
    def run(self):
        # Biggest resident set first, so the fewest processes get killed.
        candidates = sorted(self.procs(), key=lambda proc: proc.mem_rss,
                            reverse=True)
        for proc in candidates:
            if self.system_rss() <= self.cancel_threshold:
                sys.exit(0)

            # Ask politely first, then escalate to SIGKILL if the process is
            # still around a second later.
            proc.kill()
            time.sleep(1)
            if proc.is_alive():
                proc.kill(force=True)
                time.sleep(1)
            # Pause before the RSS is measured again for the next candidate.
            time.sleep(2)

    # Retrieve a list of processes that match the Silverline tag name.
    #
    # Returns an Array of instantiated SilverlineProcess objects. Logs an
    # error and exits with status 1 if lmc reports a failure.
    def procs(self):
        # universal_newlines makes the output text on Python 3 as well, which
        # the String parsing in SilverlineProcess relies on.
        p = subprocess.Popen([LMC, "-xm", "-stm", self.tag],
                             stdout=subprocess.PIPE, universal_newlines=True)
        output = p.communicate()[0]
        if p.returncode != 0:
            self.log("%s -xm -stm %s returned %s. Bailing!" % (LMC, self.tag, p.returncode), error=True)
            sys.exit(1)

        # The first output line is not process data and is skipped.
        lines = output.splitlines()[1:]
        step = self._LINES_PER_PROC
        chunks = [lines[i:i + step] for i in range(0, len(lines), step)]
        # A trailing, incomplete group cannot be parsed into a process, so it
        # is ignored instead of crashing the whole run.
        return [SilverlineProcess(chunk) for chunk in chunks
                if len(chunk) == step]

    # Retrieve the combined Resident Set Size (RSS) value for all processes
    # on the server in bytes.
    #
    # Returns an Integer RSS value. Logs an error and exits with status 1 if
    # lmc fails or its output cannot be parsed.
    def system_rss(self):
        p = subprocess.Popen([LMC, "-xm", "--show-load"],
                             stdout=subprocess.PIPE, universal_newlines=True)
        output = p.communicate()[0]
        if p.returncode != 0:
            self.log("%s -xm --show-load returned %s. Bailing!" % (LMC, p.returncode), error=True)
            sys.exit(1)

        try:
            # The RSS figure is taken from the last output line: the fifth
            # token is the value, the sixth is the unit followed by one
            # trailing character that is stripped.
            fields = output.splitlines()[-1].strip().split()
            return self.to_bytes(fields[4], fields[5][:-1])
        except (IndexError, KeyError, ValueError):
            self.log("%s -xm --show-load produced unparseable output. Bailing!" % LMC, error=True)
            sys.exit(1)
# Script entry point: build the killer from the command-line arguments and
# use whatever run() returns as the process exit status.
if __name__ == '__main__':
    killer = SilverlineOOMKiller(sys.argv)
    sys.exit(killer.run())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment