Skip to content

Instantly share code, notes, and snippets.

@arianvp
Created February 27, 2017 21:22
Show Gist options
  • Save arianvp/2a239fe85c6f0f6081e635a13fe41ad6 to your computer and use it in GitHub Desktop.
Save arianvp/2a239fe85c6f0f6081e635a13fe41ad6 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
import logging
import os
import re
import subprocess
import sys
import toml
from systemd import journal
# Module logger: records go to the systemd journal through JournalHandler and,
# with propagation switched off, are not passed on to ancestor loggers.
logger = logging.getLogger(__name__)
logger.addHandler(journal.JournalHandler())
logger.propagate = False
def read_config(config_dir):
    """Load every ``*.monitor`` file in *config_dir* into a per-unit mapping.

    Each file is parsed as TOML and must contain a ``Monitor`` table with a
    ``Regex`` key. The pattern is compiled once here so the polling loop can
    reuse it. The optional ``Unit`` key names the systemd unit the monitor
    applies to; when absent it defaults to the file name with its extension
    replaced by ``.service``.

    Args:
        config_dir: Directory to scan. Entries whose extension is not
            ``.monitor`` are ignored.

    Returns:
        A dict mapping unit name to that unit's ``Monitor`` table, in which
        ``Regex`` has been replaced by the compiled pattern object.

    Raises:
        KeyError: If a file lacks the ``Monitor`` table or the ``Regex`` key.
        OSError: If the directory or one of its files cannot be read.
        re.error: If a ``Regex`` value is not a valid pattern.
    """
    config = {}
    # Sorted so that, should two files name the same unit, which one wins
    # does not depend on directory enumeration order.
    for config_file in sorted(os.listdir(config_dir)):
        name, ext = os.path.splitext(config_file)
        if ext != ".monitor":
            continue
        # os.listdir() yields bare names; join the directory back on so the
        # file is opened from config_dir and not from the working directory.
        config_path = os.path.join(config_dir, config_file)
        # TOML documents are UTF-8 by specification.
        with open(config_path, 'r', encoding='utf-8') as f:
            monitor_config = toml.loads(f.read())['Monitor']
        if 'Regex' not in monitor_config:
            raise KeyError('key "Regex" not in: {}'.format(config_path))
        unit = monitor_config.get("Unit", name + ".service")
        # Precompile the match regex so it is not rebuilt per journal entry.
        monitor_config['Regex'] = re.compile(monitor_config['Regex'])
        config[unit] = monitor_config
    return config
def alert(config, entry):
    """Run every alert command configured for a unit against a journal entry.

    Each element of ``config['Alerts']`` is an argv list. Before execution the
    specifiers ``%n`` and ``%M`` inside each argument are replaced by the
    entry's ``_SYSTEMD_UNIT`` and ``MESSAGE`` fields respectively. A command
    that exits non-zero is logged and the remaining commands still run.

    Args:
        config: The monitor table for the unit; must contain ``Alerts``.
        entry: The journal entry (mapping) that triggered the alert.

    Raises:
        KeyError: If ``Alerts`` is missing, or a specifier refers to a field
            that *entry* does not have.
        OSError: If a command cannot be started at all (only a non-zero exit
            status is handled here).
    """
    # Placement specifier -> journal field it expands to.
    specifier_fields = {"%n": "_SYSTEMD_UNIT", "%M": "MESSAGE"}

    def expand(match):
        # Looked up lazily so a field is only required when it is referenced.
        return entry[specifier_fields[match.group()]]

    def interpret(arg):
        # One pass over the argument: every specifier is expanded (not just
        # the first kind found) and substituted text is never re-scanned.
        return re.sub(r"%[nM]", expand, arg)

    for command in config['Alerts']:
        interpreted_alert = [interpret(arg) for arg in command]
        try:
            # TODO(arianvp|duijf): sanitize environment?
            subprocess.run(interpreted_alert, check=True)
        except subprocess.CalledProcessError as e:
            # NOTE: stderr is not captured by the run() call above, so
            # e.stderr is None here; the field is kept for log consumers.
            logger.error("Alert script failed to execute",
                         extra={"ALERT_CMD": e.cmd, "ALERT_STDERR": e.stderr})
def main():
    """Parse the command line, then follow the journal and fire alerts.

    Loads the monitor definitions from the config directory, opens a journal
    reader restricted to the configured units, and loops forever: wait for new
    entries, and for each entry whose ``MESSAGE`` matches its unit's compiled
    ``Regex`` (anchored at the start, via ``match``), run that unit's alerts.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f", "--files", help="CSV list of journal files to monitor")
    parser.add_argument("-d", "--directory", help="directory of journal files to monitor")
    parser.add_argument("-c", "--config-dir",
                        help="config file directory for journal-monitor (default: /etc/journal-monitor.conf.d/)",  # noqa
                        default="/etc/journal-monitor.conf.d/")
    parser.add_argument("--poll-timeout-secs",
                        help="timeout between journald polls (default: 1)",
                        # Without a type, a value given on the command line
                        # would reach reader.wait() as a string.
                        type=float,
                        default=1)
    args = parser.parse_args()

    config = read_config(args.config_dir)

    # --files is a comma-separated list; the reader wants a list of paths.
    files = args.files.split(",") if args.files else None
    reader = journal.Reader(files=files, path=args.directory)
    for unit in config:
        reader.add_match(_SYSTEMD_UNIT=unit)

    reader.seek_tail()
    # After seek_tail() the read pointer must be stepped back once, otherwise
    # iteration may start by replaying old entries (see python-systemd docs).
    reader.get_previous()

    while True:
        # We poll with a timeout so Python still responds to
        # signals and can terminate cleanly.
        reader.wait(timeout=args.poll_timeout_secs)
        for entry in reader:
            # Skip entries for units without a monitor (possible when no
            # match filters were installed because the config is empty).
            monitor = config.get(entry.get('_SYSTEMD_UNIT'))
            if monitor is None:
                continue
            if monitor['Regex'].match(entry['MESSAGE']):
                alert(monitor, entry)
# Script entry point: start the monitor only when executed directly, not when
# the module is imported.
if __name__ == "__main__":
    main()
# Example monitor definition, read from a ".monitor" file in the config
# directory.
[Monitor]
# systemd unit to watch; when omitted, "<file name>.service" is used.
Unit="monitor.service"
# Pattern matched from the start of each journal MESSAGE of that unit.
Regex=".*online"
# Commands to run on a match, one argv list per command. In the arguments
# below "%n" expands to the unit name and "%M" to the matching message.
Alerts=[["/bin/echo", "error in unit", "%n", "%M"]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment