-
-
Save wolph/f569ba733499a67f1179be97ff69825b to your computer and use it in GitHub Desktop.
# vim: set ft=dosini:
# Supervisord config for the barman exporter
[program:barman_exporter]
# Make sure locally installed binaries are found first on PATH
environment=PATH=/usr/local/bin:%(ENV_PATH)s
# NOTE(review): env normally lives at /usr/bin/env — confirm this path exists
command=/usr/local/bin/env python3 /var/barman/barman_exporter.py
user=barman
autostart=true
# Stop/kill the whole process group so no child processes linger
stopasgroup=true
killasgroup=true
autorestart=true
startretries=10000
stderr_logfile=/var/log/%(program_name)s.err.log
stdout_logfile=/var/log/%(program_name)s.out.log
# Rotate at 200MB, keep one rotated file per stream
stdout_logfile_maxbytes=200MB
stdout_logfile_backups=1
stderr_logfile_maxbytes=200MB
stderr_logfile_backups=1
#!/usr/bin/env python3.6
import collections
import contextlib
import logging
import sys
import time
from datetime import datetime

import prometheus_client
from prometheus_client import core

from barman import backup
from barman import cli
from barman import output
from barman.server import CheckOutputStrategy
class Output(output.ConsoleOutputWriter):
    """Output writer that captures barman check results in memory.

    Barman reports every check through ``result_check``; the latest
    status per check name is stored in ``self.results`` so that
    ``BarmanCollector`` can turn it into metrics.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Instance attribute instead of the original shared class-level
        # dict, so state cannot leak between writer instances.
        self.results = collections.defaultdict(dict)

    def result_check(self, server_name, check, status, hint=None):
        # NOTE(review): keyed by check name only — server_name is ignored,
        # so checks from different servers overwrite each other unless the
        # consumer reads/clears the dict between servers.
        self.results[check] = dict(status=status, hint=hint)
class BarmanCollector:
    """Prometheus collector for barman backup counts, ages and checks.

    Runs barman's own check machinery on every scrape and reads the
    results back from the ``Output`` writer installed in ``__main__``.
    """

    def __init__(self, args):
        # args mimics the argparse namespace barman's cli module expects
        # (see the Args class in __main__).
        self.args = args
        # Shared with the Output writer registered through
        # output.set_output_writer(); barman's checks write into it.
        self.results = output._writer.results

    def collect(self):
        """Yield one GaugeMetricFamily per exported metric."""
        cli.global_config(self.args)
        # cli.global_config() installs log handlers on every invocation;
        # drop them again, otherwise each scrape leaves another log file
        # open and the process leaks file descriptors.
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)
        servers = cli.get_server_list(self.args)

        collectors = dict(
            barman_backups=core.GaugeMetricFamily(
                'barman_backups', 'total backups available',
                labels=['server']),
            barman_last_backup=core.GaugeMetricFamily(
                'barman_last_backup', 'last backup timestamp',
                labels=['server']),
            barman_last_backup_age=core.GaugeMetricFamily(
                'barman_last_backup_age', 'seconds since last backup',
                labels=['server']),
            barman_status=core.GaugeMetricFamily(
                'barman_status', 'Several barman status checks',
                labels=['server', 'check']),
        )

        for server_name, server in servers.items():
            # Clear the previous server's check results so they do not
            # leak into this server's barman_status metrics.
            self.results.clear()
            # Invalidate the backup cache so backups taken since the
            # previous scrape are picked up.
            server.backup_manager._backup_cache = None

            backups = len(server.backup_manager.get_available_backups(
                status_filter=(backup.BackupInfo.DONE,)))
            collectors['barman_backups'].add_metric([server_name], backups)

            last_backup = server.backup_manager.get_last_backup_id()
            if last_backup:
                now = datetime.now()
                # Backup ids look like 20190101T120000.
                last_backup = datetime.strptime(last_backup, '%Y%m%dT%H%M%S')
                collectors['barman_last_backup'].add_metric(
                    [server_name], time.mktime(last_backup.timetuple()))
                collectors['barman_last_backup_age'].add_metric(
                    [server_name], (now - last_backup).total_seconds())

            with contextlib.closing(server):
                check_strategy = CheckOutputStrategy()
                # Check WAL archive
                server.check_archive(check_strategy)
                # Postgres configuration is not available on passive nodes
                if not server.passive_node:
                    server.check_postgres(check_strategy)
                # Check barman directories from barman configuration
                server.check_directories(check_strategy)
                # Check retention policies
                server.check_retention_policy_settings(check_strategy)
                # Check for backup validity
                server.check_backup_validity(check_strategy)
                # Executes the backup manager set of checks
                server.backup_manager.check(check_strategy)
                # Check if the msg_list of the server
                # contains messages and output eventual failures
                server.check_configuration(check_strategy)
                # Executes check() for every archiver, passing
                # remote status information for efficiency
                for archiver in server.archivers:
                    archiver.check(check_strategy)
                # Check archiver errors
                server.check_archiver_errors(check_strategy)

            collector = collectors['barman_status']
            for name, value in self.results.items():
                # Normalize the check name into a metric label value.
                key = name.replace(' ', '_').replace('-', '_').lower()
                # Skip checks that carry a hint — presumably these are
                # informational rather than simple pass/fail statuses.
                if value['hint']:
                    continue
                collector.add_metric([server_name, key], int(value['status']))

        for collector in collectors.values():
            yield collector
if __name__ == '__main__':
    # Install the in-memory Output writer so barman's check results are
    # captured instead of being printed to the console.
    output.set_output_writer(Output())

    class Args:
        # Mimics the argparse namespace that barman's cli module expects.
        # 'all' makes get_server_list() return every configured server.
        server_name = ['all']
        # NOTE(review): quiet/debug/format are normally plain flags or
        # strings; the writer object is presumably passed here just to be
        # a truthy placeholder barman accepts — confirm against barman's
        # cli argument handling.
        quiet = output._writer
        debug = output._writer
        color = 'auto'
        format = debug

    args = Args()
    core.REGISTRY.register(BarmanCollector(args))
    # Start up the HTTP server that exposes the metrics on port 8000.
    prometheus_client.start_http_server(8000)
    # Keep the process alive; prometheus_client serves scrapes from a
    # daemon thread, so the main thread just idles.
    while True:
        time.sleep(1)
Yeah, I initially went the same route but that didn't work too great. It's obvious that Barman was written for a single purpose by someone who is used to writing in languages other than Python :)
Your module looks quite nice, I'll probably switch to that one soon. Thanks for packaging it so nicely!
@wolph If you are wondering how to fix your code to not open log files indefinitely you can add:
# [...]
import logging
logging.disable(logging.CRITICAL)
class BarmanCollector:
# [...]
def collect(self):
cli.global_config(self.args)
for handler in logging.root.handlers[:]:
logging.root.removeHandler(handler)
# [...]
My pull request with a JSON output formatter has been added to Barman 2.9+ and now we can use it. I store the metrics as a textfile which is digested by node-exporter. It works better that way because executing barman commands can take a while (even minutes).
Barman exporter here: https://github.com/ahes/prometheus-barman-exporter
You can also install barman-exporter with pip:
pip3 install barman-exporter
Excellent work, that looks great already.
Adding server.backup_manager._backup_cache = None
before you use the backup_manager makes sure that backup_manager obtains any new backups, fixing the exporter not updating after a new backup.
Hi,
Here is my barman-exporter: https://github.com/ahes/prometheus-barman-exporter
It exports similar metrics but I did some naming and convention changes to align with prometheus manual about writing exporters.
I started by writing a JsonOutputWriter class and using barman's cli.py directly, but after two hours I decided to let go. Instead I parse the barman cli output. It is way simpler and works just fine. Thank you for your gist — it was a great inspiration.