gillg/rrd_to_openmetrics.py

## rrd_to_openmetrics.py
#!/usr/bin/env python
from __future__ import print_function

'''
Usage:
./rrd_to_openmetrics.py /path/rrd_directory/ > openmetrics.txt
'''

'''
Directory structure :
host1/
  load/
     load.rrd
  cpu-0/
     cpu-idle.rrd
     cpu-interrupt.rrd
     cpu-user.rrd
host2/
  load/
     load.rrd
  cpu-0/
     cpu-idle.rrd
     cpu-interrupt.rrd
     cpu-user.rrd
  fpm-site/
     gauge.rrd
host3/
  load/
     load.rrd
  cpu-0/
     cpu-idle.rrd
     cpu-interrupt.rrd
     cpu-user.rrd
'''

'''
Sample output metric:
collectd_cpu{host="myserver.priv.example.com",cpu="0",dimension="idle",cf="average"} 9.9844166667e+01 1582101000000
collectd_load{host="myserver.priv.example.com",type="longterm",cf="average"} 1.9000000000e-01 1582026420000
collectd_fpm_gauge{host="myserver.priv.example.com",fpm="site",cf="max"} 9.9844166667e+01 1582026420000

'''

import argparse
import os
import sys
import re
import tempfile

from lxml import etree
import rrdtool

# In the author's environment `rrdtool xport` did not work correctly:
# the times came back as "None" and the history did not look complete.
# When True, files are converted with rrd_dump(); otherwise rrd_xport() is used.
use_dump = True

def rrd_dump():
    """Print every sample of the current RRD file, read via ``rrdtool dump``.

    Relies on module-level state set by the directory walk: ``root`` and
    ``filename`` locate the RRD file, ``global_metric_name`` and ``labels``
    describe the series, and ``debug`` enables diagnostics on stderr.

    The file is dumped to a temporary XML document. For each consolidation
    function, the rows of the matching RRAs are paired by position with the
    XML comments found next to them; the ``<zone> / <epoch>`` part of a
    comment supplies the row timestamp. One line is printed to stdout per
    data source and row, formatted as ``name{labels} value timestamp`` with
    the timestamp in milliseconds. NaN or empty values, and rows without a
    readable timestamp, are skipped.
    """
    rrd_path = os.path.join(root, filename)

    # The context manager closes (and thereby removes) the temporary dump
    # even when rrdtool or the XML parser raises.
    with tempfile.NamedTemporaryFile() as tmp_file:
        rrdtool.dump(rrd_path, tmp_file.name)
        # Comments must be kept: they carry the row timestamps.
        xml_parser = etree.XMLParser(remove_comments=False)
        tree = etree.parse(tmp_file.name, xml_parser)

    if debug:
        step = int(tree.xpath("/rrd/step/text()")[0])
        print("File : " + rrd_path, file=sys.stderr)
        print("Metric : " + global_metric_name, file=sys.stderr)
        print("CF :", file=sys.stderr)
        print(tree.xpath("/rrd/rra/cf/text()"), file=sys.stderr)
        print("RRA Samplings in seconds :", file=sys.stderr)
        print([step * int(pdp) for pdp in tree.xpath("/rrd/rra/pdp_per_row/text()")], file=sys.stderr)

    # Get all internal datasources (DS)
    datasource_name = [s.strip() for s in tree.xpath("/rrd/ds/name/text()")]
    datasource_type = [s.strip() for s in tree.xpath("/rrd/ds/type/text()")]
    # Sorted so that the output order is the same on every run.
    consolidation_functions = sorted(set(tree.xpath("/rrd/rra/cf/text()")))

    if debug:
        print(str(datasource_name), file=sys.stderr)

    timestamp_re = re.compile(r"\w+ / ([0-9]+)")

    for func in consolidation_functions:
        # The raw (unstripped) text is used in the predicate so that it
        # matches the document exactly as it was dumped.
        rra_database = '/rrd/rra[cf[text()="{cf}"]]/database'.format(cf=func)
        times = tree.xpath(rra_database + '/comment()')
        rows = tree.xpath(rra_database + '/row')
        cf_label = 'cf="{}"'.format(func.strip().lower())

        # zip() stops at the shorter list, so rows without a matching
        # comment are skipped instead of raising IndexError.
        for row, time_comment in zip(rows, times):
            match = timestamp_re.search(str(time_comment))
            # Skip row if no time found
            if not match:
                continue
            timestamp = int(match.group(1)) * 1000

            for key, v in enumerate(row):
                metric_value = (v.text or "").strip()
                if not metric_value or metric_value == "NaN":
                    continue

                metric_name = global_metric_name
                if datasource_type[key] == "COUNTER":
                    metric_name += "_total"

                row_labels = labels + [cf_label]
                # Append label "type" if DS != value (default)
                if datasource_name[key] != 'value':
                    row_labels.append('type="{}"'.format(datasource_name[key]))

                print("{metric}{{{labels}}} {value} {timestamp}".format(
                    metric=metric_name,
                    labels=",".join(row_labels),
                    value=metric_value,
                    timestamp=timestamp))

def rrd_xport():
    """Print the AVERAGE samples of the current RRD file via ``rrdtool xport``.

    Relies on module-level state set by the directory walk: ``root`` and
    ``filename`` locate the RRD file, ``global_metric_name`` and ``labels``
    describe the series.

    Only the first data source reported by ``rrdtool.info`` is exported.
    One line is printed to stdout per non-empty row, formatted as
    ``name{labels} value [timestamp]`` with the timestamp in milliseconds
    (the same unit rrd_dump() prints). The timestamp is left out when it
    cannot be determined, instead of printing the literal ``None``.
    """
    rra_cf = "AVERAGE"
    rrd_path = os.path.join(root, filename)
    infos = rrdtool.info(rrd_path)

    first_ds = None
    for info_key in infos:
        match = re.search(r'^ds\[(.+?)\]', info_key)
        if match:
            first_ds = match.group(1)
            break
    if first_ds is None:
        # Nothing to export; report it rather than fail on an unbound name.
        print("No data source found in " + rrd_path, file=sys.stderr)
        return

    # By convention, add "_total" suffix to counter type metrics
    metric_name = global_metric_name
    if infos["ds[{}].type".format(first_ds)] == "COUNTER":
        metric_name += "_total"

    database = rrdtool.xport(
        "-t", "-m 100000000",
        "DEF:a={}:{}:{}".format(rrd_path, first_ds, rra_cf),
        "XPORT:a:export_metric")

    # NOTE(review): rows are assumed to carry no time of their own and to be
    # stamped start + (index + 1) * step, as rrdtool's XML export does.
    # Confirm 'meta' keys and the offset against the installed binding.
    meta = database.get('meta') or {}
    start = meta.get('start')
    step = meta.get('step')

    for index, metric_data in enumerate(database['data']):
        metric_value = metric_data[0]
        metric_time = metric_data[1] if len(metric_data) > 1 else None
        if metric_value is None:
            continue
        if metric_time is None and start is not None and step is not None:
            metric_time = start + (index + 1) * step

        sample = "{metric}{{{labels}}} {value}".format(
            metric=metric_name,
            labels=",".join(labels),
            value=metric_value)
        if metric_time is not None:
            sample += " {}".format(int(metric_time) * 1000)
        print(sample)


# Command line interface. The parsed values are published as module-level
# names (metric_name_prefix, rrd_dir, debug, args) for the rest of the script.
parser = argparse.ArgumentParser(description="Convert a set of RRD files to openmetrics.")
parser.add_argument(
    'rrd_dir',
    # Without nargs='?' a positional argument is mandatory and its default
    # is never used; with it, the current directory is the fallback.
    nargs='?',
    default='.',
    help="RRD root dir (one dir by host, one dir by metric type, one rrd file by metric)"
)
parser.add_argument(
    '-p','--prefix',
    dest='prefix',
    default='collectd',
    help="Prefix for all metrics"
)
parser.add_argument(
    '-l','--add-label',
    dest='additionnal_labels',
    # May be given several times; stays None when never given.
    action='append',
    help="Aditionals labels to put on each sample. Example: -l 'customer=\"mycustomer\"'"
)
parser.add_argument(
    '-d','--debug',
    dest='debug',
    default=False,
    action='store_true',
    help="Debug mode"
)
args = parser.parse_args()

metric_name_prefix = args.prefix
rrd_dir = args.rrd_dir
debug = args.debug

def _escape_label_value(value):
    """Escape backslash, double quote and newline inside a label value."""
    return value.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')


# Walk <rrd_dir>/<host>/<plugin>[-<instance>]/<type>[-<dimension>].rrd and
# convert every RRD file. The converters read root, filename,
# global_metric_name and labels as module-level names, so these loop
# variables must keep their names.
for root, dirs, files in os.walk(rrd_dir):
    if not files:
        continue
    host = os.path.basename(os.path.dirname(root))
    # "cpu-0" -> ["cpu", "0"]; a directory without "-" yields one element.
    metric_infos = os.path.basename(root).split('-', 1)
    metric_labels = ""
    if len(metric_infos) > 1:
        metric_labels = metric_infos[0] + '="' + _escape_label_value(metric_infos[1]) + '"'
    for filename in files:
        # Require the real ".rrd" extension, not just a name ending in "rrd".
        if not filename.endswith('.rrd'):
            continue
        # splitext drops only the extension; str.replace would also remove
        # ".rrd" from the middle of a name.
        metric_details = os.path.splitext(filename)[0].split('-', 1)
        global_metric_name = metric_name_prefix
        # Prefix with the directory name unless the file name contains it.
        if metric_infos[0] not in metric_details[0]:
            global_metric_name += '_' + metric_infos[0]
        global_metric_name += '_' + metric_details[0]
        metric_dim = ""
        if len(metric_details) > 1:
            metric_dim = metric_details[1]

        labels = ['host="' + _escape_label_value(host) + '"']
        if args.additionnal_labels:
            # User supplied labels are expected to be already formatted.
            labels += args.additionnal_labels
        if metric_labels:
            labels.append(metric_labels)
        if metric_dim:
            labels.append('dimension="' + _escape_label_value(metric_dim) + '"')

        if use_dump:
            rrd_dump()
        else:
            rrd_xport()
	#!/usr/bin/env python
	from __future__ import print_function

	'''
	Usage:
	./rrd_to_openmetrics.py /path/rrd_directory/ > openmetrics.txt
	'''

	'''
	Directory structure :
	host1/
	  load/
	     load.rrd
	  cpu-0/
	     cpu-idle.rrd
	     cpu-interrupt.rrd
	     cpu-user.rrd
	host2/
	  load/
	     load.rrd
	  cpu-0/
	     cpu-idle.rrd
	     cpu-interrupt.rrd
	     cpu-user.rrd
	  fpm-site/
	     gauge.rrd
	host3/
	  load/
	     load.rrd
	  cpu-0/
	     cpu-idle.rrd
	     cpu-interrupt.rrd
	     cpu-user.rrd
	'''

	'''
	Sample output metric:
	collectd_cpu{host="myserver.priv.example.com",cpu="0",dimension="idle",cf="average"} 9.9844166667e+01 1582101000000
	collectd_load{host="myserver.priv.example.com",type="longterm",cf="average"} 1.9000000000e-01 1582026420000
	collectd_fpm_gauge{host="myserver.priv.example.com",fpm="site",cf="max"} 9.9844166667e+01 1582026420000

	'''

	import argparse
	import os
	import sys
	import re
	import tempfile

	from lxml import etree
	import rrdtool

	# In the author's environment `rrdtool xport` did not work correctly:
	# the times came back as "None" and the history did not look complete.
	# When True, files are converted with rrd_dump(); otherwise rrd_xport() is used.
	use_dump = True

	def rrd_dump():
	    """Print every sample of the current RRD file, read via ``rrdtool dump``.

	    Relies on module-level state set by the directory walk: ``root`` and
	    ``filename`` locate the RRD file, ``global_metric_name`` and ``labels``
	    describe the series, and ``debug`` enables diagnostics on stderr.

	    The file is dumped to a temporary XML document. For each consolidation
	    function, the rows of the matching RRAs are paired by position with the
	    XML comments found next to them; the ``<zone> / <epoch>`` part of a
	    comment supplies the row timestamp. One line is printed to stdout per
	    data source and row, formatted as ``name{labels} value timestamp`` with
	    the timestamp in milliseconds. NaN or empty values, and rows without a
	    readable timestamp, are skipped.
	    """
	    rrd_path = os.path.join(root, filename)

	    # The context manager closes (and thereby removes) the temporary dump
	    # even when rrdtool or the XML parser raises.
	    with tempfile.NamedTemporaryFile() as tmp_file:
	        rrdtool.dump(rrd_path, tmp_file.name)
	        # Comments must be kept: they carry the row timestamps.
	        xml_parser = etree.XMLParser(remove_comments=False)
	        tree = etree.parse(tmp_file.name, xml_parser)

	    if debug:
	        step = int(tree.xpath("/rrd/step/text()")[0])
	        print("File : " + rrd_path, file=sys.stderr)
	        print("Metric : " + global_metric_name, file=sys.stderr)
	        print("CF :", file=sys.stderr)
	        print(tree.xpath("/rrd/rra/cf/text()"), file=sys.stderr)
	        print("RRA Samplings in seconds :", file=sys.stderr)
	        print([step * int(pdp) for pdp in tree.xpath("/rrd/rra/pdp_per_row/text()")], file=sys.stderr)

	    # Get all internal datasources (DS)
	    datasource_name = [s.strip() for s in tree.xpath("/rrd/ds/name/text()")]
	    datasource_type = [s.strip() for s in tree.xpath("/rrd/ds/type/text()")]
	    # Sorted so that the output order is the same on every run.
	    consolidation_functions = sorted(set(tree.xpath("/rrd/rra/cf/text()")))

	    if debug:
	        print(str(datasource_name), file=sys.stderr)

	    timestamp_re = re.compile(r"\w+ / ([0-9]+)")

	    for func in consolidation_functions:
	        # The raw (unstripped) text is used in the predicate so that it
	        # matches the document exactly as it was dumped.
	        rra_database = '/rrd/rra[cf[text()="{cf}"]]/database'.format(cf=func)
	        times = tree.xpath(rra_database + '/comment()')
	        rows = tree.xpath(rra_database + '/row')
	        cf_label = 'cf="{}"'.format(func.strip().lower())

	        # zip() stops at the shorter list, so rows without a matching
	        # comment are skipped instead of raising IndexError.
	        for row, time_comment in zip(rows, times):
	            match = timestamp_re.search(str(time_comment))
	            # Skip row if no time found
	            if not match:
	                continue
	            timestamp = int(match.group(1)) * 1000

	            for key, v in enumerate(row):
	                metric_value = (v.text or "").strip()
	                if not metric_value or metric_value == "NaN":
	                    continue

	                metric_name = global_metric_name
	                if datasource_type[key] == "COUNTER":
	                    metric_name += "_total"

	                row_labels = labels + [cf_label]
	                # Append label "type" if DS != value (default)
	                if datasource_name[key] != 'value':
	                    row_labels.append('type="{}"'.format(datasource_name[key]))

	                print("{metric}{{{labels}}} {value} {timestamp}".format(
	                    metric=metric_name,
	                    labels=",".join(row_labels),
	                    value=metric_value,
	                    timestamp=timestamp))

	def rrd_xport():
	    """Print the AVERAGE samples of the current RRD file via ``rrdtool xport``.

	    Relies on module-level state set by the directory walk: ``root`` and
	    ``filename`` locate the RRD file, ``global_metric_name`` and ``labels``
	    describe the series.

	    Only the first data source reported by ``rrdtool.info`` is exported.
	    One line is printed to stdout per non-empty row, formatted as
	    ``name{labels} value [timestamp]`` with the timestamp in milliseconds
	    (the same unit rrd_dump() prints). The timestamp is left out when it
	    cannot be determined, instead of printing the literal ``None``.
	    """
	    rra_cf = "AVERAGE"
	    rrd_path = os.path.join(root, filename)
	    infos = rrdtool.info(rrd_path)

	    first_ds = None
	    for info_key in infos:
	        match = re.search(r'^ds\[(.+?)\]', info_key)
	        if match:
	            first_ds = match.group(1)
	            break
	    if first_ds is None:
	        # Nothing to export; report it rather than fail on an unbound name.
	        print("No data source found in " + rrd_path, file=sys.stderr)
	        return

	    # By convention, add "_total" suffix to counter type metrics
	    metric_name = global_metric_name
	    if infos["ds[{}].type".format(first_ds)] == "COUNTER":
	        metric_name += "_total"

	    database = rrdtool.xport(
	        "-t", "-m 100000000",
	        "DEF:a={}:{}:{}".format(rrd_path, first_ds, rra_cf),
	        "XPORT:a:export_metric")

	    # NOTE(review): rows are assumed to carry no time of their own and to be
	    # stamped start + (index + 1) * step, as rrdtool's XML export does.
	    # Confirm 'meta' keys and the offset against the installed binding.
	    meta = database.get('meta') or {}
	    start = meta.get('start')
	    step = meta.get('step')

	    for index, metric_data in enumerate(database['data']):
	        metric_value = metric_data[0]
	        metric_time = metric_data[1] if len(metric_data) > 1 else None
	        if metric_value is None:
	            continue
	        if metric_time is None and start is not None and step is not None:
	            metric_time = start + (index + 1) * step

	        sample = "{metric}{{{labels}}} {value}".format(
	            metric=metric_name,
	            labels=",".join(labels),
	            value=metric_value)
	        if metric_time is not None:
	            sample += " {}".format(int(metric_time) * 1000)
	        print(sample)


	# Command line interface. The parsed values are published as module-level
	# names (metric_name_prefix, rrd_dir, debug, args) for the rest of the script.
	parser = argparse.ArgumentParser(description="Convert a set of RRD files to openmetrics.")
	parser.add_argument(
	    'rrd_dir',
	    # Without nargs='?' a positional argument is mandatory and its default
	    # is never used; with it, the current directory is the fallback.
	    nargs='?',
	    default='.',
	    help="RRD root dir (one dir by host, one dir by metric type, one rrd file by metric)"
	)
	parser.add_argument(
	    '-p','--prefix',
	    dest='prefix',
	    default='collectd',
	    help="Prefix for all metrics"
	)
	parser.add_argument(
	    '-l','--add-label',
	    dest='additionnal_labels',
	    # May be given several times; stays None when never given.
	    action='append',
	    help="Aditionals labels to put on each sample. Example: -l 'customer=\"mycustomer\"'"
	)
	parser.add_argument(
	    '-d','--debug',
	    dest='debug',
	    default=False,
	    action='store_true',
	    help="Debug mode"
	)
	args = parser.parse_args()

	metric_name_prefix = args.prefix
	rrd_dir = args.rrd_dir
	debug = args.debug

	def _escape_label_value(value):
	    """Escape backslash, double quote and newline inside a label value."""
	    return value.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')


	# Walk <rrd_dir>/<host>/<plugin>[-<instance>]/<type>[-<dimension>].rrd and
	# convert every RRD file. The converters read root, filename,
	# global_metric_name and labels as module-level names, so these loop
	# variables must keep their names.
	for root, dirs, files in os.walk(rrd_dir):
	    if not files:
	        continue
	    host = os.path.basename(os.path.dirname(root))
	    # "cpu-0" -> ["cpu", "0"]; a directory without "-" yields one element.
	    metric_infos = os.path.basename(root).split('-', 1)
	    metric_labels = ""
	    if len(metric_infos) > 1:
	        metric_labels = metric_infos[0] + '="' + _escape_label_value(metric_infos[1]) + '"'
	    for filename in files:
	        # Require the real ".rrd" extension, not just a name ending in "rrd".
	        if not filename.endswith('.rrd'):
	            continue
	        # splitext drops only the extension; str.replace would also remove
	        # ".rrd" from the middle of a name.
	        metric_details = os.path.splitext(filename)[0].split('-', 1)
	        global_metric_name = metric_name_prefix
	        # Prefix with the directory name unless the file name contains it.
	        if metric_infos[0] not in metric_details[0]:
	            global_metric_name += '_' + metric_infos[0]
	        global_metric_name += '_' + metric_details[0]
	        metric_dim = ""
	        if len(metric_details) > 1:
	            metric_dim = metric_details[1]

	        labels = ['host="' + _escape_label_value(host) + '"']
	        if args.additionnal_labels:
	            # User supplied labels are expected to be already formatted.
	            labels += args.additionnal_labels
	        if metric_labels:
	            labels.append(metric_labels)
	        if metric_dim:
	            labels.append('dimension="' + _escape_label_value(metric_dim) + '"')

	        if use_dump:
	            rrd_dump()
	        else:
	            rrd_xport()