| #!/usr/bin/python | |
| from __future__ import print_function | |
| import cProfile | |
| from datetime import datetime | |
| import os | |
| import pstats | |
| import resource | |
| import StringIO | |
| import sys | |
| import time | |
| import cpmapi as c_api | |
| from pcp import pmapi | |
# Checked by Test.__init__() and Test.profile_report() to decide whether the
# cProfile profiler is switched on and whether its report is printed.
USE_PROFILER = True
# Passed to pstats' print_stats() in Test.profile_report() to limit how many
# entries of the profile are printed.
TOP_PROFILED_FUNCTIONS = 15
| class _Options(object): | |
| def __init__(self): | |
| self.input_file = "" | |
| self.start_time = None | |
| self.end_time = None | |
| self.include = [] | |
| self.exclude = [] | |
| self.custom_graphs = [] | |
| self.raw = False | |
| self.output_file = "output.pdf" | |
| self.interval = None | |
| self.opts = self.setup() | |
| def setup(self): | |
| """ Setup default command line argument option handling """ | |
| opts = pmapi.pmOptions() | |
| opts.pmSetOptionCallback(self.option_callback) | |
| opts.pmSetOverrideCallback(self.override) | |
| opts.pmSetShortOptions("?S:T:t:a:") | |
| opts.pmSetOptionFlags(c_api.PM_OPTFLAG_BOUNDARIES) | |
| opts.pmSetOptionFlags(c_api.PM_OPTFLAG_MULTI) | |
| opts.pmSetLongOptionStart() | |
| opts.pmSetLongOptionFinish() | |
| opts.pmSetLongOptionInterval() | |
| opts.pmSetLongOptionArchive() | |
| opts.pmSetLongOptionVersion() | |
| opts.pmSetLongOptionHelp() | |
| return opts | |
| def override(self, opt): | |
| """ Override a few standard PCP options to match free(1) """ | |
| return 0 | |
| def option_callback(self, opt, optarg, index): | |
| """ Perform setup for an individual command line option """ | |
| # pylint: disable=W0613 | |
| if opt == "S": | |
| self.start_time = optarg | |
| elif opt == "T": | |
| self.end_time = optarg | |
| elif opt == "t": | |
| self.interval = optarg | |
| elif opt == "a": | |
| self.input_file = optarg | |
class Test(object):
    """Benchmark harness that reads every metric stored in a PCP archive.

    The instance prints resource usage before and after parsing and, when
    USE_PROFILER is set, profiles the work with cProfile so that
    profile_report() can print the most expensive calls.
    """

    def __init__(self):
        """Print the initial resource usage and start the profiler if enabled."""
        self.pmns = {}          # metric label -> (type, sem, units)
        self.counter = 0        # number of metrics seen by pmns_callback()
        self.context = None     # pmContext, created by parse()
        self.start = None
        self.end = None
        self.interval = None
        self.profile = cProfile.Profile()
        self.start_time = time.time()
        usage = resource.getrusage(resource.RUSAGE_SELF)
        # usage[0], usage[1], usage[2] are ru_utime, ru_stime and ru_maxrss.
        print("Before parsing: usertime={0} systime={1} mem={2} MB"
              .format(usage[0], usage[1], (usage[2] / 1024.0)))
        # Enable exactly once and only when requested: a second enable() on
        # an already active profiler is an error on recent Python versions.
        if USE_PROFILER:
            self.profile.enable()

    def profile_report(self):
        """Stop the profiler and print its top entries by cumulative time.

        Does nothing when USE_PROFILER is false.
        """
        if not USE_PROFILER:
            return
        self.profile.disable()
        str_io = StringIO.StringIO()
        pstat = pstats.Stats(self.profile, stream=str_io).sort_stats('cumulative')
        pstat.print_stats(TOP_PROFILED_FUNCTIONS)
        print("\nProfiling of parse()")
        print(str_io.getvalue())

    def pmns_callback(self, label):
        """PMNS traversal callback: record (type, sem, units) for ``label``."""
        pmid = self.context.pmLookupName(label)
        desc = self.context.pmLookupDesc(pmid[0])
        self.pmns[label] = (desc.type, desc.sem, desc.contents.units)
        self.counter += 1

    def _timestamp_to_secs(self, tstamp):
        '''Convert a timestamp object (tv_sec + tv_usec) to seconds'''
        return tstamp.tv_sec + (tstamp.tv_usec * 10**-6)

    def get_metrics(self):
        '''Returns a list of metric labels of all the metrics contained in
        the archive'''
        # list() keeps the documented "list" contract on Python 3, where
        # dict.keys() is only a view.
        return list(self.pmns.keys())

    def get_metric_info(self, metric):
        '''Given a metric label, return (type, sem, units)'''
        return self.pmns[metric]

    def get_pmids(self, metrics):
        '''Given a list of metrics, returns a list of PMIDs'''
        return self.context.pmLookupName(metrics)

    def _extract_value(self, result, desc, i, inst=0):
        '''Return python value given a pmExtractValue set of parameters

        ``i`` is the index of the metric inside ``result`` and ``inst`` the
        index of the value within that metric. Raises Exception when the
        metric type is not one of the handled PM_TYPE_* constants.'''
        mtype = desc.contents.type
        value = self.context.pmExtractValue(
            result.contents.get_valfmt(i),
            result.contents.get_vlist(i, inst),
            mtype, mtype)
        # Name of the member of the extracted value that holds the data for
        # each supported metric type.
        member_by_type = {
            c_api.PM_TYPE_U64: 'ull',
            c_api.PM_TYPE_U32: 'ul',
            c_api.PM_TYPE_64: 'll',
            c_api.PM_TYPE_32: 'l',
            c_api.PM_TYPE_STRING: 'cp',
            c_api.PM_TYPE_FLOAT: 'f',
            c_api.PM_TYPE_DOUBLE: 'd',
        }
        member = member_by_type.get(mtype)
        if member is None:
            raise Exception("Metric has unknown type: [%s]" % (mtype))
        return getattr(value, member)

    def _collect_result(self, result, ts, data, skipped_metrics, indom_names):
        '''Append every value of one fetched result to ``data``.

        ``ts`` is the datetime of the result. ``skipped_metrics`` gains the
        name of any metric whose value raised PM_ERR_CONV, and
        ``indom_names`` caches instance names keyed by (pmid, instance id).'''
        contents = result.contents
        for i in range(contents.numpmid):
            pmid = contents.get_pmid(i)
            desc = self.context.pmLookupDesc(pmid)
            metric = self.context.pmNameID(pmid)
            series = data.setdefault(metric, {})
            count = contents.get_numval(i)
            # count <= 0 (no values for this metric) makes the loop a no-op.
            for j in range(count):
                try:
                    value = self._extract_value(result, desc, i, j)
                except pmapi.pmErr as error:
                    if error.args[0] != c_api.PM_ERR_CONV:
                        raise
                    if metric not in skipped_metrics:
                        skipped_metrics.append(metric)
                    continue
                if count == 1:
                    # A single value is stored under key 0 (treated as
                    # "no instance domain").
                    key = 0
                else:
                    inst = contents.get_inst(i, j)
                    # Key the cache by the real identifiers, not by the
                    # position in the result: positions can refer to a
                    # different instance from one fetch to the next.
                    cache_key = (pmid, inst)
                    if cache_key not in indom_names:
                        indom_names[cache_key] = \
                            self.context.pmNameInDomArchive(desc, inst)
                    key = indom_names[cache_key]
                stamps_and_values = series.setdefault(key, [[], []])
                stamps_and_values[0].append(ts)
                stamps_and_values[1].append(value)

    def parse(self):
        '''Returns a dictionary of dictionary containing all the data within
        a PCP archive log file. Data will be returned as a tuple
        (data, skipped_metrics). skipped_metrics is a list of metrics skipped
        because their values could not be converted (PM_ERR_CONV). data will
        be in the following form:
        return[metric1] = {'indom1': [(ts0, ts1, .., tsN), (v0, v1, .., vN)],
                           ....
                           'indomN': [(ts0, ts1, .., tsN), (v0, v1, .., vN)]}
        return[metric2] = {'indom1': [(ts0, ts1, .., tsX), (v0, v1, .., vX)],
                           ....
                           'indomN': [(ts0, ts1, .., tsX), (v0, v1, .., vX)]}
        (ts0, .., tsN) are timestamps in datetime format and (v0, .., vN) are
        the actual values. If a metric has a single value 0 will be used as
        its key.

        Reads the module-level ``opts`` object for the parsed command line.
        Fetching stops at PM_ERR_EOL or PM_ERR_LOGREC; any other pmErr is
        propagated.'''
        self.context = pmapi.pmContext.fromOptions(opts.opts, sys.argv)
        self.context.pmTraversePMNS('', self.pmns_callback)
        self.start = opts.opts.pmGetOptionStart()
        self.end = opts.opts.pmGetOptionFinish()
        self.interval = opts.opts.pmGetOptionInterval()
        print('Userdefined Start: {0} - End: {1} - Interval: {2}'.format(
            self.start, self.end, self.interval))
        self.context.pmSetMode(c_api.PM_MODE_FORW, self.start, 0)

        # The metric list does not change while reading, so the PMIDs are
        # looked up once instead of on every fetch.
        pmids = self.get_pmids(self.get_metrics())
        window_start = float(self.start)
        window_end = float(self.end)

        data = {}
        skipped_metrics = []
        # Cache of instance names, used to avoid too many expensive calls
        # to pmNameInDomArchive.
        indom_names = {}
        while True:
            try:
                # pmFetch() is used instead of pmFetchArchive(), which does
                # not support INTERP mode.
                result = self.context.pmFetch(pmids)
            except pmapi.pmErr as error:
                # Stop at the end of the archive or on a corrupted record,
                # signal any other issue.
                if error.args[0] in (c_api.PM_ERR_EOL, c_api.PM_ERR_LOGREC):
                    break
                raise
            try:
                secs = self._timestamp_to_secs(result.contents.timestamp)
                if window_start <= secs <= window_end:
                    self._collect_result(result, datetime.fromtimestamp(secs),
                                         data, skipped_metrics, indom_names)
            finally:
                # Release the result even when processing it failed.
                self.context.pmFreeResult(result)

        print("data: %s - skipped: %s" % (len(data), len(skipped_metrics)))
        usage = resource.getrusage(resource.RUSAGE_SELF)
        print("After parsing: usertime={0} systime={1} mem={2} MB"
              .format(usage[0], usage[1], (usage[2] / 1024.0)))
        return (data, skipped_metrics)
if __name__ == '__main__':
    # Test.parse() reads the module-level name ``opts``, so it has to be
    # bound here before parse() is called.
    opts = _Options()
    if c_api.pmGetOptionsFromList(sys.argv) != 0:
        c_api.pmUsageMessage()
        sys.exit(1)

    pcp_files = opts.opts.pmGetOptionArchives()
    # Covers both "no value at all" and an empty list of archives.
    if not pcp_files:
        print("Error: No pcp archives specified")
        c_api.pmUsageMessage()
        sys.exit(1)

    # The first archive is stat()ed below whatever the number of archives,
    # so its existence is always checked first.
    first_archive = pcp_files[0]
    if not os.path.exists(first_archive):
        print("Path does not exist: {0}".format(first_archive))
        sys.exit(1)

    # Size in MB
    size = os.stat(first_archive).st_size / (1024.0 ** 2)
    print("Parsing files: {0} - {1} MB".format(
        " ".join(map(os.path.basename, pcp_files)), size))

    test = Test()
    test.parse()
    test.profile_report()
| # vim: autoindent tabstop=4 expandtab smarttab shiftwidth=4 softtabstop=4 tw=0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment