Skip to content

Instantly share code, notes, and snippets.

@clohfink
Forked from Kami/cassandra_cfstats_histogram.py
Last active December 20, 2015 03:59
Show Gist options
  • Save clohfink/6068003 to your computer and use it in GitHub Desktop.
Save clohfink/6068003 to your computer and use it in GitHub Desktop.
Changed to just draw a bar graph. With large values, the histogram version takes multiple GB of RAM.
#!/usr/bin/env python
# Licensed to Tomaz Muraus under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# Tomaz Muraus licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import re
import sys
from os.path import join as pjoin
from collections import defaultdict
from optparse import OptionParser
try:
from numpy import *
import matplotlib.pyplot as plt
except ImportError:
raise ImportError('Missing matplotlib dependency. You can install it' +
' using pip:\n pip install matplotlib')
# Plot settings for each metric, keyed by metric name.
#   title  - figure title template; the '%s' placeholder takes the value of
#            the --title option
#   x_axis - label drawn under the x axis
#   index  - position of the metric's values in the parsed data list
LEGEND = {
    'sstables': {
        'title': '%s - Number of accessed SStables per read',
        'x_axis': 'Number of SSTables accessed per read',
        'index': 1,
    },
    'write_latency': {
        'title': '%s - Write Latency',
        'x_axis': 'Write latency in ms',
        'index': 2,
    },
    'read_latency': {
        'title': '%s - Read Latency',
        'x_axis': 'Read latency in ms',
        'index': 3,
    },
    'row_size': {
        'title': '%s - Row size',
        'x_axis': 'Row size in bytes',
        'index': 4,
    },
    'column_count': {
        'title': '%s - Column count',
        'x_axis': 'Column count',
        'index': 5,
    },
}
# Column position -> metric name, numbered from 1 in column order.
INDEX_MAP = dict(enumerate(
    (
        'sstables',
        'write_latency',
        'read_latency',
        'row_size',
        'column_count',
    ),
    start=1,
))
def parse_and_format_data(data):
    """Parse cfhistograms text into offsets and per-column integer lists.

    The first line is treated as the header and skipped. Every other line is
    split on runs of whitespace; lines with fewer than five fields are
    ignored. A line with exactly five fields is taken to be missing the
    SSTables column, and a count of 0 is inserted in its place.

    :param data: Complete cfhistograms output as one string.
    :return: ``(offsets, columns)``. ``offsets`` holds the first field of
             every accepted row, kept as a string. ``columns`` is a list of
             six lists: ``columns[1]`` .. ``columns[5]`` hold the integer
             values of the matching column, ``columns[0]`` is always empty.
    :raises ValueError: If one of the value fields is not an integer.
    """
    offsets = []
    # Slot 0 is an unused placeholder so that list positions line up with the
    # 1-based column numbers.
    columns = [[] for _ in range(6)]

    # [1:] drops the header line.
    for line in data.split('\n')[1:]:
        # str.split() without arguments discards leading and trailing
        # whitespace (including a '\r' left over from CRLF line endings), so
        # padded lines never produce empty fields that would shift the
        # columns or make int() fail.
        fields = line.split()
        if len(fields) < 5:
            # Blank or malformed line.
            continue
        if len(fields) == 5:
            # No SSTables column on this line: record a count of 0.
            fields.insert(1, 0)

        offsets.append(fields[0])
        for column in range(1, 6):
            columns[column].append(int(fields[column]))

    return (offsets, columns)
def _save_bar_chart(offsets, counts, title, x_label, output_path):
    """Draw one bar per offset and write the figure to ``output_path``.

    :param offsets: Tick labels for the x axis, one per bar.
    :param counts: Bar heights.
    :param title: Figure title.
    :param x_label: Label for the x axis.
    :param output_path: Destination image file.
    """
    fig = plt.figure(figsize=(19, 5))
    # Leave extra room below the axes; the x tick labels are rotated.
    fig.subplots_adjust(bottom=0.2)
    ax = fig.add_subplot(111)
    ax.set_title(title)
    ax.set_ylabel('Frequency')
    ax.set_xlabel(x_label)
    ax.bar(arange(len(counts)), counts)
    # Plain numbers rather than scientific notation for large counts. Kept
    # ahead of set_xticklabels(), which replaces the x axis formatter.
    ax.ticklabel_format(style='plain')
    ax.set_xticks(arange(len(offsets)))
    ax.set_xticklabels(offsets, rotation=80)
    fig.savefig(output_path, dpi=100, bbox_inches='tight')
    # Release the figure; pyplot otherwise keeps every figure it created.
    plt.close(fig)


def main():
    """Command line entry point: turn a cfhistograms dump into bar charts.

    Reads the --input, --output and --title options from the command line,
    parses the input file and saves one ``<metric>_histogram.png`` per LEGEND
    entry into the output directory, creating the directory if needed.

    Exits with status 1 when a required option is missing and with status 2
    when --input is not an existing file or --output exists but is not a
    directory.
    """
    usage = 'usage: %prog --title=<title> --input=<path to a file with cfhistograms output>' + \
            ' --output=<path to the output directory>'
    parser = OptionParser(usage=usage)
    parser.add_option('--input', dest='input',
                      help='Path to a file with cfhistograms output')
    parser.add_option('--output', dest='output',
                      help='Path to a directory where the graphs are saved')
    parser.add_option('--title', dest='title',
                      help='name for title')
    (options, args) = parser.parse_args()

    # All three options are mandatory; report the first one that is missing.
    for name in ('input', 'title', 'output'):
        if not getattr(options, name):
            print('Error: Missing "--%s" option' % name)
            # print_usage() writes the usage text itself and returns None.
            parser.print_usage()
            sys.exit(1)

    if not os.path.isfile(options.input):
        print('--input argument is not a valid file path')
        sys.exit(2)

    if not os.path.exists(options.output):
        os.makedirs(options.output)
    elif not os.path.isdir(options.output):
        print('--output argument is not a directory')
        sys.exit(2)

    print('Processing file...')
    with open(options.input, 'r') as fp:
        content = fp.read()
    offsets, data = parse_and_format_data(content)

    for key, values in LEGEND.items():
        output_path = pjoin(options.output, '%s_histogram.png' % (key))
        print('Saving histogram: %s' % (output_path))
        _save_bar_chart(offsets=offsets,
                        counts=data[values['index']],
                        title=values['title'] % options.title,
                        x_label=values['x_axis'],
                        output_path=output_path)
# Run only when executed as a script, so importing the module does not
# trigger command line parsing.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment