Skip to content

Instantly share code, notes, and snippets.

@Packetslave
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Packetslave/186a2d3c916c61968f7b to your computer and use it in GitHub Desktop.
#!/bin/env python
"""
Twitter Coding Challenge
#SREcon 2014
Collect samples of Linux kernel network statistics and report the minimum,
maximum, and average delta for each over a given interval.
Uses the Google gflags module for parsing command-line arguments.
Available on PyPI (pip install python-gflags)
Usage: netstat.py [options]
--stats: comma-separated list of stats to sample
(default: '')
--count: samples to collect
(default: '5')
(an integer)
--interval: seconds to sleep between samples
(default: '1')
(an integer)
--[no]names: include the name of each stat in the output
(default: 'false')
--[no]debug: log debug info
(default: 'false')
--netstat_file: for testing
(default: '/proc/net/netstat')
The output is designed to be machine readable:
$ netstat.py --stats=InOctets,OutOctets --count=5 --interval=1
52 312 182
100 732 395
$ netstat.py --stats=InOctets,OutOctets --count=5 --interval=1 --names
InOctets 52 312 182
OutOctets 100 732 395
Written and tested using Python 2.7 using PEP8 style. Python 3 is untested.
"""
__author__ = 'Brian Landers <brian@packetslave.com>'
import collections
import itertools
import logging
import sys
import time
import gflags
# Global flag registry shared by the gflags module; parsed in main().
FLAGS = gflags.FLAGS

# Command-line flags; see the module docstring for usage examples.
gflags.DEFINE_string('stats', '', 'comma-separated list of stats to sample')
gflags.DEFINE_integer('interval', 1, 'seconds to sleep between samples')
gflags.DEFINE_integer('count', 5, 'samples to collect')
gflags.DEFINE_boolean(
    'names', False, 'include the name of each stat in the output')
# Overridable source path so tests can point at a fixture file instead of
# the live /proc/net/netstat.
gflags.DEFINE_string('netstat_file', '/proc/net/netstat', 'for testing')
gflags.DEFINE_boolean('debug', False, 'log debug info')
def get_raw_stats(filename):
    """Read the kernel network stats from /proc and return them in a dict.

    Note that this makes several assumptions about the format of the data
    in /proc/net/netstat: lines 0/1 are the TcpExt header/value rows and
    lines 2/3 are the IpExt header/value rows. Real production code should
    probably be more paranoid and/or generic. Tested on Ubuntu 12.04 LTS.

    Args:
        filename: path to the file to read, for testing in isolation

    Returns:
        dict (string->int) mapping stat name to its current value
    """
    with open(filename) as stats:
        # Read the file in one shot so we get a consistent view
        contents = stats.readlines()

    # The first token on each line is the section label ("TcpExt:" /
    # "IpExt:"), so strip it off with the [1:] slice.
    tcp_fields = contents[0].split()[1:]
    ip_fields = contents[2].split()[1:]
    tcp_values = [int(x) for x in contents[1].split()[1:]]
    ip_values = [int(x) for x in contents[3].split()[1:]]

    # Use the builtin zip() instead of itertools.izip(): izip() was removed
    # in Python 3, while zip() behaves identically here on both versions
    # (the lists are tiny, so eagerly materializing pairs on Py2 is fine).
    return dict(
        itertools.chain(zip(tcp_fields, tcp_values),
                        zip(ip_fields, ip_values)))
def sample(filename, stats, interval, count):
    """Collect each kernel stat and track the deltas.

    Note that the 'all' value for each kernel stat will be one less than
    'count', since the first sample will have no delta.

    Args:
        filename: path to the file to read, for testing in isolation
        stats: list of kernel stats to sample
        interval: time in seconds between each sample
        count: number of samples to collect

    Returns:
        dict of dicts containing statistics for each kernel stat
        - avg: the average (mean) delta per second (NOTE: not per interval!)
        - min: the smallest non-zero delta
        - max: the numerically largest delta
        - all: list of all delta values (for testing)
    """
    min_delta = collections.defaultdict(int)
    max_delta = collections.defaultdict(int)
    all_deltas = collections.defaultdict(list)

    # Grab the first sample outside the loop so that the min/max stats
    # have a valid initial value to compare to. Otherwise, we end up
    # with a drastically skewed max delta from the first value.
    logging.info('sample #1 of %i', count)
    old_vals = get_raw_stats(filename)
    time.sleep(interval)

    # range(), not xrange(): identical here and works on Python 3 too.
    for i in range(1, count):
        logging.info('sample #%i of %i', i + 1, count)
        values = get_raw_stats(filename)
        for stat in stats:
            if stat not in values:
                # warning(), not the deprecated warn() alias.
                logging.warning('%s not found in sample!', stat)
                continue
            delta = values[stat] - old_vals[stat]
            logging.debug(
                'Delta for %s is %i (%i - %i)',
                stat, delta, values[stat], old_vals[stat])
            if delta > max_delta[stat]:
                logging.debug('%s: New max delta (%i)', stat, delta)
                max_delta[stat] = delta
            # Track the smallest *non-zero* delta, as documented above.
            # (The previous logic let a zero delta be stored as the min,
            # which a later non-zero delta would then silently overwrite.)
            if delta and (not min_delta[stat] or delta < min_delta[stat]):
                logging.debug('%s: New min delta (%i)', stat, delta)
                min_delta[stat] = delta
            all_deltas[stat].append(delta)
            old_vals[stat] = values[stat]
        time.sleep(interval)

    # Guard the average against division by zero when count <= 1 or
    # interval == 0 (e.g. a single sample, or back-to-back sampling).
    elapsed = (count - 1) * interval
    if elapsed <= 0:
        elapsed = 1

    out = {}
    for stat in stats:
        out[stat] = {
            # Average is per second, not per interval. We're assuming here
            # that all stats are integers; floor division (//) keeps the
            # result an int on both Python 2 and Python 3.
            'avg': sum(all_deltas[stat]) // elapsed,
            'max': max_delta[stat],
            'min': min_delta[stat],
            'all': sorted(all_deltas[stat]),
        }
    return out
def main(argv):
    """Main Entry Point."""
    # Parse the command-line flags; FLAGS(argv) returns the remaining
    # non-flag arguments. On a bad flag, print usage to stderr and exit
    # non-zero (Python 2 "print >>stream" syntax).
    try:
        argv = FLAGS(argv)
    except gflags.FlagsError as ex:
        print >>sys.stderr, '%s\n\nUsage: %s\n%s' % (
            ex, sys.argv[0], FLAGS)
        sys.exit(1)
    logging.basicConfig(
        format='%(asctime)-15s %(levelname)-8s %(message)s',
        level=logging.DEBUG if FLAGS.debug else logging.INFO)
    # NOTE(review): with the default --stats='' this yields [''], which
    # sample() will warn about and report as all zeros -- confirm that
    # is the intended behavior for a missing --stats flag.
    stats = FLAGS.stats.split(',')
    samples = sample(FLAGS.netstat_file, stats, FLAGS.interval, FLAGS.count)
    # We deliberately don't use samples.iteritems() here, because we want the
    # display order to be the same as was specified in FLAGS.stats. Another
    # option would be to use an OrderedDict as the return value of sample()
    for stat in stats:
        vals = samples[stat]
        logging.debug('%s: %s', stat, vals)
        # Prepend the stat name only when --names was given, so the default
        # output stays purely numeric / machine readable.
        prefix = stat + ' ' if FLAGS.names else ''
        print "%s%i %i %i" % (
            prefix, vals['min'], vals['max'], vals['avg'])
if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment