Skip to content

Instantly share code, notes, and snippets.

@Packetslave
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Packetslave/186a2d3c916c61968f7b to your computer and use it in GitHub Desktop.
#!/bin/env python
"""
Twitter Coding Challenge
#SREcon 2014
Collect samples of Linux kernel network statistics and report the minimum,
maximum, and average delta for each over a given interval.
Uses the Google gflags module for parsing command-line arguments.
Available on PyPI (pip install python-gflags)
Usage: netstat.py [options]
--stats: comma-separated list of stats to sample
(default: '')
--count: samples to collect
(default: '5')
(an integer)
--interval: seconds to sleep between samples
(default: '1')
(an integer)
--[no]names: include the name of each stat in the output
(default: 'false')
--[no]debug: log debug info
(default: 'false')
--netstat_file: for testing
(default: '/proc/net/netstat')
The output is designed to be machine readable:
$ netstat.py --stats=InOctets,OutOctets --count=5 --interval=1
52 312 182
100 732 395
$ netstat.py --stats=InOctets,OutOctets --count=5 --interval=1 --names
InOctets 52 312 182
OutOctets 100 732 395
Written and tested using Python 2.7 using PEP8 style. Python 3 is untested.
"""
__author__ = 'Brian Landers <brian@packetslave.com>'
import collections
import itertools
import logging
import sys
import time
import gflags
# Global flag registry shared by the gflags module; parsed in main().
FLAGS = gflags.FLAGS

# Command-line flags; see the module docstring for usage examples.
gflags.DEFINE_string('stats', '', 'comma-separated list of stats to sample')
gflags.DEFINE_integer('interval', 1, 'seconds to sleep between samples')
gflags.DEFINE_integer('count', 5, 'samples to collect')
gflags.DEFINE_boolean(
    'names', False, 'include the name of each stat in the output')
# Overridable source path so tests can point at a fixture file instead of
# the live /proc/net/netstat.
gflags.DEFINE_string('netstat_file', '/proc/net/netstat', 'for testing')
gflags.DEFINE_boolean('debug', False, 'log debug info')
def get_raw_stats(filename):
    """Read the kernel network stats from /proc and return them in a dict.

    Note that this makes several assumptions about the format of the data
    in /proc/net/netstat: lines 0/1 are the TcpExt header/value rows and
    lines 2/3 are the IpExt header/value rows. Real production code should
    probably be more paranoid and/or generic. Tested on Ubuntu 12.04 LTS.

    Args:
        filename: path to the file to read, for testing in isolation

    Returns:
        dict (string->int) mapping stat name to its current value
    """
    with open(filename) as stats:
        # Read the file in one shot so we get a consistent view
        contents = stats.readlines()

    # The first token on each line is the section label ("TcpExt:" /
    # "IpExt:"), so strip it off with the [1:] slice.
    tcp_fields = contents[0].split()[1:]
    ip_fields = contents[2].split()[1:]
    tcp_values = [int(x) for x in contents[1].split()[1:]]
    ip_values = [int(x) for x in contents[3].split()[1:]]

    # Use the builtin zip() instead of itertools.izip(): izip() was removed
    # in Python 3, while zip() behaves identically here on both versions
    # (the lists are tiny, so eagerly materializing pairs on Py2 is fine).
    return dict(
        itertools.chain(zip(tcp_fields, tcp_values),
                        zip(ip_fields, ip_values)))
def sample(filename, stats, interval, count):
    """Collect each kernel stat and track the deltas.

    Note that the 'all' value for each kernel stat will be one less than
    'count', since the first sample will have no delta.

    Args:
        filename: path to the file to read, for testing in isolation
        stats: list of kernel stats to sample
        interval: time in seconds between each sample
        count: number of samples to collect

    Returns:
        dict of dicts containing statistics for each kernel stat
        - avg: the average (mean) delta per second (NOTE: not per interval!)
        - min: the smallest non-zero delta
        - max: the numerically largest delta
        - all: list of all delta values (for testing)
    """
    min_delta = collections.defaultdict(int)
    max_delta = collections.defaultdict(int)
    all_deltas = collections.defaultdict(list)

    # Grab the first sample outside the loop so that the min/max stats
    # have a valid initial value to compare to. Otherwise, we end up
    # with a drastically skewed max delta from the first value.
    logging.info('sample #1 of %i', count)
    old_vals = get_raw_stats(filename)
    time.sleep(interval)

    # range(), not xrange(): identical here and works on Python 3 too.
    for i in range(1, count):
        logging.info('sample #%i of %i', i + 1, count)
        values = get_raw_stats(filename)
        for stat in stats:
            if stat not in values:
                # warning(), not the deprecated warn() alias.
                logging.warning('%s not found in sample!', stat)
                continue
            delta = values[stat] - old_vals[stat]
            logging.debug(
                'Delta for %s is %i (%i - %i)',
                stat, delta, values[stat], old_vals[stat])
            if delta > max_delta[stat]:
                logging.debug('%s: New max delta (%i)', stat, delta)
                max_delta[stat] = delta
            # Track the smallest *non-zero* delta, as documented above.
            # (The previous logic let a zero delta be stored as the min,
            # which a later non-zero delta would then silently overwrite.)
            if delta and (not min_delta[stat] or delta < min_delta[stat]):
                logging.debug('%s: New min delta (%i)', stat, delta)
                min_delta[stat] = delta
            all_deltas[stat].append(delta)
            old_vals[stat] = values[stat]
        time.sleep(interval)

    # Guard the average against division by zero when count <= 1 or
    # interval == 0 (e.g. a single sample, or back-to-back sampling).
    elapsed = (count - 1) * interval
    if elapsed <= 0:
        elapsed = 1

    out = {}
    for stat in stats:
        out[stat] = {
            # Average is per second, not per interval. We're assuming here
            # that all stats are integers; floor division (//) keeps the
            # result an int on both Python 2 and Python 3.
            'avg': sum(all_deltas[stat]) // elapsed,
            'max': max_delta[stat],
            'min': min_delta[stat],
            'all': sorted(all_deltas[stat]),
        }
    return out
def main(argv):
    """Main Entry Point."""
    # Parse the command-line flags; FLAGS(argv) returns the remaining
    # non-flag arguments. On a bad flag, print usage to stderr and exit
    # non-zero (Python 2 "print >>stream" syntax).
    try:
        argv = FLAGS(argv)
    except gflags.FlagsError as ex:
        print >>sys.stderr, '%s\n\nUsage: %s\n%s' % (
            ex, sys.argv[0], FLAGS)
        sys.exit(1)
    logging.basicConfig(
        format='%(asctime)-15s %(levelname)-8s %(message)s',
        level=logging.DEBUG if FLAGS.debug else logging.INFO)
    # NOTE(review): with the default --stats='' this yields [''], which
    # sample() will warn about and report as all zeros -- confirm that
    # is the intended behavior for a missing --stats flag.
    stats = FLAGS.stats.split(',')
    samples = sample(FLAGS.netstat_file, stats, FLAGS.interval, FLAGS.count)
    # We deliberately don't use samples.iteritems() here, because we want the
    # display order to be the same as was specified in FLAGS.stats. Another
    # option would be to use an OrderedDict as the return value of sample()
    for stat in stats:
        vals = samples[stat]
        logging.debug('%s: %s', stat, vals)
        # Prepend the stat name only when --names was given, so the default
        # output stays purely numeric / machine readable.
        prefix = stat + ' ' if FLAGS.names else ''
        print "%s%i %i %i" % (
            prefix, vals['min'], vals['max'], vals['avg'])
if __name__ == '__main__':
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment