Skip to content

Instantly share code, notes, and snippets.

@mmalone
Created December 8, 2010 22:21
Show Gist options
  • Save mmalone/734025 to your computer and use it in GitHub Desktop.
Save mmalone/734025 to your computer and use it in GitHub Desktop.
# Requires apachelog from http://code.google.com/p/apachelog/source/browse/trunk/apachelog.py
import sys, time, re
from collections import defaultdict
from threading import Lock, Timer

import apachelog
class Parser(apachelog.parser):
    """apachelog parser that reports fields under readable names."""

    def alias(self, name):
        """Return the readable key for the log-format directive ``name``.

        Directives that have no entry in the table are returned unchanged.
        """
        readable = {
            '%h': 'remote_host',
            '%t': 'date',
            '%r': 'request',
            '%>s': 'status_code',
            '%b': 'content_length',
            '%{Referer}i': 'referer',
            '%{User-agent}i': 'user_agent',
        }
        if name in readable:
            return readable[name]
        return name
class Aggregator(object):
    """Tally how many records share each value of one chosen field."""

    def __init__(self, attribute):
        """Start an empty tally keyed on the values of ``attribute``."""
        # Unseen keys start at zero, so counting needs no membership check.
        self.stats = defaultdict(int)
        # Name of the record field whose values are being counted.
        self.attribute = attribute

    def record(self, record):
        """Add one to the count for this record's value of the tracked field.

        ``record`` must offer a dict-style ``get``; when the field is absent
        the hit is counted under ``None``.
        """
        key = record.get(self.attribute)
        self.stats[key] += 1
def xreadstdin(poll_interval=0.25):
    """Yield lines from standard input as they arrive, polling for more.

    Uses ``readline()`` rather than iterating ``sys.stdin`` because the
    latter buffers too much for live output.

    ``poll_interval`` is the number of seconds to wait before trying again
    when no line is available.

    This generator never finishes on its own; the consumer stops it by
    abandoning the iteration.
    """
    while True:
        line = sys.stdin.readline()
        if not line:
            # readline() returned '': nothing to hand out right now. Wait a
            # moment and retry instead of passing an empty line downstream.
            # time.sleep() takes seconds, not milliseconds.
            time.sleep(poll_interval)
            continue
        yield line
def loglines():
    """Generate one parsed record for every line read from standard input.

    Lines are parsed with a ``Parser`` built for apachelog's 'extended'
    format.
    """
    parse_line = Parser(apachelog.formats['extended']).parse
    for raw in xreadstdin():
        yield parse_line(raw)
class RepeatingTimer(object):
    """Call ``f`` every ``interval`` seconds on a background timer thread.

    ``threading.Timer`` fires only once, so a fresh one is armed after each
    call of ``f``. The first one is armed by the constructor.

    The currently armed ``Timer`` is available as ``self.timer``. Cancelling
    that object directly only stops a tick that has not started yet; use
    ``cancel()`` to stop the repetition for good. If ``f`` raises, the
    exception ends the repetition because no further timer is armed.
    """

    def __init__(self, interval, f):
        """Store the settings and arm the first timer.

        ``interval`` is the delay in seconds between calls; ``f`` is the
        callable to run.
        """
        self.interval = interval
        self.f = f
        self._cancelled = False
        # Serialises arming and cancelling so a tick that is finishing
        # cannot re-arm after cancel() has returned.
        self._lock = Lock()
        self.schedule()

    def schedule(self):
        """Arm a timer for the next call, unless ``cancel()`` was called."""
        with self._lock:
            if self._cancelled:
                return
            self.timer = Timer(self.interval, self._do_f)
            # Daemon thread: a tick that re-arms itself must never be the
            # only thing keeping the process alive after the main thread ends.
            self.timer.daemon = True
            self.timer.start()

    def cancel(self):
        """Stop repeating: cancel the pending timer and refuse to re-arm."""
        with self._lock:
            self._cancelled = True
            self.timer.cancel()

    def _do_f(self, *args, **kwargs):
        """Timer callback: run ``f`` and then arm the next timer."""
        self.f(*args, **kwargs)
        self.schedule()
def stat_tick(aggregator):
    """Print the counts gathered since the last tick and start a new window.

    ``aggregator`` is any object with a ``stats`` mapping. One line is
    written to standard output with a ``[key: count]`` entry per key,
    separated by spaces, and ``aggregator.stats`` is replaced by an empty
    ``defaultdict(int)``.
    """
    # Swap in the fresh dict first so new hits are counted there rather than
    # in the one being printed; this function runs on the timer thread while
    # the main thread keeps recording.
    finished, aggregator.stats = aggregator.stats, defaultdict(int)
    # Iterate over a snapshot in case a recorder still holds the old dict.
    entries = ['[%s: %s]' % (k, v) for k, v in list(finished.items())]
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(' '.join(entries))
if __name__ == '__main__':
    # Count requests per HTTP status code and print the tallies once a second.
    aggregator = Aggregator('status_code')
    timer = RepeatingTimer(1.0, lambda: stat_tick(aggregator))
    try:
        for record in loglines():
            aggregator.record(record)
    finally:
        # Stop the pending tick however the loop ends (Ctrl-C, parse error).
        timer.timer.cancel()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment