Skip to content

Instantly share code, notes, and snippets.

@almet
Created August 19, 2011 01:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save almet/1155742 to your computer and use it in GitHub Desktop.
Save almet/1155742 to your computer and use it in GitHub Desktop.
Experiments with nginx stats in Python
from collections import namedtuple
from datetime import datetime
from itertools import groupby
from operator import attrgetter
import re
import sys
import pylab as pl
# One parsed access-log entry; the field names match the named groups
# captured by the regular expression in parse_line.
Log = namedtuple("Log", "url date ip code verb")
def draw_graph(data, filename, title=None):
    """Plot ``data`` on a fresh figure and save the result to ``filename``.

    :param data: sequence of positional arguments forwarded to ``ax.plot``
                 (the caller in this file passes an ``(X, Y)`` pair).
    :param filename: destination handed to ``fig.savefig``.
    :param title: optional plot title; nothing is set when it is falsy.
    """
    fig = pl.figure()
    try:
        ax = fig.add_subplot(111)
        ax.plot(*data)
        if title:
            # Title the axes we just drew on rather than relying on pylab's
            # implicit "current axes" state.
            ax.set_title(title)
        fig.savefig(filename)
    finally:
        # Figures created through pylab stay registered (and in memory)
        # until they are closed explicitly, so release it even on failure.
        pl.close(fig)
# Compiled once at import time. Captures, in order of appearance on the line:
# the client address, the timestamp (without its UTC offset), the HTTP verb,
# the requested URL and the response status code. The offset sign may be
# either "+" or "-" so that logs written west of UTC are not discarded.
_LOG_LINE_RE = re.compile(
    r"^(?P<ip>[0-9\.]+).*?\[(?P<date>.*?) [+-].*?\].*?"
    r"\"(?P<verb>[A-Z]+) (?P<url>.*?) .*?\" (?P<code>[0-9]+)"
)


def parse_line(line):
    """Given a line, parse it and return a namedtuple object matching it.

    :param line: one raw line of the access log.
    :returns: a ``Log`` whose ``date`` is a naive ``datetime`` (the UTC offset
              is matched but dropped) and whose other fields are the matched
              strings, or ``None`` when the line does not match the pattern.
    :raises ValueError: if the bracketed timestamp matches the pattern but is
                        not in ``%d/%b/%Y:%H:%M:%S`` form.
    """
    match = _LOG_LINE_RE.search(line)
    if match is None:
        return None

    fields = match.groupdict()
    fields["date"] = datetime.strptime(fields["date"], "%d/%b/%Y:%H:%M:%S")
    return Log(**fields)
def extract_logs(*filenames):
    """Parse the given access logs and plot the number of hits per month.

    Every line of every file is run through ``parse_line``; lines it does not
    recognise are skipped. The parsed entries are grouped by (year, month)
    and the size of each group is plotted, in chronological order, to
    ``monthly_hits.png`` via ``draw_graph``.

    I want information about:
    * The resources that 404ed
    * The number of time each resource has been viewed (code 200), per day
    (TODO: neither of these two analyses is implemented yet.)

    :param filenames: paths of the log files to read.
    """
    # parsing
    information = []
    for filename in filenames:
        # Parenthesised single-argument form prints the same text on
        # Python 2 and Python 3.
        print("analysing %s" % filename)
        with open(filename) as f:
            for line in f:
                entry = parse_line(line)
                if entry:
                    information.append(entry)

    # analysis (when the real fun starts)
    # number of hits per month
    month_of = attrgetter("date.year", "date.month")
    # groupby only merges *adjacent* items with equal keys, so the entries
    # must be ordered by month first; otherwise files given out of
    # chronological order would split one month into several groups.
    information.sort(key=month_of)

    X, Y = [], []
    for idx, (_month, views) in enumerate(groupby(information, month_of)):
        X.append(idx)
        Y.append(sum(1 for _ in views))
    draw_graph((X, Y), "monthly_hits.png", "Monthly hits")

    # TODO: number of 404 per day
# Script entry point: every command-line argument is passed on as a log file name.
extract_logs(*sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment