Created
August 19, 2011 01:02
-
-
Save almet/1155742 to your computer and use it in GitHub Desktop.
Experiments with nginx stats in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
from datetime import datetime | |
from itertools import groupby | |
from operator import attrgetter | |
import re | |
import sys | |
import pylab as pl | |
# One parsed access-log entry. ``parse_line`` stores a ``datetime`` in ``date``
# and leaves the other fields as the strings captured from the log line.
Log = namedtuple("Log", ("url", "date", "ip", "code", "verb"))
def draw_graph(data, filename, title=None):
    """Plot ``data`` on a single set of axes and save the figure to ``filename``.

    :param data: sequence of positional arguments forwarded to ``ax.plot``
                 (``extract_logs`` passes an ``(X, Y)`` pair).
    :param filename: destination handed to ``fig.savefig``.
    :param title: optional figure title; skipped when empty or ``None``.
    """
    fig = pl.figure()
    try:
        ax = fig.add_subplot(111)
        ax.plot(*data)
        if title:
            # Title the axes we created rather than pylab's implicit
            # "current axes", so the call does not depend on global state.
            ax.set_title(title)
        fig.savefig(filename)
    finally:
        # Figures created through pylab stay registered until they are
        # closed explicitly; release this one so repeated calls do not
        # accumulate open figures.
        pl.close(fig)
# One access-log line, laid out as:
#   <ip> ... [<dd>/<Mon>/<yyyy>:<HH>:<MM>:<SS> <+|-offset>] "<VERB> <url> <proto>" <code>
# The address class accepts IPv4 and IPv6 literals, and the UTC offset may be
# positive or negative (the offset itself is not captured).
LOG_LINE_RE = re.compile(
    r"^(?P<ip>[0-9A-Fa-f:.]+).*?"
    r"\[(?P<date>.*?) [+-].*?\].*?"
    r"\"(?P<verb>[A-Z]+) (?P<url>.*?) .*?\" "
    r"(?P<code>[0-9]+)"
)


def parse_line(line):
    """Parse one access-log line into a ``Log`` record.

    :param line: a single line of the log file.
    :returns: a ``Log`` whose ``ip``, ``verb``, ``url`` and ``code`` fields are
              the captured strings and whose ``date`` is a naive ``datetime``
              (the UTC offset in the log line is discarded), or ``None`` when
              the line does not match the expected layout.
    :raises ValueError: if the bracketed timestamp matches the pattern but is
                        not in ``%d/%b/%Y:%H:%M:%S`` form.
    """
    match = LOG_LINE_RE.search(line)
    if match is None:
        return None

    data = match.groupdict()
    data['date'] = datetime.strptime(data['date'], "%d/%b/%Y:%H:%M:%S")
    return Log(**data)
def extract_logs(*filenames):
    """Parse the given access-log files and plot the number of hits per month.

    Every line of every file goes through ``parse_line``; lines it cannot
    parse are skipped. The parsed entries are grouped by (year, month) and
    the per-month counts are drawn to ``monthly_hits.png`` with
    ``draw_graph``. The x axis is the chronological rank of the month
    (0, 1, 2, ...), not the calendar month itself.

    Still on the wish list (not implemented here):
    * the resources that 404ed
    * the number of times each resource has been viewed (code 200), per day

    :param filenames: paths of the log files to read.
    """
    # parsing
    information = []
    for filename in filenames:
        # Single-argument call form: behaves the same as a Python 2 print
        # statement and as a Python 3 function call.
        print("analysing %s" % filename)
        with open(filename) as f:
            for line in f:
                data = parse_line(line)
                if data is not None:
                    information.append(data)

    # analysis (when the real fun starts)
    # number of hits per month
    month_key = attrgetter("date.year", "date.month")
    # groupby only merges *adjacent* items with equal keys, so the entries
    # must be ordered by that key first; otherwise files passed out of
    # chronological order would split one month into several groups.
    information.sort(key=month_key)

    X, Y = [], []
    for idx, (_month, views) in enumerate(groupby(information, month_key)):
        X.append(idx)
        Y.append(sum(1 for _ in views))
    draw_graph((X, Y), "monthly_hits.png", "Monthly hits")

    # TODO: number of 404 per day
# Script entry: every command-line argument is treated as a log file path.
extract_logs(*sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment