Skip to content

Instantly share code, notes, and snippets.

@DirkR
Created February 23, 2012 21:27
Show Gist options
  • Save DirkR/1895148 to your computer and use it in GitHub Desktop.
Save DirkR/1895148 to your computer and use it in GitHub Desktop.
List all 4xx/5xx error responses (e.g. 403/404/500) from the last day of an Apache access log
#!/usr/bin/env python2.7
import apachelog, sys, os, re, datetime, time
# Format copied and pasted from Apache conf - use raw string + single quotes
LOG_FORMAT = r'%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"'
# The return dictionary from the parse method depends on the input format.
# For the above example, the returned dictionary would look like:
#
# {
# '%>s': '200',
# '%b': '2607',
# '%h': '212.74.15.68',
# '%l': '-',
# '%r': 'GET /images/previous.png HTTP/1.1',
# '%t': '[23/Jan/2004:11:36:20 +0000]',
# '%u': '-',
# '%{Referer}i': 'http://peterhi.dyndns.org/bandwidth/index.html',
# '%{User-Agent}i': 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2) Gecko/20021202'
# }

# Log file that is scanned; '~' is expanded at run time.
ACCESS_LOG = '~/logs/access_log'
# Layout of the '%t' field once the brackets and UTC offset are cut off.
TIMESTAMP_FORMAT = '%d/%b/%Y:%H:%M:%S'
# Entries older than this (relative to the start of the run) are ignored.
MAX_AGE = datetime.timedelta(days=1)
# Requests whose authenticated user ('%u') contains this text are ignored.
IGNORED_USER = 'claas'


def error_hit(data, cutoff):
    """Return ``(status, path)`` for a reportable error record, else ``None``.

    A record is reportable when its status starts with '4' or '5', its
    user agent does not contain 'bot' (case-insensitive), its user does not
    contain ``IGNORED_USER`` and its timestamp is not older than *cutoff*.

    data   -- dictionary of log fields keyed by format directive, shaped
              like the example above.
    cutoff -- naive ``datetime``; records stamped before it are dropped.

    Raises ``KeyError`` if a field is missing and ``ValueError`` if the
    timestamp does not match ``TIMESTAMP_FORMAT``.
    """
    status = data['%>s']
    # Cheap string tests first, so the timestamp is only parsed for records
    # that have not already been discarded.
    if not status.startswith(('4', '5')):
        return None
    if 'bot' in data['%{User-Agent}i'].lower():
        return None
    if IGNORED_USER in data['%u']:
        return None

    # '[23/Jan/2004:11:36:20 +0000]' -> '23/Jan/2004:11:36:20'
    # NOTE(review): the UTC offset is discarded and the value is compared
    # against a naive local "now" - assumes the log uses this machine's
    # timezone; confirm.
    stamp = datetime.datetime.strptime(data['%t'][1:-7], TIMESTAMP_FORMAT)
    if stamp < cutoff:
        return None

    # A request line is normally 'METHOD PATH PROTOCOL'. Some error records
    # carry no path at all (e.g. '-'); count those under the raw request
    # text instead of failing with IndexError.
    request = data['%r']
    parts = request.split(' ')
    path = parts[1] if len(parts) > 1 else request
    return status, path


def record_hit(results, status, path):
    """Add one occurrence of *path* under *status* in *results* (in place).

    *results* maps status code -> {path: count}.
    """
    per_path = results.setdefault(status, {})
    per_path[path] = per_path.get(path, 0) + 1


def report_lines(results):
    """Return the report for *results* as a list of text lines.

    Status codes are listed by total number of hits, most frequent first;
    below each code its paths are listed by hit count, most frequent first.
    Ties are broken alphabetically so the output is deterministic.
    """
    lines = []
    by_total = sorted(
        results,
        key=lambda code: (-sum(results[code].values()), code))
    for status in by_total:
        lines.append("%s" % status)
        per_path = results[status]
        for path in sorted(per_path, key=lambda p: (-per_path[p], p)):
            lines.append(" %s: %s" % (path, per_path[path]))
    return lines


def main():
    """Scan ``ACCESS_LOG`` and print the recent 4xx/5xx hits per path."""
    parser = apachelog.parser(LOG_FORMAT)
    # Computed once so every line is judged against the same instant.
    cutoff = datetime.datetime.now() - MAX_AGE
    results = {}
    with open(os.path.expanduser(ACCESS_LOG)) as log:
        for line in log:
            try:
                hit = error_hit(parser.parse(line), cutoff)
            except Exception as e:
                # Deliberately broad: one malformed line must not abort the
                # whole report. The failure is reported, never swallowed.
                sys.stderr.write("Unable to parse %s: %s\n"
                                 % (line.rstrip('\r\n'), e))
                continue
            if hit is not None:
                record_hit(results, *hit)
    for text in report_lines(results):
        print(text)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment