Skip to content

Instantly share code, notes, and snippets.

@oinume
Created October 5, 2013 06:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oinume/6837358 to your computer and use it in GitHub Desktop.
Save oinume/6837358 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Usage: tail -10000 <access_log> | ./parse-nginx-log.py
import os
import re
import sys
class LogParser(object):
    """Summarize slow requests found in an nginx access log.

    Each line is matched against ``LOG_PATTERN``; lines that do not match
    are ignored.  Matching requests are counted in ``summary`` and those
    whose request time reaches ``SLOW_THRESHOLD_USEC`` are aggregated per
    normalized URI in ``slow_request_summary``.
    """

    # Expected nginx log_format:
    #   '$remote_addr - $remote_user [$time_local] "$request" '
    #   '$status $body_bytes_sent "$http_referer" '
    #   '"$http_user_agent" "$http_x_forwarded_for" $request_time $upstream_response_time';
    # Example line:
    #   10.32.202.174 - gm [27/Sep/2013:16:19:59 +0900] "GET /giftbox/index HTTP/1.1" 200 8012 "-" "Mozilla/5.0 (...)" "-" 4.010 4.010
    #
    # The trailing upstream-response-time field is optional so that both the
    # format above and logs ending right after $request_time are accepted.
    LOG_PATTERN = re.compile(
        r'^(?P<remote_host>[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3}\.[0-9]{,3})'
        r' (?P<ident>[^ ]{1,})'
        r' (?P<remote_user>[^ ]{1,}|\-)'
        r' \[(?P<datetime>[0-9]{2}\/[A-Za-z]{3}\/[0-9]{1,4}:[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2} [+\-][0-9]{4})\]'
        r' "(?P<method>[A-Z ]+) (?P<uri>[^"]*) (?P<protocol>[^"]*)"'
        r' (?P<status>[0-9]{3})'
        r' (?P<bytes>[0-9]{1,}|\-)'
        r' "(?P<referer>[^"]*|\-)"'
        r' "(?P<user_agent>[^"]+)"'
        r' "(?P<forwarded_for>[^"]*|\-)"'
        r' (?P<elapsed>[\d\.]+)'
        r'(?: (?P<upstream_elapsed>[\d.,: \-]+))?$'
    )

    # Requests taking at least this long (in microseconds) count as slow.
    SLOW_THRESHOLD_USEC = 1000 * 1000

    def __init__(self):
        """Start with empty counters and no per-URI statistics."""
        self.summary = {
            'total_requests': 0,
            'slow_requests': 0,
        }
        # normalized URI -> {'uri', 'count', 'total_time' (microseconds)}
        self.slow_request_summary = {}

    def parse(self, file):
        """Read log lines from *file*, accumulate statistics, print a report.

        *file* is any iterable of text lines (an open file, ``sys.stdin``,
        a list of strings).  Lines that do not match ``LOG_PATTERN`` are
        skipped.  The report is written to standard output.
        """
        for line in file:
            m = self.LOG_PATTERN.match(line)
            if not m:
                continue
            self.summarize(m.groupdict())
        self._print_report()

    def _print_report(self):
        """Print the totals followed by one row per slow URI, slowest first."""
        print("""\
Total requests: %d
Slow requests : %d (>= %dms)
""" % (
            self.summary['total_requests'],
            self.summary['slow_requests'],
            self.SLOW_THRESHOLD_USEC // 1000,
        ))
        print("%-40s %-10s %-10s %8s" % ('URL', 'count', 'average(ms)', '%'))

        entries = list(self.slow_request_summary.values())
        grand_total = sum(entry['total_time'] for entry in entries)

        def average_usec(entry):
            # Floor division keeps integer semantics on Python 2 and 3 alike.
            return entry['total_time'] // entry['count']

        # When there are no slow requests the loop body never runs, so the
        # division by grand_total below cannot divide by zero.
        for entry in sorted(entries, key=average_usec, reverse=True):
            print("%-40s %10d %10d %10.2f" % (
                entry['uri'],
                entry['count'],
                average_usec(entry) // 1000,
                1.0 * entry['total_time'] / grand_total * 100,
            ))

    def summarize(self, req):
        """Fold one parsed request into the running statistics.

        *req* is a mapping with at least the keys ``'uri'`` and
        ``'elapsed'`` (request time in seconds, as text).  Every call
        increments ``summary['total_requests']``.  A request whose elapsed
        time cannot be parsed as a number is counted in the total but
        otherwise ignored.
        """
        counters = self.summary
        counters['total_requests'] += 1

        # Group requests that differ only by query string or numeric ids:
        # drop everything from the first '?' and remove all digit runs.
        path = req['uri'].split('?', 1)[0]
        normalized_uri = re.sub(r'\d+', '', path)

        try:
            seconds = float(req['elapsed'])
        except ValueError:
            # LOG_PATTERN accepts any mix of digits and dots (e.g. "1.2.3").
            return
        # Round rather than truncate: 1.001 * 1e6 is 1000999.999... as a float.
        elapsed = int(round(seconds * 1000 * 1000))
        if elapsed < self.SLOW_THRESHOLD_USEC:
            return

        counters['slow_requests'] += 1
        entry = self.slow_request_summary.get(normalized_uri)
        if entry is None:
            self.slow_request_summary[normalized_uri] = {
                'uri': normalized_uri,
                'count': 1,
                'total_time': elapsed,
            }
        else:
            entry['count'] += 1
            entry['total_time'] += elapsed
if __name__ == '__main__':
    # With no argument the log is read from stdin (e.g. piped from `tail`);
    # otherwise the first argument is the path of the log file to read.
    if len(sys.argv) == 1:
        LogParser().parse(sys.stdin)
    else:
        # Attempt the open directly instead of checking existence first:
        # this also covers directories and unreadable files.
        try:
            log_file = open(sys.argv[1], 'r')
        except IOError:
            sys.stderr.write("error\n")
            sys.exit(1)
        # Close the file even if parsing raises.
        with log_file:
            LogParser().parse(log_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment