Skip to content

Instantly share code, notes, and snippets.

@tunix
Created October 7, 2013 11:26
Show Gist options
  • Save tunix/6866330 to your computer and use it in GitHub Desktop.
Save tunix/6866330 to your computer and use it in GitHub Desktop.
Apache log parser for IP count, path and user agent
# coding: utf-8
import os
import re
import sys
# Apache "combined" log line, restricted to GET requests over HTTP/1.0 whose
# ident and user fields are both "-".  Capture groups, in order:
#   1. client IPv4 address   2. requested path   3. user agent
# The dots are escaped so that they match a literal "." only.
pattern = (
    r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[.*\] '
    r'"GET ([^\s]+) HTTP/1\.0" \d+ \d+ ".*" "(.*)"'
)

# Compiled once so the per-line loop does not repeat the pattern lookup.
_line_re = re.compile(pattern)


def count_requests(lines, ip_list=None):
    """Tally matching log lines per client IP.

    Args:
        lines: iterable of log lines (an open file works).
        ip_list: optional dict to accumulate into, so several files can share
            one tally.  A new dict is created when omitted.

    Returns:
        The dict, mapping each IP to ``{'count', 'path', 'useragent'}``.
        ``path`` and ``useragent`` are those of the first matching request
        seen for that IP; later requests only increase ``count``.
    """
    if ip_list is None:
        ip_list = {}
    for line in lines:
        result = _line_re.match(line)
        if not result:
            # Not a line this parser understands; skip it.
            continue
        ip, path, useragent = result.groups()
        entry = ip_list.get(ip)
        if entry is None:
            ip_list[ip] = {'count': 1, 'path': path, 'useragent': useragent}
        else:
            entry['count'] += 1
    return ip_list


def top_ips(ip_list, limit=30):
    """Return up to ``limit`` ``(ip, info)`` pairs, highest count first."""
    ranked = sorted(ip_list.items(), key=lambda item: item[1]['count'],
                    reverse=True)
    return ranked[:limit]


def main(paths):
    """Parse every log file in ``paths`` and print the busiest client IPs."""
    ip_list = {}
    for p in paths:
        # ``with`` closes the file even if reading it raises.
        with open(p, 'r') as f:
            count_requests(f, ip_list)
    for k, v in top_ips(ip_list):
        print('%15s - %6s - [%s]' % (k, v['count'], v['path']))


if __name__ == '__main__':
    # argv[0] is always the script itself; everything after it is a log file.
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment