Skip to content

Instantly share code, notes, and snippets.

@majidazizian
Forked from hreeder/parser.py
Created December 13, 2019 15:18
Show Gist options
  • Save majidazizian/25f52999391974d1edf5ff14989d66ac to your computer and use it in GitHub Desktop.
Save majidazizian/25f52999391974d1edf5ff14989d66ac to your computer and use it in GitHub Desktop.
Python nginx Log Parser
#!/usr/bin/env python
import gzip
import os
import sys
import re
# Directory scanned for access logs when no directory is given explicitly.
INPUT_DIR = "nginx-logs"

# One access-log line with literal "- -" after the client address.  Only
# GET/POST requests over HTTP/1.1 from an IPv4 client match; everything else is
# skipped by the parser.  Unnamed group 6 is the request method.
lineformat = re.compile(r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])""", re.IGNORECASE)


def parse_line(line):
    """Extract the fields of one access-log line.

    Args:
        line: A single log line as text; a trailing newline is allowed.

    Returns:
        A tuple ``(ip, datetime, url, bytes_sent, referrer, user_agent,
        status, method)`` of strings, or ``None`` when the line does not
        match ``lineformat``.  The ``url`` element keeps the space that
        precedes ``HTTP/1.1`` because the pattern's ``url`` group captures it.
    """
    match = lineformat.search(line)
    if match is None:
        return None
    fields = match.groupdict()
    return (
        fields["ipaddress"],
        fields["dateandtime"],
        fields["url"],
        fields["bytessent"],
        fields["refferer"],  # group name is misspelled in the pattern itself
        fields["useragent"],
        fields["statuscode"],
        match.group(6),  # request method: the unnamed (GET|POST) group
    )


def open_log(path):
    """Open a plain or gzip-compressed log file for reading as text.

    Undecodable bytes are replaced rather than raising, so one corrupt
    request line cannot abort the whole run.
    """
    if path.endswith(".gz"):
        # Text mode is required: gzip.open() defaults to binary, and bytes
        # cannot be searched with a str pattern.
        return gzip.open(path, "rt", encoding="utf-8", errors="replace")
    return open(path, encoding="utf-8", errors="replace")


def main(input_dir=None):
    """Print the parsed fields of every matching line under *input_dir*.

    Args:
        input_dir: Directory holding the log files; defaults to ``INPUT_DIR``.

    Each matching line is written to standard output as the eight fields
    returned by ``parse_line`` joined by single spaces.  Files are processed
    in sorted name order; entries that are not regular files are skipped.
    """
    if input_dir is None:
        input_dir = INPUT_DIR
    for name in sorted(os.listdir(input_dir)):
        path = os.path.join(input_dir, name)
        if not os.path.isfile(path):
            continue  # open() would fail on a sub-directory
        # The context manager closes the file even if parsing or printing fails.
        with open_log(path) as logfile:
            for line in logfile:  # stream instead of loading the whole file
                record = parse_line(line)
                if record is not None:
                    print(" ".join(record))


if __name__ == "__main__":
    # An optional first command-line argument overrides the default directory.
    main(sys.argv[1] if len(sys.argv) > 1 else INPUT_DIR)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment