Skip to content

Instantly share code, notes, and snippets.

@hreeder
Last active February 27, 2024 03:53
Show Gist options
  • Star 40 You must be signed in to star a gist
  • Fork 10 You must be signed in to fork a gist
  • Save hreeder/f1ffe1408d296ce0591d to your computer and use it in GitHub Desktop.
Save hreeder/f1ffe1408d296ce0591d to your computer and use it in GitHub Desktop.
Python nginx Log Parser
#!/usr/bin/env python
import gzip
import os
import sys
import re
# Directory whose entries are read as nginx access logs (plain text or gzip).
INPUT_DIR = "nginx-logs"

# One access-log line: client IPv4 address, bracketed timestamp, request line,
# status code, bytes sent, quoted referrer and quoted user agent.
# Only GET/POST requests over HTTP/1.1 match; matching is case-insensitive.
lineformat = re.compile(r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])""", re.IGNORECASE)


def parse_line(line):
    """Extract the fields of one access-log line.

    Args:
        line: A single line of log text (a trailing newline is fine).

    Returns:
        A tuple ``(ip, datetime, url, bytes_sent, referrer, user_agent,
        status, method)`` of strings, or ``None`` when the line does not
        match ``lineformat``.
    """
    match = lineformat.search(line)
    if match is None:
        return None
    fields = match.groupdict()
    return (
        fields["ipaddress"],
        fields["dateandtime"],
        # The pattern's greedy ``.+`` also captures the space that precedes
        # "HTTP/1.1", so the URL keeps a trailing space.
        fields["url"],
        fields["bytessent"],
        fields["refferer"],  # the group name is misspelled in the pattern
        fields["useragent"],
        fields["statuscode"],
        # The method has no named group; it is the 6th capturing group,
        # i.e. the inner (GET|POST).
        match.group(6),
    )


def _open_log(path):
    """Open *path* as text, decompressing transparently when it ends in .gz."""
    opener = gzip.open if path.endswith(".gz") else open
    # Text mode is required so lines are ``str`` and can be searched with the
    # ``str`` pattern; undecodable bytes are replaced instead of aborting.
    return opener(path, "rt", encoding="utf-8", errors="replace")


def main(input_dir=INPUT_DIR):
    """Print the parsed fields of every matching line in *input_dir*.

    Each matching line produces one output line with the fields separated by
    single spaces. Lines that do not match are skipped silently.

    Args:
        input_dir: Directory to scan; defaults to ``INPUT_DIR``.
    """
    # Sorted so the output order does not depend on the order in which
    # os.listdir happens to return the entries.
    for name in sorted(os.listdir(input_dir)):
        path = os.path.join(input_dir, name)
        if not os.path.isfile(path):
            continue  # sub-directories cannot be opened as logs
        # ``with`` closes the file even if reading raises.
        with _open_log(path) as logfile:
            # Iterate lazily rather than loading the whole log into memory.
            for line in logfile:
                record = parse_line(line)
                if record is not None:
                    print(" ".join(record))


if __name__ == "__main__":
    main()
@ksn-developer
Copy link

ksn-developer commented May 22, 2023

check this one!
log_format = r'(?P<remote_addr>\d+\.\d+\.\d+\.\d+)\s+\S+\s+\S+\s+\[(?P<time_local>[^\]]+)\]\s+"(?P<request>[^"]+)"\s+(?P<status>\d+)\s+(?P<bytes_sent>\d+)\s+"(?P<http_referer>[^"]+)+"\s+"(?P<user_agent>(?!http)[^"]*)"'

check my project on github https://github.com/ksn-developer/logbrain.git

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment