Skip to content

Instantly share code, notes, and snippets.

@dhondta
Created November 2, 2023 07:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dhondta/6c133993e870fae79845a0e84e5cf15d to your computer and use it in GitHub Desktop.
Save dhondta/6c133993e870fae79845a0e84e5cf15d to your computer and use it in GitHub Desktop.

WordPress Debug Log File Parser

This is a small tool using Tinyscript for parsing and searching a WordPress debug log and displaying the errors and URLs seen.

$ pip install tinyscript
$ tsm install wp-log-parser
$ wp-log-parser debug.log --search password
$ wp-log-parser wp-errors.log -s script --sort end-date
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
from datetime import datetime
from tinyscript import *
from tinyscript.report import Section
# Script metadata. NOTE(review): these dunders are presumably read by Tinyscript's initialize() to build the
#  help/usage text -- confirm against the Tinyscript documentation.
__script__ = "WordPress Debug Log File Parser"
__version__ = "1.0"
__author__ = "Alexandre D'Hondt"
__email__ = "alexandre.dhondt@gmail.com"
# (holder, year) pair; NOTE(review): "A .D'Hondt" looks like a typo for "A. D'Hondt"
__copyright__ = ("A .D'Hondt", 2023)
__license__ = "gpl-3.0"
# presumably tells Tinyscript that the description below is Markdown -- TODO confirm
__docformat__ = "md"
# description of the tool; NOTE(review): "occured" and "occurences" are misspelled in this user-facing text
__doc__ = """
This simple tool takes a WordPress debug log file and aggregates all the lines per error with time frames when they
occured and the number of occurences.
"""
# argument strings only (no program name); presumably rendered as usage examples in the help message
__examples__ = ["debug.log", "/tmp/wp-errors.log --no-urls --search user"]
# format of the timestamp found between square brackets at the start of a log entry
DT_FORMAT = "%d-%b-%Y %H:%M:%S %Z"
# NB: the patterns are raw strings so that "\[" and "\d" reach the regex engine untouched instead of being
#      (invalid) string escape sequences
# a line that starts a new log entry: "[dd-Mon-YYYY HH:MM:SS TZ] message"
DTG_LINE = re.compile(r"^\[\d{2}-[A-Z][a-z]{2}-\d{4} \d{2}:\d{2}:\d{2} [A-Z]{3}\] .*$")
# a whole (possibly multi-line) log entry; group 1 is the timestamp, group 2 the message
LOG_LINE = re.compile(r"^\[(\d{2}-[A-Z][a-z]{2}-\d{4} \d{2}:\d{2}:\d{2} [A-Z]{3})\] (.*)", re.M|re.DOTALL)
# a single-line message that only consists of a URL
URL_LINE = re.compile(r"^https?://.*$")


def parse(logfile, sort="number-of-occurences", show_urls=True, search=r".*"):
    """Aggregate the entries of a WordPress debug log and print the result as tables.

    A log entry starts on a line matching DTG_LINE; the following lines that do not match it are appended to
    that entry. Entries with the same message are merged into a single row holding the number of occurrences
    and the first/last dates seen. Messages that are URLs are collected in a separate table.

    Arguments:
        logfile:   path to the log file to be read
        sort:      "start-date", "end-date" or anything else for sorting by number of occurrences; every
                    criterion is applied in descending order
        show_urls: whether the table of URLs shall be printed
        search:    regular expression; only the entries whose message matches it (re.search) are kept

    Returns None; the tables are written to the standard output.
    Raises re.error if 'search' is not a valid pattern and ValueError if a timestamp is not a valid date.
    """
    dat, urls, latest = {}, {}, datetime(1, 1, 1)
    # compiled once, before reading the file, so that a bad pattern fails early
    pattern = re.compile(search)

    # helper for formatting the result to be displayed for a dictionary (data or URLs)
    def _format(entries, is_url):
        # the kind of table is passed explicitly ; guessing it from the first key fails when a stored URL does
        #  not match URL_LINE anymore (e.g. "http://" is stored as "http:" once its trailing slashes are stripped)
        latest_day = latest.strftime("%d-%m-%y")
        rows = [["Count", "Start date", "End date", "URL" if is_url else "Entry"]]
        # values have the form ([first, last], count) ; datetimes are compared directly, no need for timestamps
        if sort == "start-date":
            key = lambda item: (item[1][0][0], item[1][0][1], item[1][1])
        elif sort == "end-date":
            key = lambda item: (item[1][0][1], item[1][0][0], item[1][1])
        else:
            key = lambda item: (item[1][1], item[1][0][0], item[1][0][1])
        for entry, ((first, last), count) in sorted(entries.items(), key=key, reverse=True):
            start, end = first.strftime("%d-%m-%y"), last.strftime("%d-%m-%y")
            # an empty end date means "last seen on the most recent day of the log"
            if end == latest_day:
                end = ""
            rows.append([str(count), start, end, entry])
        return ts.BorderlessTable(rows)

    # helper for converting the timestamp of a log entry to a (naive) datetime
    def _timestamp(stamp):
        try:
            return datetime.strptime(stamp, DT_FORMAT)
        except ValueError:
            # %Z only accepts UTC, GMT and the local zone names while the patterns accept any 3-letter zone ;
            #  as %Z yields a naive datetime anyway, retry without the zone abbreviation
            return datetime.strptime(stamp.rsplit(" ", 1)[0], DT_FORMAT.rsplit(" ", 1)[0])

    # helper for parsing a single log entry to the right destination dictionary (data or URLs)
    def _parse(log):
        nonlocal latest
        match = LOG_LINE.match(log)
        if match is None:
            return
        stamp, err = match.groups()
        if pattern.search(err) is None:
            return
        is_url = URL_LINE.match(err) is not None
        if is_url:
            # same URL with or without trailing slash counts as a single entry
            err = err.rstrip("/")
        dt = _timestamp(stamp)
        d = urls if is_url else dat
        (first, last), count = d.get(err, ((dt, dt), 0))
        d[err] = ([min(dt, first), max(dt, last)], count + 1)
        latest = max(latest, dt)

    # start parsing the log file ; undecodable bytes are replaced instead of aborting the whole run
    with open(logfile, errors="replace") as f:
        entry = None
        for line in f:
            line = line.strip()
            if not line:
                continue
            if DTG_LINE.match(line):
                if entry is not None:
                    _parse(entry)
                entry = line
            elif entry is not None:
                entry += "\n" + line
            # otherwise, this is a continuation line seen before any timestamped entry ; it cannot be dated
            #  and is therefore dropped
    if entry is not None:
        _parse(entry)
    # print results found
    if dat:
        print(Section("Errors").rst())
        print(_format(dat, False))
    if show_urls and urls:
        print(Section("URLs seen").rst())
        print(_format(urls, True))
if __name__ == '__main__':
    # supported sort criteria ; the last one is the default
    SORTS = ("start-date", "end-date", "number-of-occurences")
    # command line interface, declared as (flags, settings) pairs and registered in this order
    options = (
        (("path", ), dict(type=ts.file_exists, help="path to WordPress log file")),
        (("-s", "--search"), dict(default=r".*", help="search for specific patterns in log entries")),
        # store_false: the value is True unless the flag is given, hence it is used as parse's 'show_urls'
        (("-u", "--no-urls"), dict(action="store_false", help="do not display URLs seen")),
        (("--sort", ), dict(choices=SORTS, default=SORTS[-1], help="sort by '%s', '%s' or '%s'" % SORTS)),
    )
    for flags, settings in options:
        parser.add_argument(*flags, **settings)
    initialize()
    message = "Parsing %s..." % args.path
    logger.info(message)
    parse(args.path, sort=args.sort, show_urls=args.no_urls, search=args.search)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment