Skip to content

Instantly share code, notes, and snippets.

@wayerr
Created August 8, 2018 13:36
Show Gist options
  • Save wayerr/1a08fad9838d009863c24af38028f222 to your computer and use it in GitHub Desktop.
Save wayerr/1a08fad9838d009863c24af38028f222 to your computer and use it in GitHub Desktop.
Tomcat log file parser
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Version: 0.1
# Author: rad <radiofun@ya.ru>
# License: GPL 3
#
import sys
import codecs
import re
import argparse
class ErrorInfo(object):
    """Occurrences of one distinct error text found in the logs.

    Each occurrence is recorded as the timestamp string that was matched
    on the log line preceding the error.
    """

    def __init__(self):
        # Timestamp strings, in the order the occurrences were read.
        self.dates = []

    def add_date(self, date):
        """Record one more occurrence of the error at ``date``."""
        self.dates.append(date)

    @property
    def err_count(self):
        """Number of recorded occurrences, as a string.

        A string (not an int) is returned so the value can be concatenated
        straight into the report markup.
        """
        return str(len(self.dates))
def read_error(file, pattern=None):
    """Collect the continuation lines of an error record.

    Lines are pulled from ``file`` until one matches the timestamp pattern,
    which marks the start of the next log record.

    Args:
        file: iterator over the remaining log lines; it is advanced in place.
        pattern: compiled regular expression recognising a timestamp line.
            When omitted, the module-level ``time_re`` is used.

    Returns:
        The concatenation of all lines read before the timestamp line
        (an empty string if there were none).

    Note:
        The timestamp line that terminates the record is consumed from
        ``file`` and is not part of the returned text.
        If no line ever matches (an error at the end of the file, or a
        pattern that does not fit the log format) everything up to the end
        of the input is returned.
    """
    if pattern is None:
        pattern = time_re
    collected = []
    for line in file:
        if pattern.search(line):
            break
        collected.append(line)
    # Join once instead of growing a string with += on every line.
    return "".join(collected)
class _LineTracker(object):
    """Iterator wrapper that remembers the most recently yielded line."""

    def __init__(self, lines):
        self._lines = iter(lines)
        self.last = None

    def __iter__(self):
        return self

    def __next__(self):
        self.last = next(self._lines)
        return self.last

    next = __next__  # Python 2 spelling of the iterator protocol


def read_errors(file_name, errors):
    """Scan one log file and merge its SEVERE records into ``errors``.

    A record is a line starting with ``SEVERE`` whose preceding line matches
    the module-level ``time_re``; the matched text is used as the record's
    date. The record text is the SEVERE line plus everything ``read_error``
    returns for the following lines.

    Args:
        file_name: path of the UTF-8 encoded log file.
        errors: dict mapping error text to ``ErrorInfo``; updated in place.
    """
    # The context manager closes the log even if parsing raises.
    with codecs.open(file_name, mode="r", encoding="utf-8") as log:
        lines = _LineTracker(log)
        old_line = None
        for line in lines:
            if not line.startswith("SEVERE"):
                old_line = line
                continue
            # A SEVERE line at the very top of the file has no preceding
            # line to take a timestamp from.
            mo = time_re.search(old_line) if old_line is not None else None
            if not mo:
                continue
            date = mo.group()
            error = line + read_error(lines)
            # read_error stops on - and consumes - the timestamp line of the
            # next record. Recover it so an immediately following SEVERE
            # line is dated correctly instead of reusing this record's date.
            if time_re.search(lines.last):
                old_line = lines.last
            info = errors.get(error)
            if info is None:
                info = ErrorInfo()
                errors[error] = info
            info.add_date(date)
if __name__ == "__main__":
    # Command-line entry point: parse the given logs and write an XML report
    # (styled through an embedded CSS stylesheet) listing every distinct
    # error with the dates on which it occurred.

    # Local import: only the report writer needs XML escaping.
    from xml.sax.saxutils import escape

    ap = argparse.ArgumentParser(description="Java log analyzer")
    ap.add_argument('log_files',
                    metavar='log',
                    type=str,
                    nargs='+',
                    help='log files for analyzing')
    # Raw strings keep the regex escapes (\d, \w) from being treated as
    # invalid string escapes. The AM/PM class is [AP]; the previous [A|P]
    # also accepted a literal "|".
    ap.add_argument('-t',
                    dest='time_pattern',
                    default=r"([\d.]+ [\d:]+)|([\w]+ \d+, \d{4} [\d:]+ [AP]M)",
                    help=r'time regular expression patten (for example like "([\d.]+ [\d:]+)|([\w]+ \d+, \d{4} [\d:]+ [AP]M)" )')
    # The report path is used unconditionally below, so it must be given.
    ap.add_argument('-o',
                    dest='out_file',
                    required=True,
                    help='analyze report file (XML)')
    args = ap.parse_args(sys.argv[1:])

    # Module-level on purpose: read_error/read_errors look this name up as a
    # global. The pattern is anchored to the start of the line.
    time_re = re.compile("^(?:" + args.time_pattern + ")", re.UNICODE)
    file_names = args.log_files
    errors = {}
    for file_name in file_names:
        read_errors(file_name, errors)

    with codecs.open(args.out_file, 'w', 'utf-8') as out:
        out.write('''<?xml-stylesheet href="#style" type="text/css"?>
<errors>
<style id="style">
style {
display:none;
}
error, dates, date, title, text {
display:block;
}
error {
border-top: solid 1px gray;
}
count {
background-color:red;
}
date {
margin-left:40px;
display: list-item;
list-style-type:round;
}
text {
font-family: monospace;
white-space: pre;
}
</style>''')
        for error_text, info in errors.items():
            out.write("\t<error>\n" +
                      u"\t\t<title>Ошибка повторилась <count>" + info.err_count + u"</count> раз</title>\n" +
                      "\t\t<dates>\n")
            for date in info.dates:
                # A custom -t pattern may match markup characters.
                out.write("\t\t\t<date>" + escape(date) + "</date>\n")
            # "]]>" inside the log text would end the CDATA section early;
            # split it across two adjacent CDATA sections.
            safe_text = error_text.replace("]]>", "]]]]><![CDATA[>")
            out.write("\t\t</dates>\n" +
                      "\t<text><![CDATA[" + safe_text + "]]></text>\n"
                      "</error>")
        out.write("</errors>\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment