LogFilter Viya Py
#!/bin/env python
#
# Script to scan dedicated time frames from sas/viya config folder log files.
# USAGE :
# python logfilter.py -i /opt/sas/viya/config/var/log -l /home/cloud-user/logfiltered.log -o /home/cloud-user/logO.log -v -b "2018-03-01 12:20:00" -e "2018-03-01 13:00:00"
#
# Given a begin and end time it scans all "*.log" files from the given SAS config folder.
# The requirement is that the log lines start with one of the default timestamp formats, e.g.
# - 2017-10-04 12:55:53,380
# - 2017-10-04T11:00:00,600
# This is due to a limitation of dateutil.parser.parse, which requires the datetime part
# of the string to be extracted from the rest of the line.
# If the first 24 characters of a line contain a datetime in one of these formats,
# everything between begin and end time will be dumped to the output file.
#
# Author: Thomas Rocks / gertro
# Version 0.1 October 4th, 2017: First test release
# Version 0.2 December 5th, 2017: Some bug fixes
# Version 0.3 January 31st, 2018: added support for Viya
# Version 0.4 February 23rd, 2018: minor fixes for file modification and creation conditions
# Version 0.5 March 2nd, 2018: IOError instead of FileNotFoundError exception for Python 2.x compatibility
# Version 0.6 April 27th: some extensions for omitting file date checks
# Version 0.7 May 18th: Allow filtering
import argparse
import datetime
import dateutil
import dateutil.parser
import logging
import os
import sys
from glob import glob
# Try to guess datetime from string
def getDT(str, fmts):
    dt = None
    if str is not None:
        for fmt, strlen in fmts:
            try:
                if len(str) > strlen:
                    sub = str[0:strlen-1]
                else:
                    sub = str
                dt = datetime.datetime.strptime(sub, fmt)
                logging.debug("in: %s" % sub)
                logging.debug("dt[%s]: %r" % (fmt, dt))
                logging.debug("dt.date: %r" % dt.date())
                logging.debug("dt.time: %r" % dt.time())
                break
            except ValueError:
                pass
        if dt is None:
            try:
                if len(str) > 24:
                    sub = str[0:23]
                else:
                    sub = str
                dt = dateutil.parser.parse(sub)
                logging.debug("in: %s" % sub)
                logging.debug("dt[parsed]: %r" % dt)
                logging.debug("dt.date: %r" % dt.date())
                logging.debug("dt.time: %r" % dt.time())
                dt = dt.replace(tzinfo=None)
                logging.debug("dt[parsed w/o tz]: %r" % dt)
                logging.debug("dt.date w/o tz: %r" % dt.date())
                logging.debug("dt.time w/o tz: %r" % dt.time())
            except (ValueError, OverflowError):
                dt = None
                logging.debug("No valid time identified: %r" % str)
    return dt

# Future: scanning saslog-files
#def saslogs(folder, dtBegin, dtEnd, fmts, exclude_list):
# matches = [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.saslog"))]

def filelist(folder, all, dtBegin, dtEnd, fmts, exclude_list):
    matches = [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.log"))]
    matches = matches + [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.log.?"))]
    matches = matches + [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.err"))]
    matches = matches + [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.watch-log"))]
    filtered = []
    for file in matches:
        logging.debug("Checking valid time frame %r" % file)
        mod = datetime.datetime.fromtimestamp(os.path.getmtime(file))
        creat = datetime.datetime.fromtimestamp(os.path.getctime(file))
        logging.debug("file: %r ctime: %r mtime: %r" % (file, creat, mod))
        # Some files seem to get a new creation date when changed
        # in this case we try to get the creation date from the first line
        if all or ((creat >= dtEnd) and (creat >= mod)):
            logging.debug("file: %r ctime: %r == mtime: %r" % (file, creat, mod))
            size = os.path.getsize(file)
            if size > 24:
                f = open(file)
                for i in range(0, 10):
                    try:
                        chkline = f.readline()
                        logging.debug("file: %r firstline: %r" % (file, chkline))
                        chkstr = chkline[0:23]
                        creat2 = getDT(chkstr, fmts)
                        if creat2 is not None and creat2 < creat:
                            logging.debug("file reassign: %r ctime: %r new ctime: %r" % (file, creat, creat2))
                            creat = creat2
                            break
                    except UnicodeDecodeError:
                        pass
                f.close()
        logging.debug("basename: %r" % os.path.basename(file))
        if all or ((mod >= dtBegin) and (creat <= dtEnd)) and os.path.basename(file) not in exclude_list:
            logging.debug("Adding %r to filtered list [creat: %r mod: %r]" % (file, creat, mod))
            filtered.append(file)
    logging.debug("filtered after: %r" % filtered)
    return sorted(filtered, key=os.path.getmtime)


def main(argv):
    parser = argparse.ArgumentParser(description='Search Logfiles recursively and scan for dedicated time frame (by default last hour)')
    parser.add_argument('-i', '--input', help='input folder', required=True)
    parser.add_argument('-o', '--output', help='output file', default=sys.argv[0].rsplit(".", 1)[0] + ".txt")
    parser.add_argument('-x', '--exclude', help='exclude list', default=sys.argv[0].rsplit(".", 1)[0] + ".exclude")
    parser.add_argument('-a', '--all', help='Scan all available files, ignore file dates', action='store_true', default=False)
    parser.add_argument('-f', '--filter', help='List of keywords to search for (case sensitive and separated by white space)', nargs='*', default=[])
    # argparse expands '%' in help strings, so the format placeholders must be escaped as '%%'
    parser.add_argument('-b', '--begintime', help='Begin of extraction (%%Y-%%m-%%d %%H:%%M:%%S)', default=(datetime.datetime.now() - datetime.timedelta(hours=1)).strftime("%Y-%m-%d %H:%M:%S"))
    parser.add_argument('-e', '--endtime', help='End of extraction (%%Y-%%m-%%d %%H:%%M:%%S)', default=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    parser.add_argument('-l', '--logfile', help='path of logfile', default=sys.argv[0].rsplit(".", 1)[0] + ".log")
    parser.add_argument('-v', '--verbose', help='Use Log-Level DEBUG instead of INFO', action='store_true', default=False)
    args = parser.parse_args()
    if args.verbose:
        logging.basicConfig(filename=args.logfile, filemode='w', level=logging.DEBUG, format='%(asctime)s %(message)s')
    else:
        logging.basicConfig(filename=args.logfile, filemode='w', level=logging.INFO, format='%(asctime)s %(message)s')
    fmts = [("%Y-%m-%d %H:%M:%S", 20),
            ("%d-%m-%Y %H:%M:%S", 20),
            ("%Y-%m-%d %H:%M:%S,%f", 24),
            ("%Y-%m-%dT%H:%M:%S,%f", 24),
            ("%d-%m-%Y %H:%M:%S,%f", 24),
            ("%Y-%m-%d %H:%M:%S,%f", 27),
            ("%Y-%m-%dT%H:%M:%S,%f", 27),
            ("%d-%m-%Y %H:%M:%S,%f", 27),
            ("%Y/%m/%d %H:%M:%S", 20)]
    dtBegin = getDT(args.begintime, fmts)
    dtEnd = getDT(args.endtime, fmts)
    exclude_list = []
    if os.path.exists(args.exclude):
        with open(args.exclude) as exclude:
            exclude_list = exclude.read().splitlines()
    logging.debug("exclude_list: %r" % exclude_list)
    file_list = filelist(args.input, args.all, dtBegin, dtEnd, fmts, exclude_list)
    logging.debug("file_list valid for timeframe: %r" % file_list)
    filterlist = []
    for val in args.filter:
        filterlist.append(' ' + val + ' ')
    outlist = []
    for filename in file_list:
        try:
            between = False
            lineNumber = 0
            datesfound = 0
            with open(filename) as infile:
                try:
                    for line in infile:
                        lineNumber += 1
                        logging.debug("filename: %r line: %r" % (filename, line))
                        dtLine = getDT(line, fmts)
                        if dtLine is not None:
                            datesfound += 1
                            if dtLine >= dtBegin:
                                logging.debug("%r[%d] dtLine %r dtBegin %r begins" % (filename, lineNumber, dtLine, dtBegin))
                                between = True
                            if dtLine > dtEnd:
                                logging.debug("%r[%d] dtLine %r dtEnd %r ends" % (filename, lineNumber, dtLine, dtEnd))
                                between = False
                                break
                        if between:
                            if args.filter:
                                for val in filterlist:
                                    if val in line:
                                        outlist.append("%s[line %d]:%s" % (filename, lineNumber, line))
                                        break
                            else:
                                outlist.append("%s[line %d]:%s" % (filename, lineNumber, line))
                except UnicodeDecodeError:
                    pass
            if datesfound == 0:
                logging.error("%r doesn't contain any parsable timestamp" % (filename))
        # except FileNotFoundError:
        except IOError:
            logging.error("%r couldn't be opened!" % (filename))
            pass
    with open(args.output, "w") as outfile:
        for line in outlist:
            outfile.write(line)


if __name__ == "__main__":
    main(sys.argv[1:])
frayos commented May 29, 2018

How-To for logfilter.py

Requirements: python-dateutil library installed (works with system python2 and python3)
Installation for system python2: "yum install python-dateutil"
Installation for other python versions: "python -m pip install python-dateutil"
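
A quick, optional way to check that the library resolves for the interpreter you plan to use (the timestamp is just a sample value):

python -c "import dateutil.parser; print(dateutil.parser.parse('2018-03-01 12:20:00'))"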

The tool is parameterized via command line options.

'-i','--input' : Root folder for recursive log file scan (required)
'-o','--output' : Output file with extracted log lines (default: logfilter.txt)
'-x','--exclude' : File containing a list of files to be excluded (saves time; by default "logfilter.exclude" is used when it exists; format shown below)
'-a','--all' : Scan all available files, ignore file dates
'-f','--filter' : List of keywords to search for (case sensitive and separated by white space)
'-b','--begintime' : Begin timestamp, format %Y-%m-%d %H:%M:%S (default: current time - 1 hour. Warning: the output can be huge)
'-e','--endtime' : End timestamp, format %Y-%m-%d %H:%M:%S (default: current time)
'-l','--logfile' : If you suspect an error while using the tool, check this logfile (default: logfilter.log)
'-v','--verbose' : If you suspect an error, use Log-Level DEBUG in the logfile (Warning: it will contain nearly all scanned log file contents)
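
The exclude file is plain text with one log file name per line; each name is compared against the basename of every discovered file. A minimal sketch of such a file (the two names below are placeholders, not actual Viya log names):

audit.log
backup-agent.watch-log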

The most complex issue is identifying timestamps in the logs: various date formats are in use, and there is no all-purpose, fast parser that identifies every datetime format regardless of where it is positioned in a string.
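
As a rough sketch of the two-stage approach getDT takes (the format list is abbreviated here; the script tries its full fmts list via strptime first, then falls back to dateutil.parser.parse on the first 23 characters and drops any timezone):

import datetime
import dateutil.parser

def parse_line_prefix(line):
    # Hypothetical helper mirroring getDT: try known formats first, then dateutil as fallback.
    for fmt, strlen in [("%Y-%m-%d %H:%M:%S,%f", 24), ("%Y-%m-%dT%H:%M:%S,%f", 24)]:
        try:
            sub = line[0:strlen-1] if len(line) > strlen else line
            return datetime.datetime.strptime(sub, fmt)
        except ValueError:
            pass
    try:
        return dateutil.parser.parse(line[0:23]).replace(tzinfo=None)
    except (ValueError, OverflowError):
        return None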

Here is an example of using it on Windows to cut two small timeframes into two files (a customer had sent the archive for analysis):
python C:\Users\gertro\Documents\SAS\WORK\saspython\logfilter.py -i log -o logextract_0850AM.txt -l logfilter.log -v -b "2018-02-22 08:49:00" -e "2018-02-22 08:53:00"
python C:\Users\gertro\Documents\SAS\WORK\saspython\logfilter.py -i log -o logextract_0830AM.txt -l logfilter.log -v -b "2018-02-22 08:29:00" -e "2018-02-22 08:31:00"

Here is an execution live on the system:

sudo python /sas/bin/logfilter.py -i /opt/sas/viya/config/var/log -l /home/sas/backup/logfilter.log -o /home/sas/backup/viyalogs2.log -v -b "2018-02-12 04:19:00" -e "2018-02-12 04:21:00"

Caution: to read all logs you need root permissions.
