Skip to content

Instantly share code, notes, and snippets.

@matt2000
Created September 14, 2015 09:09
Show Gist options
  • Save matt2000/8d26ac8ee505b017cf14 to your computer and use it in GitHub Desktop.
Save matt2000/8d26ac8ee505b017cf14 to your computer and use it in GitHub Desktop.
#!/usr/bin/python2
"""
Efficiently searches a log file for recent rows matching a Regular Expression
pattern. Assumes log file is appended in timestamp order.
"""
import os
import sys
from datetime import timedelta
from datetime import datetime
import re
import smtplib
from email.mime.text import MIMEText
from socket import gethostname
def _as_text(raw):
    """Strip a trailing carriage return and return the line as native `str`."""
    raw = raw.rstrip(b'\r')
    # On Python 2 `bytes` is `str`, so nothing to do; on Python 3 decode.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8', 'replace')
    return raw


def readlines_reverse(filename, chunk_size=8192):
    """
    Yield the non-blank lines of a file from last to first.

    The file is read backwards in blocks of `chunk_size` bytes, so only the
    part of the file that the caller actually consumes is loaded.

    Lines are yielded without their line terminator ('\\n' or '\\r\\n').
    Empty lines are skipped, and nothing is yielded for an empty file.

    Raises ValueError if `chunk_size` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be at least 1')
    # Binary mode: byte offsets are exact and no newline translation happens.
    with open(filename, 'rb') as f:
        f.seek(0, os.SEEK_END)
        position = f.tell()
        # Bytes of a line whose beginning has not been read yet.
        pending = b''
        while position > 0:
            step = min(chunk_size, position)
            position -= step
            f.seek(position)
            pieces = (f.read(step) + pending).split(b'\n')
            # The first piece may continue into the preceding chunk, so hold
            # it back until that chunk has been read.
            pending = pieces.pop(0)
            for raw in reversed(pieces):
                line = _as_text(raw)
                if line:
                    yield line
        # Whatever is left is the first line of the file.
        line = _as_text(pending)
        if line:
            yield line
def send_notification(to, pattern, max_count, minutes, filename):
    """
    Email a notice that `pattern` exceeded `max_count` matches.

    to        -- recipient address, used for both the envelope and the
                 'To' header.
    pattern   -- the pattern that was searched for (quoted in the body).
    max_count -- the threshold that was crossed.
    minutes   -- size of the time window, in minutes.
    filename  -- the log file that was searched; also used in the subject.

    The message is sent through the SMTP server on 'localhost' with a
    sender of 'no-reply@<this host's name>'. Errors raised by smtplib while
    connecting or sending propagate to the caller.
    """
    text = 'The pattern "{}" was found more than {} times in the last {} minutes in the log file: {}'
    msg = MIMEText(text.format(pattern, max_count, minutes, filename))
    msg['Subject'] = 'Notification for log ' + filename
    msg['From'] = 'no-reply@' + gethostname()
    msg['To'] = to
    # As an improvement, the server to use could be moved to external config.
    mailer = smtplib.SMTP('localhost')
    try:
        mailer.sendmail(msg['From'], to, msg.as_string())
    finally:
        # Always end the session, even when sending failed, so the
        # connection is not leaked.
        try:
            mailer.quit()
        except smtplib.SMTPServerDisconnected:
            # The connection is already gone; there is nothing to close and
            # any error from sendmail() should be the one that surfaces.
            pass
# Deliberately imprecise: just enough to pull the text between the first '['
# and the following ' #'. The lazy quantifier stops at the first ' #', so a
# ' #' later in the message does not get swallowed into the timestamp.
_TIMESTAMP_RE = re.compile(r'\[(.+?) #')
_TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S.%f'


class LogFormatError(ValueError):
    """Raised when a log line has no timestamp in the expected place."""


def count_recent_matches(lines, pattern, cutoff, max_count):
    """
    Count lines newer than `cutoff` that match `pattern`.

    lines     -- iterable of log lines, newest first.
    pattern   -- regular expression searched for in each recent line.
    cutoff    -- datetime; only lines stamped strictly later are counted.
    max_count -- counting stops as soon as the count exceeds this value.

    Returns the number of matching lines seen before stopping. Because the
    lines arrive newest first, iteration stops at the first line that is not
    newer than `cutoff`.

    Raises LogFormatError if a line's timestamp is missing or unparsable.
    """
    matcher = re.compile(pattern)
    count = 0
    for line in lines:
        stamp = _TIMESTAMP_RE.search(line)
        if stamp is None:
            raise LogFormatError(line)
        try:
            logdate = datetime.strptime(stamp.group(1), _TIMESTAMP_FORMAT)
        except ValueError:
            raise LogFormatError(line)
        # Since we are working through the file backward, once we have gone
        # earlier than the target time we can stop.
        if logdate <= cutoff:
            break
        if matcher.search(line):
            count += 1
        # No need to read further once the threshold has been crossed.
        if count > max_count:
            break
    return count


def main(argv):
    """
    Run the log check described by `argv` and return a process exit status.

    argv -- [program, filename, email, pattern, max_count, past_minutes].

    Returns 0 after a completed check (whether or not a notification was
    sent) and 1 on a usage error or an unreadable timestamp.
    """
    usage = ('Usage: python ' + os.path.basename(__file__) +
             ' filename.log email@domain pattern max_count past_minutes')
    # Provide usage instructions if we didn't get enough arguments.
    if len(argv) < 6:
        print(usage)
        return 1
    # Readable args.
    filename = argv[1]
    email = argv[2]
    pattern = argv[3]
    try:
        max_count = int(argv[4])
        mins = int(argv[5])
    except ValueError:
        print(usage)
        return 1
    cutoff = datetime.now() - timedelta(minutes=mins)
    try:
        count = count_recent_matches(
            readlines_reverse(filename), pattern, cutoff, max_count)
    except LogFormatError:
        print("UH-oh. Log file doesn't have the date in the expected place.")
        return 1
    if count > max_count:
        print('At least {} matches found. Sending notification.'.format(count))
        send_notification(email, pattern, max_count, mins, filename)
    else:
        # Also reached when every line in the file is inside the window.
        print('{} matches found.'.format(count))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment