Last active
September 14, 2015 09:11
-
-
Save matt2000/32997e9e96fe0d46dcbc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2
"""
Efficiently searches a log file for recent rows matching a Regular Expression
pattern. Assumes log file is appended in timestamp order.
"""
import os | |
import sys | |
from datetime import timedelta | |
from datetime import datetime | |
import re | |
import smtplib | |
from email.mime.text import MIMEText | |
from socket import gethostname | |
def readlines_reverse(filename, chunk_size=8192):
    """
    Yield the non-blank lines of a file from last to first.

    The file is read backwards in blocks of ``chunk_size`` bytes, so only the
    part of the file the caller actually consumes is loaded into memory.

    Args:
        filename: Path of the file to read.
        chunk_size: Number of bytes fetched per read; must be at least 1.

    Yields:
        Each non-empty line as a native ``str`` with its line terminator
        ("\\n" or "\\r\\n") removed, starting with the last line of the file.
        On Python 3 the bytes are decoded as UTF-8, with undecodable bytes
        replaced rather than raising.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')

    def to_text(raw):
        # On Python 2 the bytes read from the file already are ``str``.
        if isinstance(raw, str):
            return raw
        return raw.decode('utf-8', 'replace')

    # Binary mode makes byte offsets exact and avoids newline translation.
    with open(filename, 'rb') as f:
        f.seek(0, os.SEEK_END)
        position = f.tell()
        # Bytes belonging to a line whose beginning has not been read yet.
        pending = b''
        while position > 0:
            step = min(chunk_size, position)
            position -= step
            f.seek(position)
            pieces = (f.read(step) + pending).split(b'\n')
            # The first piece may continue into the previous (earlier) block,
            # so hold it back; every other piece is a complete line.
            pending = pieces.pop(0)
            for raw in reversed(pieces):
                raw = raw.rstrip(b'\r')
                # Skip 'blank' lines that held only a line separator.
                if raw:
                    yield to_text(raw)
        # Whatever is left over is the very first line of the file.
        pending = pending.rstrip(b'\r')
        if pending:
            yield to_text(pending)
def send_notification(to, pattern, max_count, minutes, filename,
                      smtp_host='localhost'):
    """
    Email an alert saying a pattern exceeded its threshold in a log file.

    Args:
        to: Recipient email address.
        pattern: The pattern that was searched for (quoted in the message).
        max_count: The threshold that was exceeded.
        minutes: Size of the time window that was searched, in minutes.
        filename: The log file that was searched.
        smtp_host: Host name of the SMTP server used to deliver the message.

    Any exception raised by ``smtplib`` while connecting or sending is
    propagated to the caller.
    """
    text = 'The pattern "{}" was found more than {} times in the last {} minutes in the log file: {}'
    msg = MIMEText(text.format(pattern, max_count, minutes, filename))
    sender = 'no-reply@' + gethostname()
    msg['Subject'] = 'Notification for log ' + filename
    msg['From'] = sender
    msg['To'] = to
    mailer = smtplib.SMTP(smtp_host)
    try:
        mailer.sendmail(sender, to, msg.as_string())
    finally:
        # Always end the session, even when sending failed, so the
        # connection is not leaked.
        try:
            mailer.quit()
        except smtplib.SMTPServerDisconnected:
            # The server already dropped the connection; there is nothing
            # left to shut down, and the original error (if any) matters more.
            pass
def _parse_log_timestamp(line):
    """Return the timestamp embedded in a log line, or None if there is none."""
    # Deliberately imprecise pattern: just enough to pull out the text between
    # "[" and the first " #". The match is non-greedy so that a " #" later in
    # the message cannot be swallowed into the timestamp.
    found = re.search(r'\[(.+?) #', line)
    if found is None:
        return None
    try:
        return datetime.strptime(found.group(1), '%Y-%m-%dT%H:%M:%S.%f')
    except ValueError:
        # Something was captured, but it is not a timestamp in the expected
        # format; treat it the same as a missing timestamp.
        return None


def main(argv):
    """
    Count recent log lines matching a pattern and notify past a threshold.

    Args:
        argv: Command line in ``sys.argv`` form:
            ``[script, filename, email, pattern, max_count, past_minutes]``.

    Returns:
        The process exit status: 0 on success, 1 when a log line has no
        parsable timestamp, 2 when too few arguments were supplied.
    """
    # Provide usage instructions if we didn't get enough arguments.
    if len(argv) < 6:
        print('Usage: python ' + os.path.basename(__file__) + ' filename.log email@domain pattern max_count past_minutes')
        return 2

    # Readable args.
    filename, email, pattern = argv[1:4]
    max_count = int(argv[4])
    mins = int(argv[5])

    # Loop invariants: compile the pattern and compute the cutoff only once.
    matcher = re.compile(pattern)
    cutoff = datetime.now() - timedelta(minutes=mins)

    count = 0
    for line in readlines_reverse(filename):
        # Blank lines carry no timestamp and are not log entries.
        if not line.strip():
            continue
        logdate = _parse_log_timestamp(line)
        if logdate is None:
            print("UH-oh. Log file doesn't have the date in the expected place.")
            return 1
        # Since we are working through the file backward, once we have gone
        # earlier than the target time we can stop.
        if logdate <= cutoff:
            break
        if matcher.search(line):
            count += 1
        # We don't need to wait for the loop to finish; we can notify and quit
        # as soon as we cross the threshold.
        if count > max_count:
            print('At least {} matches found. Sending notification.'.format(count))
            send_notification(email, pattern, max_count, mins, filename)
            return 0

    # Reached both when an older line stopped the scan and when the whole
    # file was inside the time window.
    print('{} matches found.'.format(count))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment