Skip to content

Instantly share code, notes, and snippets.

Created May 3, 2015 18:32
Show Gist options
  • Save seamustuohy/9f088b4b1386f5c102cd to your computer and use it in GitHub Desktop.
Save seamustuohy/9f088b4b1386f5c102cd to your computer and use it in GitHub Desktop.
A file system monitor that SHOULD NOT BE USED. I just have it up here so I can fix it one day.
#!/usr/bin/env python
import argparse
import csv
import datetime
import os
import subprocess
import sys
import tempfile
import time
from os.path import abspath, dirname

from watchdog.events import FileSystemEventHandler
from watchdog.observers import Observer
# First install watchdog:
# sudo pip install watchdog
def write_base_info(path, baseline_logfile):
    """Record a baseline size row for every directory found below *path*.

    Walks *path* with ``os.walk`` and hands each sub-directory to
    ``write_initial_size``, which appends a row to *baseline_logfile*.

    Args:
        path (str): Root directory to walk.
        baseline_logfile (str): CSV file the baseline rows are appended to.
    """
    for root, directories, _files in os.walk(path):
        for directory_name in directories:
            directory_path = os.path.join(root, directory_name)
            try:
                write_initial_size(directory_path, baseline_logfile)
            except (OSError, subprocess.CalledProcessError):
                # Best effort: a directory that vanished, is unreadable, or
                # makes ``du`` exit non-zero must not abort the whole walk.
                continue
def get_size(path):
    """Return the recursive disk usage of *path* as reported by ``du -s``.

    Args:
        path (str): File or directory to measure.

    Returns:
        str: The first whitespace-separated field of du's output (the size
            figure, in whatever block unit the local ``du`` uses by default),
            decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If ``du`` exits with a non-zero status.
        OSError: If ``du`` cannot be executed.
    """
    # "--" ends option parsing, so a path that starts with "-" is measured
    # instead of being mistaken for a du option.
    output = subprocess.check_output(['du', '-s', '--', path])
    return output.split()[0].decode('utf-8')
def write_initial_size(path, logfile):
    """Append the baseline size row for *path* to *logfile*.

    Row layout (CSV)::

        folder | start_size | start_size

    The size is written twice because the third column is the "current size"
    column, which starts out equal to the start size.

    Args:
        path (str): A directory, or a file whose containing directory should
            be recorded.
        logfile (str): CSV file the row is appended to.
    """
    # A directory is recorded as itself; for anything else the containing
    # directory is used. Taking dirname() of a directory would record its
    # parent and leave the directory itself without a row.
    folder = abspath(path if os.path.isdir(path) else dirname(path))
    size = get_size(folder)
    # newline='' lets the csv module control line endings itself.
    with open(logfile, 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow([folder, size, size])
def write_size(path, logfile):
    """Refresh the current-size column for the folder that contains *path*.

    Row layout (CSV)::

        folder | start_size | current_size

    NOTE: All sizes are recursive (they include the paths below).
    NOTE#2: Rows are read and written with the csv module, which quotes
    fields, so folder names containing commas survive a round trip.

    Args:
        path (str): Path an event was reported for; its ``dirname`` is the
            folder whose row gets updated.
        logfile (str): CSV size log to rewrite in place.
    """
    folder = abspath(dirname(path))
    size = get_size(folder)

    rows = []
    found = False
    try:
        with open(logfile, 'r', newline='') as csvfile:
            for row in csv.reader(csvfile):
                if not row:
                    continue  # skip blank lines
                if row[0] == folder and len(row) >= 2:
                    # Keep the recorded start size, replace the current size.
                    rows.append([folder, row[1], size])
                    found = True
                else:
                    # Rows for other folders must survive the rewrite.
                    rows.append(row)
    except FileNotFoundError:
        # No log yet: the row added below becomes its first entry.
        pass

    if not found:
        # Folder has no baseline row (e.g. created after the baseline run):
        # start tracking it with the current size as its start size.
        rows.append([folder, size, size])

    with open(logfile, 'w', newline='') as csvfile:
        csv.writer(csvfile).writerows(rows)
class LogHandler(FileSystemEventHandler):
    """Watchdog event handler that logs events and refreshes folder sizes.

    Each created/modified/deleted event is appended to the event log, then
    the size-log row of the affected folder is refreshed via ``write_size``.
    """

    def __init__(self, eventlog="~/temp/s2e_event.log", sizelog="~/temp/s2e_size.log", path="/"):
        """Store the log locations and the watched path.

        Args:
            eventlog (str): Is the place to write the logs of all events that
                occurred in csv format
            sizelog (str): Is a place that tracks the size of the folders
                being watched
            path (str): A directory to recursively watch.
        """
        super(LogHandler, self).__init__()
        # open() does not expand "~", so the default locations are resolved here.
        self._eventlog = os.path.expanduser(eventlog)
        self._sizelog = os.path.expanduser(sizelog)
        self._path = path
        # Absolute paths of the handler's own logs, used to ignore the events
        # that writing to them generates.
        self._own_logs = frozenset(
            (abspath(self._eventlog), abspath(self._sizelog)))
        # Filled by set_recurse().
        self._stat_snapshot = {}
        self._inode_to_path = {}
        print("I was created")

    def set_recurse(self):
        """Snapshot ``os.stat`` data for every directory below the watched path.

        Fills ``self._stat_snapshot`` (directory path -> stat result) and
        ``self._inode_to_path`` (inode number -> directory path). Directories
        that cannot be stat'ed are skipped.

        NOTE(review): nothing visible in this file calls this method, and the
        earlier body referred to names that were never defined (``path_walk``,
        ``recursive``, ``walker_callback``); ``os.walk`` is used instead and
        the undefined callback is dropped.
        """
        for root, directories, _files in os.walk(self._path):
            for directory_name in directories:
                directory_path = os.path.join(root, directory_name)
                try:
                    stat_info = os.stat(directory_path)
                except OSError:
                    continue
                self._stat_snapshot[directory_path] = stat_info
                self._inode_to_path[stat_info.st_ino] = directory_path

    def _is_own_log(self, path):
        """Return True when *path* is one of this handler's own log files."""
        return abspath(path) in self._own_logs

    def _record(self, kind, event):
        """Log *event* under the label *kind* and refresh its folder's size."""
        if self._is_own_log(event.src_path):
            # Writing the logs inside the watched tree raises events of its
            # own; recording those would feed back into further writes.
            return
        self.write_event(kind, event.src_path)
        write_size(event.src_path, self._sizelog)

    def on_modified(self, event):
        """Handle a modification event reported by watchdog."""
        self._record("modified", event)

    def on_created(self, event):
        """Handle a creation event reported by watchdog."""
        self._record("created", event)

    def on_deleted(self, event):
        """Handle a deletion event reported by watchdog."""
        self._record("deleted", event)

    def write_event(self, event, path):
        """Append one row to the event log.

        Row layout (CSV)::

            time | event | file_path

        Args:
            event (str): Event label, e.g. "created".
            path (str): Path the event refers to; stored as an absolute path.
        """
        print("writing to event")
        path = abspath(path)
        # Local time in ISO 8601 form.
        timestamp = datetime.datetime.now().isoformat()
        with open(self._eventlog, 'a', newline='') as csvfile:
            csv.writer(csvfile).writerow([timestamp, event, path])
def find_problem(path, eventlog, sizelog):
    """Write a size baseline for *path*, then monitor it until interrupted.

    Args:
        path (str): Directory to watch recursively.
        eventlog (str): CSV file that receives one row per file event.
        sizelog (str): CSV file that tracks folder sizes.
    """
    write_base_info(path, sizelog)
    observer = Observer()
    config_handle = LogHandler(eventlog, sizelog, path)
    # The handler only receives events once it is scheduled on the observer
    # and the observer thread is started.
    observer.schedule(config_handle, path, recursive=True)
    observer.start()
    print("I started")
    try:
        # The observer runs in its own thread; keep the main thread alive
        # until the user presses Ctrl-C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
def find_problem_child(path, event, size):
    """Tally the processes that have files open in the fastest-growing folders.

    Steps:
      1. Take a fresh size baseline of *path* into a temporary file.
      2. Compare it with the start sizes recorded in the *size* log and keep
         the folders that have grown.
      3. Run ``lsof`` on the folders that grew the most and count the command
         names it reports.

    Args:
        path (str): Root directory that was monitored.
        event (str): Event log path. Not read here.
        size (str): Size log (CSV: folder | start_size | current_size).

    Returns:
        dict: command name -> number of times ``lsof`` reported it.
    """
    # _dirs_to_review limits the amount of directories to look for processes in.
    _dirs_to_review = 25

    # folder -> [start_size, current_size] as measured right now.
    size_diff = {}
    # The temporary file is removed automatically when the ``with`` block ends.
    with tempfile.NamedTemporaryFile(suffix=".csv") as temp:
        write_base_info(path, temp.name)
        with open(temp.name, 'r', newline='') as new_baseline:
            for row in csv.reader(new_baseline):
                if len(row) >= 3:
                    size_diff[row[0]] = [row[1], row[2]]

    # folder -> growth since the recorded start size.
    growth = {}
    with open(size, 'r', newline='') as updated_size:
        for row in csv.reader(updated_size):
            # We are only checking the directories here;
            # we have event logs if you want to do more analysis.
            if len(row) < 2 or row[0] not in size_diff:
                continue
            try:
                # Sizes are stored as text; compare them as numbers.
                started = int(row[1])
                now = int(size_diff[row[0]][0])
            except ValueError:
                continue
            # If the current size is more than the original it is interesting.
            if started < now:
                growth[row[0]] = now - started

    # Largest growth first, so the slice keeps the worst offenders.
    ordered_by_size = sorted(
        growth.items(), key=lambda item: item[1], reverse=True)
    top = ordered_by_size[:_dirs_to_review]

    process = {}
    for folder, _grown in top:
        # TOTAL HACK, may not even work on your device.
        try:
            output = subprocess.check_output(['lsof', '+d', folder, '-F', 'c'])
        except subprocess.CalledProcessError as e:
            # A non-zero exit still carries whatever lsof managed to print.
            output = e.output
        except OSError:
            # lsof cannot be executed at all: nothing to tally for any folder.
            break
        for line in output.decode('utf-8', 'replace').split('\n'):
            # With "-F c" each command name is printed on a line that starts
            # with "c"; the other lines are process ids.
            if not line.startswith('c'):
                continue
            command = line[1:]
            process[command] = process.get(command, 0) + 1

    for command, hits in sorted(process.items(), key=lambda item: item[1], reverse=True):
        print("{0}\t{1}".format(hits, command))
    return process
def parse_arguments():
    """Parse the command line options of the monitor.

    Returns:
        argparse.Namespace: With the attributes ``get_offender``,
            ``base_directory``, ``event_log_file`` and ``size_log_file``.
    """
    # The text must go in ``description``; the first positional parameter of
    # ArgumentParser is the program name.
    arg_p = argparse.ArgumentParser(
        description="Monitor a directory tree and log file events and folder sizes")
    # nargs="?" makes the option a real toggle ("-o") while still accepting
    # the older "-o VALUE" form.
    arg_p.add_argument("-o", "--get-offender", nargs="?", const=True, default=False,
                       help="Toggle to identify the offending process (Will take like 5-10 minutes)")
    # Required: without a directory there is nothing to walk or watch.
    arg_p.add_argument("-d", "--base-directory", type=str, required=True,
                       help="A directory that you want the script to monitor within (recursively)")
    # Defaults mirror LogHandler's, with "~" expanded because open() does not do it.
    arg_p.add_argument("-e", "--event-log-file",
                       default=os.path.expanduser("~/temp/s2e_event.log"),
                       help="A file where you want file events written to.")
    arg_p.add_argument("-s", "--size-log-file",
                       default=os.path.expanduser("~/temp/s2e_size.log"),
                       help="A file where you want size information written to.")
    args = arg_p.parse_args()
    return args
if __name__ == "__main__":
    args = parse_arguments()
    if args.get_offender:
        # One-shot analysis of logs that were already collected.
        find_problem_child(args.base_directory, args.event_log_file, args.size_log_file)
    else:
        # Default mode: write a baseline and monitor until interrupted.
        find_problem(args.base_directory, args.event_log_file, args.size_log_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment