Skip to content

Instantly share code, notes, and snippets.

@seamustuohy
Created May 3, 2015 18:32
Show Gist options
  • Save seamustuohy/9f088b4b1386f5c102cd to your computer and use it in GitHub Desktop.
Save seamustuohy/9f088b4b1386f5c102cd to your computer and use it in GitHub Desktop.
A file system monitor that SHOULD NOT BE USED. I just have it up here so I can fix it one day.
#!/usr/bin/env python
import sys
import time
import datetime
import subprocess
import csv
import tempfile
import os
from os.path import abspath, dirname
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
import argparse
# First, install watchdog:
#   sudo pip install watchdog
def write_base_info(path, baseline_logfile):
    """Record a baseline size row for every sub-directory under `path`.

    Walks `path` recursively and appends one row per directory to
    `baseline_logfile` via write_initial_size(). Directories that vanish
    or become unreadable mid-walk are silently skipped.
    """
    for root, subdirs, _files in os.walk(path):
        for subdir in subdirs:
            try:
                write_initial_size(os.path.join(root, subdir), baseline_logfile)
            except OSError:
                # Directory disappeared or is unreadable; move on.
                continue
def get_size(path):
    """Return the recursive disk usage of `path` as a string.

    Shells out to ``du -s``; the value is in du's default block units
    and includes everything below `path`.
    """
    du_output = subprocess.check_output(['du', '-s', path])
    first_field = du_output.split()[0]
    return first_field.decode('utf-8')
def write_initial_size(path, logfile):
    """Append a baseline CSV row for `path` to `logfile`.

    Implicit CSV headers:
        folder | start_size | current_size
    The size is written twice so write_size() can later update the
    current-size column while preserving the starting value.

    NOTE(review): `dirname(path)` records the *parent* of `path`, not
    `path` itself — looks unintentional, but write_size() keys rows the
    same way so they stay consistent; confirm before changing.
    """
    path = abspath(dirname(path))
    size = get_size(path)
    # BUG FIX: the csv module requires files opened with newline='' to
    # avoid blank/doubled line endings on some platforms.
    with open(logfile, 'a', newline='') as csvfile:
        csv.writer(csvfile).writerow([path, size, size])
def write_size(path, logfile):
    """Rewrite `logfile`, updating the current-size column for `path`.

    Implicit CSV headers:
        folder | start_size | current_size
    NOTE: all sizes are recursive (``du -s``) — they include everything
    below the folder.
    NOTE#2: paths containing commas will corrupt the CSV rows.
    """
    path = abspath(dirname(path))
    # BUG FIX: the original called get_size(basename(path)), but
    # `basename` was never imported (NameError at runtime) and would
    # have measured a bare relative name anyway; measure the absolute
    # path that the log rows are keyed on.
    size = get_size(path)
    updated_rows = []
    with open(logfile, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Guard against blank rows so row[0] cannot IndexError.
            if row and row[0] == path:
                updated_rows.append([path, row[1], size])
            else:
                updated_rows.append(row)
    with open(logfile, 'w', newline='') as csvfile:
        # BUG FIX: csv writers have writerows(), not write().
        csv.writer(csvfile).writerows(updated_rows)
class LogHandler(FileSystemEventHandler):
    """Watchdog handler that logs every filesystem event and keeps a
    per-directory size log up to date.

    Attributes:
        _eventlog: CSV file receiving `time | event | file_path` rows.
        _sizelog:  CSV file tracking folder sizes (see write_size()).
        _path:     root directory being watched recursively.
    """

    def __init__(self, eventlog="~/temp/s2e_event.log",
                 sizelog="~/temp/s2e_size.log", path="/"):
        """
        eventlog (str): where the CSV log of all events is written.
        sizelog (str): where folder-size tracking rows are written.
        path (str): a directory to recursively watch.
        """
        # BUG FIX: the original used super(FileSystemEventHandler, self),
        # which skips FileSystemEventHandler's own __init__.
        super(LogHandler, self).__init__()
        # BUG FIX: expand '~' so the default log locations resolve.
        self._eventlog = os.path.expanduser(eventlog)
        self._sizelog = os.path.expanduser(sizelog)
        self._path = path
        print("I was created")

    def set_recurse(self):
        """Snapshot stat info for every directory under the watched root.

        Workaround for non-recursive watchers; see
        https://github.com/howeyc/fsnotify/issues/56

        BUG FIX: the original referenced undefined names (path_walk,
        recursive, walker_callback) and never initialised the snapshot
        dicts, so it could never run.
        """
        self._stat_snapshot = {}
        self._inode_to_path = {}
        for root, directories, _files in os.walk(self._path):
            for directory_name in directories:
                try:
                    directory_path = os.path.join(root, directory_name)
                    stat_info = os.stat(directory_path)
                    self._stat_snapshot[directory_path] = stat_info
                    self._inode_to_path[stat_info.st_ino] = directory_path
                except OSError:
                    continue

    def on_modified(self, event):
        # BUG FIX: the original signature omitted `event` but used it.
        print("modified")
        self.write_event("modified", event.src_path)
        write_size(event.src_path, self._sizelog)

    def on_created(self, event):
        print("created")
        self.write_event("created", event.src_path)
        write_size(event.src_path, self._sizelog)

    def on_deleted(self, event):
        print("deleted")
        self.write_event("deleted", event.src_path)
        write_size(event.src_path, self._sizelog)

    def write_event(self, event, path):
        """Append a `time | event | file_path` CSV row to the event log.

        event (str): event kind ("modified"/"created"/"deleted").
        path (str): the path the event occurred on.
        """
        print("writing to event")
        path = abspath(path)
        with open(self._eventlog, 'a+', newline='') as csvfile:
            # BUG FIX: csv writers have writerow(), not write().
            csv.writer(csvfile).writerow(
                [datetime.datetime.now(), event, path])
def find_problem(path, eventlog, sizelog):
    """Watch `path` recursively until Ctrl-C, logging events and sizes.

    path (str): directory to watch recursively.
    eventlog (str): CSV file to receive event rows.
    sizelog (str): CSV file to receive size-tracking rows.
    """
    write_base_info(path, sizelog)
    observer = Observer()
    handler = LogHandler(eventlog, sizelog, path)
    # BUG FIX: the handler was created but never attached to the
    # observer, so nothing was ever watched.
    observer.schedule(handler, path, recursive=True)
    observer.start()
    print("I started")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
def find_problem_child(path, event, size):
    """Compare a fresh size baseline of `path` against the size log and
    count which processes have files open under the biggest movers.

    path (str): directory tree to re-baseline.
    event (str): event-log path; accepted for interface compatibility
        but unused here.
    size (str): path of the size log produced by a previous run.

    Returns:
        dict mapping lsof output lines to the number of interesting
        directories they appeared under (also useful for debugging).
    """
    # _dirs_to_review limits the amount of directories to run lsof on.
    _dirs_to_review = 25
    temp = tempfile.NamedTemporaryFile()
    review = []
    try:
        write_base_info(path, temp.name)
        size_diff = {}
        with open(temp.name, 'r', newline='') as new_baseline:
            # BUG FIX: the original was missing this for-loop entirely
            # and misspelled `row` as `rpw`.
            for row in csv.reader(new_baseline):
                if row:
                    size_diff[row[0]] = [row[1], row[2]]
        with open(size, 'r', newline='') as updated_size:
            for row in csv.reader(updated_size):
                # We are only checking the directories here; we have
                # event logs if you want to do more analysis.
                # BUG FIX: membership must test the folder key, not the
                # whole row.
                if row and row[0] in size_diff:
                    # BUG FIX: CSV fields are strings; compare and
                    # subtract numerically.
                    baseline = int(size_diff[row[0]][0])
                    logged = int(row[1])
                    # If the fresh baseline is larger than the logged
                    # start size, the directory grew — interesting.
                    if logged < baseline:
                        # BUG FIX: append() takes one argument.
                        review.append((row[0], baseline - logged))
    finally:
        # Automatically cleans up the temp file.
        temp.close()
    # NOTE(review): the original sorted ascending, which would select
    # the *smallest* differences; descending matches the apparent
    # intent of reviewing the top movers.
    ordered_by_size = sorted(review, key=lambda item: item[1], reverse=True)
    top = ordered_by_size[:_dirs_to_review]
    process = {}
    for pth, _diff in top:
        # TOTAL HACK, may not even work on your device.
        try:
            # BUG FIX: run lsof against the directory under review, not
            # a hard-coded path.
            output = subprocess.check_output(['lsof', '+d', pth, '-F', 'c'])
        except subprocess.CalledProcessError as e:
            # lsof exits non-zero when nothing is open; keep its output.
            output = e.output
        # BUG FIX: check_output returns bytes; decode before splitting.
        for line in output.decode('utf-8', errors='replace').split('\n'):
            process[line] = process.get(line, 0) + 1
    return process
def parse_arguments():
    """Parse command-line arguments for the filesystem monitor.

    Returns:
        argparse.Namespace with get_offender (bool), base_directory,
        event_log_file and size_log_file.
    """
    # BUG FIX: the parser description was leftover copy-paste from an
    # unrelated script.
    arg_p = argparse.ArgumentParser(
        description="Monitor a directory tree for filesystem activity")
    # BUG FIX: the help text says "Toggle" but the original declared
    # type=str, which forced a value; make it a real boolean flag.
    arg_p.add_argument("-o", "--get-offender", action="store_true",
                       help="Toggle to identify the offending process (Will take like 5-10 minutes)")
    arg_p.add_argument("-d", "--base-directory", type=str,
                       help="A directory that you want the script to monitor within (recursively)")
    arg_p.add_argument("-e", "--event-log-file",
                       help="A file where you want file events written to.")
    arg_p.add_argument("-s", "--size-log-file",
                       help="A file where you want size information written to.")
    args = arg_p.parse_args()
    return args
if __name__ == "__main__":
    # Dispatch on --get-offender: one-shot offender hunt vs. live watch.
    cli_args = parse_arguments()
    action = find_problem_child if cli_args.get_offender else find_problem
    action(cli_args.base_directory, cli_args.event_log_file,
           cli_args.size_log_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment