Created
May 3, 2015 18:32
-
-
Save seamustuohy/9f088b4b1386f5c102cd to your computer and use it in GitHub Desktop.
A file system monitor that SHOULD NOT BE USED. I just have it up here so I can fix it one day.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys | |
import time | |
import datetime | |
import subprocess | |
import csv | |
import tempfile | |
import os | |
from os.path import abspath, dirname | |
from watchdog.observers import Observer | |
from watchdog.events import FileSystemEventHandler | |
import argparse | |
# First install watchdog:
#   sudo pip install watchdog
def write_base_info(path, baseline_logfile):
    """Record a baseline size entry for every directory found below *path*.

    Walks *path* recursively with ``os.walk`` and hands each sub-directory
    to ``write_initial_size``, which appends a row to *baseline_logfile*.
    Directories that cannot be measured are skipped so that a single
    failure does not abort the whole baseline.

    Args:
        path: Root directory to walk.
        baseline_logfile: CSV file that receives the baseline rows.
    """
    for root, directories, _files in os.walk(path):
        for directory_name in directories:
            directory_path = os.path.join(root, directory_name)
            try:
                write_initial_size(directory_path, baseline_logfile)
            except (OSError, subprocess.CalledProcessError):
                # subprocess.check_output raises CalledProcessError (which
                # is not an OSError) when the measuring command exits
                # non-zero, so both have to be caught to really skip the
                # directory and carry on.
                continue
def get_size(path):
    """Return the size of *path* as reported by ``du -s``.

    The result is the first whitespace-separated field of the command's
    output, decoded as UTF-8.  It is returned as a string, not a number.
    """
    du_output = subprocess.check_output(['du', '-s', path])
    first_field = du_output.split()[0]
    return first_field.decode('utf-8')
def write_initial_size(path, logfile):
    """Append the baseline size row for a folder to the size log.

    CSV columns:
        folder | start_size | current_size

    At baseline time the start size and the current size are identical.

    Args:
        path: A directory to record, or a file whose containing directory
            should be recorded.
        logfile: CSV file the row is appended to.
    """
    # A directory is recorded as itself.  Taking dirname() unconditionally
    # logs the *parent* of every directory passed in, so leaf folders never
    # get a baseline row and parents are written once per child.
    folder = path if os.path.isdir(path) else dirname(path)
    folder = abspath(folder)
    size = get_size(folder)
    with open(logfile, 'a') as csvfile:
        csv.writer(csvfile).writerow([folder, size, size])
def write_size(path, logfile):
    """Refresh the current size of the folder containing *path* in the size log.

    CSV columns:
        folder | start_size | current_size

    The whole log is read, the row(s) whose folder matches get a freshly
    measured current_size (their start_size is kept), and the log is
    written back.  Folders that have no row yet are not added.

    NOTE: All sizes are recursive (they include everything below the folder).
    NOTE#2: Paths containing commas are safe; the csv module quotes them.

    Args:
        path: Path of the file system entry that changed.
        logfile: CSV size log to update in place.
    """
    folder = abspath(dirname(path))
    # Measure the absolute folder itself; a bare basename would be resolved
    # against the current working directory.
    size = get_size(folder)
    updated_rows = []
    with open(logfile, 'r') as csvfile:
        for row in csv.reader(csvfile):
            # The length check keeps blank or short rows from raising
            # IndexError; they are copied through untouched.
            if len(row) > 1 and row[0] == folder:
                updated_rows.append([folder, row[1], size])
            else:
                updated_rows.append(row)
    with open(logfile, 'w') as csvfile:
        csv.writer(csvfile).writerows(updated_rows)
class LogHandler(FileSystemEventHandler):
    """Watchdog event handler that logs events and refreshes folder sizes.

    Every created / modified / deleted event is appended to the event log
    and triggers a size update (via ``write_size``) for the folder that
    contains the affected path.
    """

    def __init__(self, eventlog="~/temp/s2e_event.log", sizelog="~/temp/s2e_size.log", path="/"):
        """
        eventlog (str): Is the place to write the logs of all events that occurred in csv format
        sizelog (str): Is a place that tracks the size of the folders being watched
        path (str): A directory to recursively watch.
        """
        super(LogHandler, self).__init__()
        # open() does not expand "~", so resolve it once here.  Falsy
        # values (e.g. None) are stored unchanged.
        self._eventlog = os.path.expanduser(eventlog) if eventlog else eventlog
        self._sizelog = os.path.expanduser(sizelog) if sizelog else sizelog
        self._path = path
        # Filled by set_recurse().
        self._stat_snapshot = {}
        self._inode_to_path = {}
        print("I was created")

    def set_recurse(self):
        """Snapshot every directory below the watched path.

        Fills ``self._stat_snapshot`` (directory path -> ``os.stat`` result)
        and ``self._inode_to_path`` (inode number -> directory path).
        Directories that cannot be stat'ed are skipped.

        Background: https://github.com/howeyc/fsnotify/issues/56
        """
        for root, directories, _files in os.walk(self._path):
            for directory_name in directories:
                directory_path = os.path.join(root, directory_name)
                try:
                    stat_info = os.stat(directory_path)
                except OSError:
                    continue
                self._stat_snapshot[directory_path] = stat_info
                self._inode_to_path[stat_info.st_ino] = directory_path

    def _record(self, kind, event):
        """Print *kind*, log the event and refresh the folder size."""
        print(kind)
        self.write_event(kind, event.src_path)
        write_size(event.src_path, self._sizelog)

    def on_modified(self, event):
        """Handle a modification event delivered by watchdog."""
        self._record("modified", event)

    def on_created(self, event):
        """Handle a creation event delivered by watchdog."""
        self._record("created", event)

    def on_deleted(self, event):
        """Handle a deletion event delivered by watchdog."""
        self._record("deleted", event)

    def write_event(self, event, path):
        """Append one row to the event log.

        CSV columns:
            time | event | file_path

        event (str): Name of the event, e.g. "created".
        path (str): Path the event refers to; stored as an absolute path.
        """
        print("writing to event")
        path = abspath(path)
        with open(self._eventlog, 'a') as csvfile:
            csv.writer(csvfile).writerow([datetime.datetime.now(), event, path])
def find_problem(path, eventlog, sizelog):
    """Baseline *path*, then watch it and log events until interrupted.

    Args:
        path: Directory to baseline and to watch recursively.
        eventlog: CSV file that receives one row per file system event.
        sizelog: CSV file that holds the folder sizes.

    Runs until a KeyboardInterrupt (Ctrl-C) is received, then stops the
    observer and waits for its thread to finish.
    """
    write_base_info(path, sizelog)
    observer = Observer()
    config_handle = LogHandler(eventlog, sizelog, path)
    # The handler must be registered; an observer with nothing scheduled
    # starts fine but never delivers a single event.
    observer.schedule(config_handle, path, recursive=True)
    observer.start()
    print("I started")
    try:
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        observer.stop()
    observer.join()
def find_problem_child(path, event, size):
    """Find the folders that grew the most and tally the processes using them.

    A fresh baseline of *path* is written to a temporary file and compared
    with the start sizes stored in the *size* log.  The folders that grew
    the most (at most ``_dirs_to_review`` of them) are inspected with
    ``lsof`` and the command names it reports are counted.

    Args:
        path: Root directory that was monitored.
        event: Event log file.  Not used by this function.
        size: Size log CSV (folder | start_size | current_size).

    Returns:
        dict mapping a command name to the number of times ``lsof``
        reported it across the reviewed folders.  The tally is also
        printed, most frequent command first.
    """
    # _dirs_to_review limits the amount of directories to monitor processes within.
    _dirs_to_review = 25

    # Current size of every folder, keyed by folder path.
    current_sizes = {}
    # The context manager removes the temporary file when the block exits.
    with tempfile.NamedTemporaryFile() as temp:
        write_base_info(path, temp.name)
        with open(temp.name, 'r') as new_baseline:
            for row in csv.reader(new_baseline):
                if len(row) > 1:
                    current_sizes[row[0]] = row[1]

    # We are only checking the directories here;
    # the event log is available for deeper analysis.
    growth = {}
    with open(size, 'r') as updated_size:
        for row in csv.reader(updated_size):
            if len(row) < 2 or row[0] not in current_sizes:
                continue
            try:
                # Sizes are stored as text; compare them as numbers.
                grown_by = int(current_sizes[row[0]]) - int(row[1])
            except ValueError:
                continue
            # A folder that is larger now than at the start is interesting.
            if grown_by > 0:
                growth[row[0]] = grown_by

    # Largest growth first, so the slice keeps the worst offenders.
    review = sorted(growth.items(), key=lambda item: item[1], reverse=True)

    process = {}
    for folder, _grown_by in review[:_dirs_to_review]:
        # TOTAL HACK, may not even work on your device.
        try:
            output = subprocess.check_output(['lsof', '+d', folder, '-F', 'c'])
        except subprocess.CalledProcessError as e:
            # A non-zero exit may still come with usable output.
            output = e.output or b""
        except OSError:
            # lsof could not be executed at all.
            continue
        for line in output.decode('utf-8', 'replace').splitlines():
            # In -F field output each line starts with a field id;
            # 'c' marks the command name.
            if line.startswith('c'):
                command = line[1:]
                process[command] = process.get(command, 0) + 1

    for command, count in sorted(process.items(), key=lambda item: item[1], reverse=True):
        print("{0}: {1}".format(command, count))
    return process
def parse_arguments(argv=None):
    """Parse the command line options of the monitor.

    Args:
        argv: Optional list of argument strings.  When None (the default)
            the arguments are taken from ``sys.argv``.

    Returns:
        argparse.Namespace with ``get_offender``, ``base_directory``,
        ``event_log_file`` and ``size_log_file``.
    """
    # The text belongs in ``description``; the first positional parameter
    # of ArgumentParser is the program name shown in the usage line.
    arg_p = argparse.ArgumentParser(
        description="Monitor a directory tree and log file events and folder sizes")
    # nargs="?" lets -o act as the documented toggle while a value after
    # it (the previous calling form) is still accepted.
    arg_p.add_argument("-o", "--get-offender", type=str, nargs="?", const=True,
                       help="Toggle to identify the offending process (Will take like 5-10 minutes)")
    arg_p.add_argument("-d", "--base-directory", type=str,
                       help="A directory that you want the script to monitor within (recursively)")
    arg_p.add_argument("-e", "--event-log-file",
                       help="A file where you want file events written to.")
    arg_p.add_argument("-s", "--size-log-file",
                       help="A file where you want size information written to.")
    return arg_p.parse_args(argv)
if __name__ == "__main__":
    # Pick the mode from the -o option, then run it with the same three
    # positional arguments either way.
    args = parse_arguments()
    runner = find_problem_child if args.get_offender else find_problem
    runner(args.base_directory, args.event_log_file, args.size_log_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment