Skip to content

Instantly share code, notes, and snippets.

@ghaering
Last active August 29, 2015 14:07
Show Gist options
  • Save ghaering/46dd6362172f526d8132 to your computer and use it in GitHub Desktop.
Save ghaering/46dd6362172f526d8132 to your computer and use it in GitHub Desktop.
Make sure a cache directory does not overflow the entire filesystem
# Keeps a cache directory from growing beyond a given limit.
#
# There are two modes for managing the cache folder:
# 1) enforce an absolute size limit on the cache folder (--size flag);
# 2) keep a given percentage of free space on the folder's filesystem
#    (--percent_free flag).
import asyncore
from collections import OrderedDict
import os
import sys
import threading
import time
import pyinotify
import click
class LastUpdatedOrderedDict(OrderedDict):
    """Ordered mapping whose order follows the most recent assignment.

    Assigning to a key that is already present moves that key to the end,
    so iteration always starts with the key that was set longest ago.
    """

    def __setitem__(self, key, value):
        # Drop any existing entry first so the key is re-inserted at the end
        # instead of keeping its old position.
        self.pop(key, None)
        super(LastUpdatedOrderedDict, self).__setitem__(key, value)
class CacheManagerMaxSize(object):
    """Keep the combined size of the tracked cache files within a limit.

    Files are tracked in the order they were last added; when the limit is
    exceeded, the entries added longest ago are evicted (and unlinked) first.
    """

    def __init__(self, max_size):
        """Create an empty manager.

        :param max_size: largest allowed combined size of all tracked files,
            in the same unit as the sizes passed to :meth:`add`.
        """
        self.max_size = max_size
        # Maps file name -> file size, ordered from oldest to newest update.
        self.cache = LastUpdatedOrderedDict()
        # Running total of the sizes of all tracked files.
        self.total_size = 0

    def delete(self, entry):
        """Unlink *entry* from the filesystem and stop tracking it.

        Both steps are best-effort: either can find nothing to do if the
        file was already deleted by a different process.
        """
        try:
            os.unlink(entry)
        except OSError:
            # The file is already gone or cannot be removed; the bookkeeping
            # below is updated regardless.
            pass
        # Untracked entries contribute nothing to the total.
        self.total_size -= self.cache.pop(entry, 0)

    def add(self, entry, size):
        """Record *entry* with *size* and evict old files if over the limit.

        Re-adding a known entry replaces its previously recorded size.
        Eviction proceeds from the oldest entry and stops as soon as the
        total no longer exceeds ``max_size``.
        """
        self.total_size += size - self.cache.get(entry, 0)
        self.cache[entry] = size

        if self.total_size <= self.max_size:
            return

        # Iterate over a snapshot of the keys: delete() mutates the mapping,
        # which is not allowed while iterating over it directly.
        for cache_key in list(self.cache):
            self.delete(cache_key)
            # Same boundary as the trigger above: being exactly at the limit
            # is acceptable, so no further file needs to go.
            if self.total_size <= self.max_size:
                break
class CacheManagerPercentFree(object):
    """Evict cache files until the filesystem has enough free space.

    Files are tracked in the order they were last added; the entries added
    longest ago are evicted (and unlinked) first.
    """

    def __init__(self, percent_free, folder):
        """Create an empty manager.

        :param percent_free: required free space, expressed as a fraction
            that is compared against ``f_bavail / f_blocks`` of the
            filesystem (e.g. ``0.25`` for 25 %).
        :param folder: path whose filesystem is inspected with
            ``os.statvfs``.
        """
        self.percent_free = percent_free
        self.folder = folder
        # Maps file name -> file size, ordered from oldest to newest update.
        self.cache = LastUpdatedOrderedDict()

    def delete(self, entry):
        """Unlink *entry* from the filesystem and stop tracking it.

        Both steps are best-effort: either can find nothing to do if the
        file was already deleted by a different process.
        """
        try:
            os.unlink(entry)
        except OSError:
            # The file is already gone or cannot be removed; it is dropped
            # from the bookkeeping regardless.
            pass
        self.cache.pop(entry, None)

    def _has_enough_free_space(self):
        """Return True if the free-space requirement is currently met."""
        stats = os.statvfs(self.folder)
        if not stats.f_blocks:
            # No block count reported, so the ratio cannot be computed.
            # Stop evicting rather than crash or wipe the whole cache.
            return True
        return float(stats.f_bavail) / stats.f_blocks > self.percent_free

    def add(self, entry, size):
        """Record *entry* with *size*, then evict old files while space is low.

        The filesystem is re-checked before every eviction, so files are
        only removed for as long as the requirement is not met.
        """
        self.cache[entry] = size
        # Iterate over a snapshot of the keys: delete() mutates the mapping,
        # which is not allowed while iterating over it directly.
        for cache_key in list(self.cache):
            if self._has_enough_free_space():
                break
            self.delete(cache_key)
class EventHandler(pyinotify.ProcessEvent):
    """Forward inotify events for cache files to a cache manager."""

    def __init__(self, max_size=None, percent_free=None, folder=None):
        """Pick the cache manager that matches the given limits.

        When *max_size* is given, a size-limited manager is used and the
        other arguments are ignored; otherwise a free-space manager is
        built from *percent_free* and *folder*.
        """
        if max_size is None:
            manager = CacheManagerPercentFree(percent_free, folder)
        else:
            manager = CacheManagerMaxSize(max_size)
        self.cache_manager = manager

    def update_cache(self, path):
        """Register *path* with its current size; ignore non-regular files."""
        if os.path.isfile(path):
            try:
                file_size = os.stat(path).st_size
            except OSError:
                # The file could not be stat'ed; nothing to record.
                pass
            else:
                self.cache_manager.add(path, file_size)

    def process_IN_CLOSE_WRITE(self, event):
        # A file opened for writing was closed: record it with its new size.
        self.update_cache(event.pathname)

    def process_IN_CLOSE_NOWRITE(self, event):
        # A file opened read-only was closed: re-adding it marks it as
        # recently used.
        self.update_cache(event.pathname)

    def process_IN_DELETE(self, event):
        # The file was removed from the watched tree: stop tracking it.
        self.cache_manager.delete(event.pathname)
def quit_after(seconds):
    """Sleep for *seconds*, then terminate the whole process with status 0.

    The process is ended with ``os._exit``, so it stops immediately no
    matter which thread calls this function.

    :param seconds: delay before exiting, as accepted by ``time.sleep``.
    """
    time.sleep(seconds)
    # The parenthesised single-argument form is valid both as a Python 2
    # print statement and as a Python 3 function call.
    print("Exiting due to --exit_after flag")
    # os._exit() does not flush stdio buffers, so flush explicitly or the
    # message is lost whenever stdout is a pipe or a file.
    sys.stdout.flush()
    os._exit(0)
# Decimal multipliers for the unit suffixes accepted by --size.
_SIZE_FACTORS = {"K": 10**3, "M": 10**6, "G": 10**9, "T": 10**12}


def _parse_size(text):
    """Convert a size such as ``"500M"`` or ``"3G"`` into a number of bytes.

    A trailing K, M, G or T (case-insensitive) multiplies the number by the
    matching power of ten; a value without suffix is taken as plain bytes.

    :raises ValueError: if the numeric part is not a whole number.
    """
    text = text.strip().upper()
    factor = _SIZE_FACTORS.get(text[-1:], 1)
    if factor != 1:
        text = text[:-1]
    return int(text) * factor


@click.command()
@click.option("--folder", required=True, help="cache folder to watch")
@click.option("--size", help="max size to keep folder (e. g. 500M, 3G)")
@click.option("--percent_free", help="how much space to keep in <folder>'s filesystem")
@click.option("--exit_after", help="exit process after n seconds (and be restarted by upstart, etc.)")
def main(folder, size, percent_free, exit_after):
    """Watch a cache folder and delete its oldest files when it grows too big.

    Give either --size or --percent_free; --size wins if both are given.
    """
    # NOTE: click shows the docstring above as the --help text, so it is
    # written for users; implementation notes live in comments.

    # Validate all options before touching inotify or the filesystem so bad
    # input yields a usage message instead of a traceback.
    if size:
        try:
            max_size = _parse_size(size)
        except ValueError:
            raise click.UsageError(
                "--size must be a whole number with an optional "
                "K, M, G or T suffix, got %r" % (size,)
            )
        event_handler = EventHandler(max_size=max_size)
    elif percent_free:
        try:
            free_fraction = float(percent_free) / 100.0
        except ValueError:
            raise click.UsageError(
                "--percent_free must be a number, got %r" % (percent_free,)
            )
        event_handler = EventHandler(percent_free=free_fraction, folder=folder)
    else:
        raise click.UsageError("one of --size or --percent_free is required")

    exit_delay = None
    if exit_after:
        try:
            exit_delay = int(exit_after)
        except ValueError:
            raise click.UsageError(
                "--exit_after must be a whole number of seconds, got %r"
                % (exit_after,)
            )

    watch_manager = pyinotify.WatchManager()

    # Seed the cache with the files that already exist in the folder.
    for dir_path, _dir_names, file_names in os.walk(folder):
        for file_name in file_names:
            # update_cache() itself skips anything that is not a regular file.
            event_handler.update_cache(os.path.join(dir_path, file_name))

    # The notifier is driven by the asyncore loop started below.
    notifier = pyinotify.AsyncNotifier(watch_manager, event_handler)
    mask = pyinotify.IN_DELETE | pyinotify.IN_CLOSE_NOWRITE | pyinotify.IN_CLOSE_WRITE
    watch_manager.add_watch(folder, mask, rec=True, auto_add=True)

    if exit_delay is not None:
        thread = threading.Thread(target=quit_after, args=(exit_delay,))
        # Daemon thread: a pending exit timer must not keep the process
        # alive once the main loop has ended (e.g. on KeyboardInterrupt).
        thread.daemon = True
        thread.start()

    # HACK: make it certain that we exit on KeyboardInterrupt etc.
    sys.exit = os._exit
    asyncore.loop()
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment