Skip to content

Instantly share code, notes, and snippets.

@martijnvermaat
Created August 19, 2011 12:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martijnvermaat/1156657 to your computer and use it in GitHub Desktop.
Save martijnvermaat/1156657 to your computer and use it in GitHub Desktop.
Clean a directory (e.g. cache) by removing rarely used files.
#!/usr/bin/env python
"""
Clean a directory (e.g. cache) by removing rarely used files.
Usage:
./clean_directory directory [max_size]
The max_size argument is expected in megabytes. File removals are reported to
standard output and afterwards empty directories are (silently) removed.
Jeroen Laros <j.f.j.laros@lumc.nl>
Martijn Vermaat <m.vermaat.hg@lumc.nl>
This code is in the public domain; it can be used for whatever purpose with
absolutely no restrictions.
"""
import sys
import os
# Default maximum directory size (in megabytes); used as the default value of
# the max_size argument of clean_directory().
DEFAULT_MAX_SIZE = 50
def directory_size(directory):
    """
    Return the size of a directory in bytes.

    The directory tree is walked recursively and the sizes of all regular
    entries reported by os.walk are summed. Entries whose size cannot be
    determined (e.g. a dangling symbolic link, or a file that was removed
    by another process during the walk) are skipped instead of aborting
    the whole computation.

    @arg directory: Directory under scrutiny.
    @type directory: string

    @return: The total size of {directory} in bytes.
    @rtype: int
    """
    size = 0
    for (path, _dirs, files) in os.walk(directory):
        for name in files:
            try:
                size += os.path.getsize(os.path.join(path, name))
            except OSError:
                # Dangling symlink or file vanished since it was listed;
                # it occupies no measurable space, so ignore it.
                continue
    return size
def clean_directory(directory, max_size=DEFAULT_MAX_SIZE):
    """
    Keep removing files until the total size of the directory is less than the
    maximum size. Afterwards, empty subdirectories are removed.

    First, {directory} is checked for its size. If it is already below
    {max_size}, nothing is done at all. Otherwise the 'oldest' files (by
    access time) are deleted one by one, each removal being reported on
    standard output. Note that accessing a file makes it 'new'.

    @arg directory: Directory under scrutiny.
    @type directory: string
    @kwarg max_size: Maximum size of the directory in megabytes.
    @type max_size: int
    """
    limit = max_size * pow(2, 20)

    remaining = directory_size(directory)
    if remaining < limit:
        return

    # Build a list of (access time, path, size) entries. Sorting the tuples
    # orders the files from least to most recently accessed.
    entries = []
    for (path, _dirs, files) in os.walk(directory):
        for name in files:
            file_path = os.path.join(path, name)
            try:
                info = os.stat(file_path)
            except OSError:
                # Dangling symlink or file vanished since it was listed.
                continue
            entries.append((info.st_atime, file_path, info.st_size))
    entries.sort()

    # Remove files, oldest first, until the directory is small enough (or
    # until the list is exhausted). The size is tracked by subtracting what
    # was removed rather than re-walking the whole tree after every removal.
    for (_atime, file_path, file_size) in entries:
        os.remove(file_path)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Removed: %s' % file_path)
        remaining -= file_size
        if remaining < limit:
            break

    # Remove any empty directories. Walking bottom-up guarantees that the
    # children of a directory were handled before the directory itself, so
    # chains of nested empty directories disappear in one pass.
    for (path, dirs, _files) in os.walk(directory, topdown=False):
        for name in dirs:
            dir_path = os.path.join(path, name)
            # os.rmdir fails on symlinks and on directories that still hold
            # entries; only genuinely empty real directories are removed.
            if os.path.islink(dir_path) or os.listdir(dir_path):
                continue
            os.rmdir(dir_path)
# Command line entry point: ./clean_directory directory [max_size]
if __name__ == '__main__':
    if not 1 < len(sys.argv) < 4:
        print('Usage:\n ./clean_directory directory [max_size]')
        sys.exit(1)

    if len(sys.argv) == 3:
        # Only the integer conversion is guarded, so that errors raised
        # while actually cleaning are not mistaken for a bad argument.
        try:
            max_size = int(sys.argv[2])
        except ValueError:
            print('Second argument must be an integer')
            sys.exit(1)
        clean_directory(sys.argv[1], max_size)
    else:
        # No max_size given: fall back to the function's default.
        clean_directory(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment