Created
August 19, 2011 12:00
-
-
Save martijnvermaat/1156657 to your computer and use it in GitHub Desktop.
Clean a directory (e.g. cache) by removing rarely used files.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Clean a directory (e.g. cache) by removing rarely used files. | |
Usage: | |
./clean_directory directory [max_size] | |
The max_size argument is expected in megabytes. File removals are reported to | |
standard output and afterwards empty directories are (silently) removed. | |
Jeroen Laros <j.f.j.laros@lumc.nl> | |
Martijn Vermaat <m.vermaat.hg@lumc.nl> | |
This code is in the public domain; it can be used for whatever purpose with | |
absolutely no restrictions. | |
""" | |
import sys | |
import os | |
# Default maximum directory size (in megabytes). clean_directory() uses this
# when no max_size is given and converts it to bytes as max_size * 2**20.
DEFAULT_MAX_SIZE = 50
def directory_size(directory):
    """
    Return the size of a directory in bytes.

    The sizes of all non-directory entries found (recursively) below
    {directory} are added up. Entries whose size cannot be determined, such
    as dangling symbolic links or files that disappear while the tree is
    being walked, are skipped instead of aborting the computation.

    @arg directory: Directory under scrutiny.
    @type directory: string

    @return: The total size of {directory} in bytes.
    @rtype: int
    """
    size = 0
    for (path, dirs, files) in os.walk(directory):
        for name in files:
            try:
                size += os.path.getsize(os.path.join(path, name))
            except OSError:
                # os.walk lists dangling symlinks as files, but getsize
                # follows the link and fails on them. The same happens when
                # a file is removed by someone else during the walk.
                continue
    return size
def clean_directory(directory, max_size=DEFAULT_MAX_SIZE):
    """
    Keep removing files until the total size of the directory is less than the
    maximum size. Empty directories are removed.

    @arg directory: Directory under scrutiny.
    @type directory: string
    @kwarg max_size: Maximum size of the directory in megabytes.
    @type max_size: int

    First, {directory} is checked for its size. If it is already smaller than
    {max_size}, nothing is done at all. Otherwise the 'oldest' files (those
    with the earliest access time) are deleted one by one, each removal being
    reported on standard output, until the remaining size is below the limit
    or no files are left. Note that accessing a file makes it 'new'.

    Afterwards, directories that have become (or already were) empty are
    silently removed; directories that still have content are left alone.
    Files that cannot be inspected (e.g. dangling symbolic links) are ignored.
    """
    max_bytes = max_size * pow(2, 20)

    # Walk the tree once, recording access time, path and size of every file
    # together with the total size. Keeping a running total avoids walking
    # the entire tree again after every single removal.
    entries = []
    total = 0
    for (path, dirs, files) in os.walk(directory):
        for name in files:
            file_path = os.path.join(path, name)
            try:
                info = os.stat(file_path)
            except OSError:
                # Dangling symlink or a file that vanished in the meantime.
                continue
            entries.append((info.st_atime, file_path, info.st_size))
            total += info.st_size

    if total < max_bytes:
        return

    # Least recently accessed files first.
    entries.sort()

    # Now start removing files until the size of the directory is small
    # enough (or until the list is exhausted).
    for (atime, file_path, size) in entries:
        os.remove(file_path)
        print('Removed: %s' % file_path)
        total -= size
        if total < max_bytes:
            break

    # Remove any empty directories, deepest first so that a parent which only
    # contained empty directories becomes removable as well.
    for (path, dirs, files) in os.walk(directory, topdown=False):
        for name in dirs:
            try:
                os.rmdir(os.path.join(path, name))
            except OSError:
                # rmdir refuses to remove a directory that is not empty (or
                # a symlink to a directory); those must simply stay.
                pass
if __name__ == '__main__':
    # Command line entry point: ./clean_directory directory [max_size]
    if not 1 < len(sys.argv) < 4:
        print('Usage:\n ./clean_directory directory [max_size]')
        sys.exit(1)

    if len(sys.argv) == 3:
        # Parse the optional size limit on its own, so that an error raised
        # while cleaning is never mistaken for a missing or bad argument.
        try:
            limit = int(sys.argv[2])
        except ValueError:
            print('Second argument must be an integer')
            sys.exit(1)
        clean_directory(sys.argv[1], limit)
    else:
        # No limit given; fall back to the default maximum size.
        clean_directory(sys.argv[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment