Skip to content

Instantly share code, notes, and snippets.

@jvhaarst
Last active April 6, 2022 02:55
Show Gist options
  • Save jvhaarst/f0b0af7c0400e2ae7d29 to your computer and use it in GitHub Desktop.
Save jvhaarst/f0b0af7c0400e2ae7d29 to your computer and use it in GitHub Desktop.
Simple script to print: human size of a directory tree, number of files in that tree, number of hard links in that tree, number of softlinks in that tree and number of directories.
#!/usr/bin/env python3
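# Simple script to print, for a directory tree:
# Human-readable total size of the tree
# Number of files in that tree
# Number of hard links in that tree
# Number of softlinks in that tree
# Number of directories, and number of directories skipped for being too large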
import os
import sys
import datetime
from os.path import normpath, basename
import operator
from itertools import starmap
# Make sure we do not "hang" on large directories: get_tree_size() does not
# descend into a directory whose own st_size is at or above this threshold.
# The final report labels this value as a number of files.
max_directory_entries = 100000
# http://stackoverflow.com/a/25613067/194447
def file_size(size):
    """Return *size* (a byte count) as a short human-readable string.

    The value is scaled by the largest power of 1024 that fits and is
    formatted with four significant digits followed directly by the
    binary unit suffix, e.g. ``file_size(1536) == '1.5KiB'``.

    Zero and negative sizes are reported in bytes. Sizes beyond the
    largest known unit are expressed in that unit rather than raising.
    """
    from math import log2

    # Binary prefixes in ascending order; index == power of 1024.
    _suffixes = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if size > 0:
        order = int(log2(size) / 10)
        # Clamp so tiny fractions and huge values still map to a valid unit.
        order = max(0, min(order, len(_suffixes) - 1))
    else:
        # log2() is undefined for zero and negative values.
        order = 0
    return '{:.4g}{}'.format(size / (1 << (order * 10)), _suffixes[order])
# Adapted from https://www.python.org/dev/peps/pep-0471/#notes-on-exception-handling
def get_tree_size(path):
    """Return totals for the directory tree rooted at *path*.

    The result is the tuple ``(size, files, symlinks, hardlinks,
    directories, skipped_directories)``:

    * ``size`` -- sum of ``st_size`` of every non-directory entry
      (symlinks are not followed).
    * ``files`` -- number of non-directory entries.
    * ``symlinks`` -- number of entries that are symbolic links.
    * ``hardlinks`` -- number of non-directory entries with ``st_nlink > 1``.
    * ``directories`` -- number of sub-directories that were descended into.
    * ``skipped_directories`` -- number of sub-directories not descended
      into because their own ``st_size`` reached ``max_directory_entries``.

    If scandir(), is_dir() or stat() fails, an error message is printed to
    stderr and the affected entry (or directory) contributes nothing, for
    example because a file was deleted while the tree was being walked.
    """
    size = 0
    files = 0
    symlinks = 0
    hardlinks = 0
    directories = 0
    skipped_directories = 0

    # Explicit stack instead of recursion so very deep trees cannot hit
    # the interpreter's recursion limit.
    pending = [path]
    while pending:
        current = pending.pop()
        try:
            # The context manager closes the directory handle promptly.
            with os.scandir(current) as entries:
                for entry in entries:
                    try:
                        is_dir = entry.is_dir(follow_symlinks=False)
                    except OSError as error:
                        print('Error calling is_dir():', error, file=sys.stderr)
                        continue
                    try:
                        # One stat() per entry; the result is reused below.
                        info = entry.stat(follow_symlinks=False)
                        is_link = entry.is_symlink()
                    except OSError as error:
                        print('Error calling stat():', error, file=sys.stderr)
                        continue
                    if is_dir:
                        # The directory's own st_size is used as a cheap
                        # stand-in for "how many entries does it hold".
                        if info.st_size < max_directory_entries:
                            directories += 1
                            pending.append(entry.path)
                        else:
                            skipped_directories += 1
                            # NOTE(review): adds the directory's st_size to
                            # the file count -- presumably a rough estimate
                            # of the entries that were skipped; confirm.
                            files += info.st_size
                    else:
                        size += info.st_size
                        files += 1
                        if is_link:
                            symlinks += 1
                        if info.st_nlink > 1:
                            hardlinks += 1
        except OSError as error:
            # Unreadable or vanished directory: report it and carry on.
            print('Error calling scandir():', error, file=sys.stderr)
    return (size, files, symlinks, hardlinks, directories, skipped_directories)
if __name__ == "__main__":
    # Script entry point: walk the directory given as the first command-line
    # argument and print a one-line summary including the elapsed time.
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("Usage: {} DIRECTORY".format(sys.argv[0]))
    directory = sys.argv[1]
    start_time = datetime.datetime.now()
    size, files, symlinks, hardlinks, directories, skipped_directories = get_tree_size(directory)
    # Report only the last path component; fall back to the argument itself
    # when that is empty (e.g. the filesystem root).
    directory = basename(normpath(directory)) or directory
    size = file_size(size)
    end_time = datetime.datetime.now()
    difference = str(end_time - start_time)
    print(
        "{:s} : {:s} in {:d} files, with {:d} symlinks, {:d} hardlinks, {:d} directories and {:d} skipped directories (more than {:,} files) in {:s}".format(
            directory,
            size,
            files,
            symlinks,
            hardlinks,
            directories,
            skipped_directories,
            max_directory_entries,
            difference,
        )
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment