Last active
November 23, 2022 08:19
-
-
Save dipietrantonio/3e6ecbade785b7c2f000f45622a47ef8 to your computer and use it in GitHub Desktop.
cf.py - recursively count the files within a directory tree, displaying directories with the highest file count.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import os.path | |
from contextlib import suppress | |
import argparse | |
def visit(path, state):
    """Recursively count the files below `path` and sum their sizes.

    Args:
        path: File or directory to inspect. If it is a directory (a symlink
            to a directory is accepted here), its tree is traversed.
        state: Dict filled in place; every directory whose listing succeeded
            is mapped to its ``(file_count, total_size_in_bytes)`` tuple.

    Returns:
        A ``(file_count, total_size_in_bytes)`` tuple for `path`. A regular
        file yields ``(1, size)``.

    Raises:
        OSError: If `path` itself cannot be listed or stat'ed. Failures on
            entries *below* `path` are skipped on a best-effort basis.
    """
    if not os.path.isdir(path):
        return (1, os.path.getsize(path))

    count, size = 0, 0
    for entry in os.listdir(path):
        entry_abspath = os.path.join(path, entry)
        # Best effort: unreadable directories, broken symlinks and entries
        # that vanish mid-scan are skipped instead of aborting the whole walk.
        # Only filesystem/recursion-depth errors are swallowed, so genuine
        # programming errors still surface.
        with suppress(OSError, RecursionError):
            if os.path.islink(entry_abspath) and os.path.isdir(entry_abspath):
                # Do not descend into symlinked directories: a link pointing
                # at an ancestor would otherwise be followed over and over,
                # inflating the totals and polluting `state`. The link is
                # counted as one entry with its own (lstat) size.
                c_e, s_e = 1, os.lstat(entry_abspath).st_size
            else:
                c_e, s_e = visit(entry_abspath, state)
            count += c_e
            size += s_e
    state[path] = (count, size)
    return (count, size)
def to_readable_size(n_bytes):
    """Format a byte count as a human-readable string with binary units.

    Args:
        n_bytes: Size in bytes (any value accepted by ``float()``).

    Returns:
        The size with two decimals followed by the largest fitting unit,
        e.g. ``"1.50 KiB"``. Values beyond the PiB range stay in PiB.
    """
    units = ['bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
    value = float(n_bytes)
    idx = 0
    # The units are binary (powers of 1024), so only step up once a full
    # 1024 of the current unit is reached; a decimal threshold of 1000 would
    # render e.g. 1001 bytes as "0.98 KiB".
    while value >= 1024.0 and idx < len(units) - 1:
        value /= 1024.0
        idx += 1
    return f"{value:.2f} {units[idx]}"
def print_report(state: dict, limit: int):
    """Print the directories with the highest file count.

    Args:
        state: Maps a directory path to its ``(file_count, size_in_bytes)``
            tuple.
        limit: Maximum number of directories to list.
    """
    # Rank by file count, largest first; ties keep their insertion order.
    ranked = sorted(state.items(), key=lambda item: item[1][0], reverse=True)
    print(f"\nTop {limit} directories by file count:")
    for directory, (n_files, n_bytes) in ranked[:limit]:
        print(f"Directory '{directory}' - count: {n_files}, size: {to_readable_size(n_bytes)}")
if __name__ == "__main__":
    # Command-line entry point: analyse a directory tree and print a summary
    # followed by the directories holding the most files.
    parser = argparse.ArgumentParser()
    # nargs='?' makes the positional optional; without it argparse always
    # requires the argument and the current-directory default is never used.
    parser.add_argument('path', type=str, nargs='?', default=os.getcwd(), help="Path to the directory to analyse.")
    parser.add_argument('-l', '--limit', default=10, type=int, help="Limit the directories with the highest file count being displayed to a certain number.")
    args = vars(parser.parse_args())
    start = os.path.abspath(args['path'])
    # A non-directory would leave no entry for `start` in `state`; report it
    # as a usage error instead of failing with a KeyError.
    if not os.path.isdir(start):
        parser.error(f"'{start}' is not a directory")
    state = {}
    try:
        visit(start, state)
    except OSError as err:
        # The root directory itself could not be read (e.g. permissions).
        parser.error(f"cannot analyse '{start}': {err}")
    count, size = state[start]
    print(f"Root directory '{start}' contains {count} files and occupies a total of {to_readable_size(size)}")
    print_report(state, args['limit'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment