Skip to content

Instantly share code, notes, and snippets.

@dipietrantonio
Last active November 23, 2022 08:19
Show Gist options
  • Save dipietrantonio/3e6ecbade785b7c2f000f45622a47ef8 to your computer and use it in GitHub Desktop.
Save dipietrantonio/3e6ecbade785b7c2f000f45622a47ef8 to your computer and use it in GitHub Desktop.
cf.py - recursively count the files within a directory tree, displaying directories with the highest file count.
#!/usr/bin/env python3
import os
import os.path
from contextlib import suppress
import argparse
def visit(path, state):
    """Recursively count the files below *path* and sum their sizes.

    Args:
        path: File or directory to inspect. If it is (or resolves to) a
            directory its whole tree is scanned.
        state: Dict that is filled in place, mapping every successfully
            scanned directory path to its ``(file_count, total_bytes)``.

    Returns:
        ``(file_count, total_bytes)`` for *path*. A non-directory counts as
        one file of its own size.

    Raises:
        OSError: If *path* itself cannot be read. Failures on entries
            deeper in the tree are skipped instead (best effort).
    """
    if not os.path.isdir(path):
        return (1, os.path.getsize(path))
    return _visit_dir(path, state)


def _visit_dir(path, state):
    """Scan the directory *path*, record its totals in *state*, return them."""
    count, size = 0, 0
    with os.scandir(path) as entries:
        for entry in entries:
            try:
                # Symlinks are never followed: a link to a directory would
                # double-count its files, and a link to an ancestor would
                # recurse until the OS rejects the path.
                if entry.is_dir(follow_symlinks=False):
                    c_e, s_e = _visit_dir(entry.path, state)
                else:
                    # A symlink counts as one file of the link's own size.
                    c_e = 1
                    s_e = entry.stat(follow_symlinks=False).st_size
            except OSError:
                # Unreadable or vanished entry: skip it, keep scanning.
                continue
            count += c_e
            size += s_e
    state[path] = (count, size)
    return (count, size)
def to_readable_size(n_bytes):
    """Format a byte count as a human-readable string with binary units.

    Args:
        n_bytes: Size in bytes (any value accepted by ``float``).

    Returns:
        The size with two decimals and a unit from 'bytes' up to 'PiB',
        e.g. ``"1.50 KiB"``. Values beyond the PiB range stay in PiB.
    """
    n_bytes = float(n_bytes)
    unit = ('bytes', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB')
    idx = 0
    # Units are powers of 1024, so promote exactly at 1024. A threshold of
    # 1000 would render 1001-1023 bytes as a sub-unit value like "0.98 KiB".
    while n_bytes >= 1024.0 and idx < len(unit) - 1:
        n_bytes /= 1024.0
        idx += 1
    return f"{n_bytes:.2f} {unit[idx]}"
def print_report(state : dict, limit : int):
    """Print the directories holding the most files, largest count first.

    Args:
        state: Mapping of directory path to ``(file_count, total_bytes)``.
        limit: Maximum number of directories to list.
    """
    # Rank by file count, the first element of each value tuple.
    ranked = sorted(state.items(), key=lambda item: item[1][0], reverse=True)
    print(f"\nTop {limit} directories by file count:")
    for directory, (n_files, n_bytes) in ranked[:limit]:
        print(f"Directory '{directory}' - count: {n_files}, size: {to_readable_size(n_bytes)}")
def main(argv=None):
    """Parse the command line, scan the directory tree and print the report.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Exits with status 2 (via ``parser.error``) when the path is not a
    directory or the limit is negative.
    """
    parser = argparse.ArgumentParser()
    # nargs='?' makes the positional optional; without it argparse still
    # requires the argument and the default is never used.
    parser.add_argument('path', type=str, nargs='?', default=os.getcwd(), help="Path to the directory to analyse.")
    parser.add_argument('-l', '--limit', default=10, type=int, help="Limit the directories with the highest file count being displayed to a certain number.")
    args = parser.parse_args(argv)
    if args.limit < 0:
        # A negative limit would slice from the end of the ranking.
        parser.error("--limit must be a non-negative integer")
    start = os.path.abspath(args.path)
    if not os.path.isdir(start):
        # Only directories get a totals entry, so reject anything else here.
        parser.error(f"'{start}' is not a directory")
    state = {}
    count, size = visit(start, state)
    print(f"Root directory '{start}' contains {count} files and occupies a total of {to_readable_size(size)}")
    print_report(state, args.limit)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment