Skip to content

Instantly share code, notes, and snippets.

@AdrienHorgnies
Last active April 12, 2019 11:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save AdrienHorgnies/04409408bf7dffd181405f305e93ca1e to your computer and use it in GitHub Desktop.
Save AdrienHorgnies/04409408bf7dffd181405f305e93ca1e to your computer and use it in GitHub Desktop.
Compute directory sizes recursively. Motivated by the shell's limit of 1000 arguments.
#!/usr/bin/env python3
import argparse
import logging
import math
import os
from os.path import dirname
def main(directories):
    """Log the file count and cumulative size of every directory under *directories*.

    Args:
        directories: iterable of directory paths; each one is measured with ``ds``.
    """
    measures = {}
    for directory in directories:
        # Later results overwrite earlier ones when the same path shows up twice.
        measures.update(ds(directory))

    # ``ds`` walks bottom-up, so reading the keys backwards reports a
    # directory before its sub-directories.
    for path in list(measures)[::-1]:
        stats = measures[path]
        readable = human_readable_size(stats['size'])
        log.info('{} contains {} files which amount to {}'.format(path, stats['count'], readable))
def ds(directory):
    """Measure *directory* and every directory below it.

    The tree is walked bottom-up, so the totals of a directory's children are
    already known when the directory itself is visited. Children are looked up
    with the same ``os.path.join(current, name)`` key that ``os.walk`` yields,
    which keeps the aggregation correct when *directory* is given with a
    trailing separator (``foo/``).

    Args:
        directory: path of the root directory to measure.

    Returns:
        dict mapping each walked directory path (as yielded by ``os.walk``) to
        ``{'size': cumulative size in bytes, 'count': cumulative file count}``.
        The dict is empty when *directory* cannot be walked.
    """
    # Resolved here rather than read from a module global so the function also
    # works when the module is imported instead of run as a script.
    logger = logging.getLogger(__name__)
    debug_enabled = logger.isEnabledFor(logging.DEBUG)

    dir_tree = {}
    for current, sub_dirs, files in os.walk(directory, topdown=False):
        logger.debug('working in %s', current)

        size = 0
        count = 0
        for name in files:
            file_path = os.path.join(current, name)
            try:
                size += os.stat(file_path).st_size
            except OSError as err:
                # Dangling symlink, file removed during the walk, permission
                # problem...: skip the entry instead of aborting the whole run.
                logger.warning('cannot stat %s: %s', file_path, err)
                continue
            count += 1

        if debug_enabled:
            # Only format the size when the message is actually emitted.
            logger.debug('{} has {} files for {}'.format(current, count, human_readable_size(size)))

        for sub_dir in sub_dirs:
            # Sub-directories that were not walked (e.g. symlinks to
            # directories, unreadable directories) have no entry and are skipped.
            child = dir_tree.get(os.path.join(current, sub_dir))
            if child is not None:
                size += child['size']
                count += child['count']

        dir_tree[current] = {'size': size, 'count': count}
    return dir_tree
def human_readable_size(size, commercial_base=None):
    """Format a byte count with a unit prefix and three decimals.

    Args:
        size: number of bytes.
        commercial_base: ``True`` for powers of 1000 (KB, MB, ...), ``False``
            for powers of 1024 (KiB, MiB, ...). When ``None`` (the default) the
            module-level ``args.commercial_base`` is used if ``args`` exists,
            otherwise powers of 1024 are used.

    Returns:
        A string such as ``'1.500 KiB'``; ``'0 B'`` when *size* is zero. Values
        beyond the largest unit are expressed in that largest unit.
    """
    if size == 0:
        return '0 B'

    if commercial_base is None:
        # ``args`` only exists when the file runs as a script; default to the
        # binary base when the module is imported.
        commercial_base = getattr(globals().get('args'), 'commercial_base', False)

    unit_base = 1000 if commercial_base else 1024
    suffix = 'B' if commercial_base else 'iB'
    units = ['B'] + [coef + suffix for coef in 'KMGTPEZY']

    # Pick the unit by comparing against exact powers of the base: a float
    # logarithm can land just below an exact power and select the unit below.
    magnitude = abs(size)
    coef_idx = 0
    while coef_idx < len(units) - 1 and magnitude >= unit_base ** (coef_idx + 1):
        coef_idx += 1

    return '{number:.3f} {unit}'.format(
        number=size / unit_base ** coef_idx,
        unit=units[coef_idx])
if __name__ == '__main__':
    # Command-line entry point: parse the options, configure logging, then
    # measure every directory given on the command line.
    cli = argparse.ArgumentParser(
        description='Find files recursively and compute size of each directory level')
    cli.add_argument('directories', metavar='directory', nargs='+',
                     help='a directory to search in')
    # Disabled options, kept for reference:
    # cli.add_argument('-e', '--extensions', nargs='+', metavar='extension', required=False,
    #                  help='only count files with these extensions')
    # cli.add_argument('-c', '--classify', default=False, action='store_true',
    #                  help='Display size by file type')
    cli.add_argument('--commercial-base', action='store_true', default=False,
                     help='By default, size is printed using coefficient with base 1024.'
                          ' This option sets coefficient base to 1000')
    cli.add_argument('--DEBUG', action='store_true', default=False,
                     help='Display debug logs')

    # ``args`` and ``log`` are module-level names read by the functions above.
    args = cli.parse_args()
    log = logging.getLogger(__name__)

    if args.DEBUG:
        verbosity = logging.DEBUG
    else:
        verbosity = logging.INFO
    logging.basicConfig(level=verbosity)

    main(args.directories)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment