Skip to content

Instantly share code, notes, and snippets.

@gbin
Last active January 3, 2022 09:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save gbin/4998909 to your computer and use it in GitHub Desktop.
Save gbin/4998909 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#
# This script is a tool that helps you estimate the potential disk space
# savings of the new ext4 inline data feature available on
# Linux kernels 3.8.0+:
# "The new Inline Data Support feature allows Ext4 to store files that only consist of a few bytes together with the inode to save storage space and accelerate access"
# see http://www.h-online.com/open/features/What-s-new-in-Linux-3-8-1804240.html for details.
#
# Just run it on your ext4 mount points and it will give you the trade-off
# for all your files depending on the inode size you choose.
#
# To get your current inode size you can do:
# $ tune2fs -l /dev/sda | grep Inode
# Inode count: 15040512
# Inodes per group: 8192
# Inode blocks per group: 512
# Inode size: 256
#
# You can set the inode size at creation time with :
# mkfs.ext4 -I inode-size /dev/...
#
import os
from sys import argv, stdout
def find_mount_point(path):
    """Return the mount point of the filesystem that contains *path*.

    The path is first made absolute; its ancestors are then climbed one
    directory at a time until ``os.path.ismount`` accepts one of them.
    """
    candidate = os.path.abspath(path)
    while True:
        if os.path.ismount(candidate):
            return candidate
        # Not a mount point yet: try the parent directory.
        candidate = os.path.dirname(candidate)
def compute_rough_file_count(path):
    """Estimate how many inodes are in use on the filesystem holding *path*.

    The figure is the total inode count minus the free inode count, both
    taken from ``os.statvfs``.
    """
    fs_stats = os.statvfs(path)
    total_inodes, free_inodes = fs_stats.f_files, fs_stats.f_ffree
    return total_inodes - free_inodes
def progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█'):
    """Draw a single-line textual progress bar on standard output.

    Args:
        iteration: Number of items processed so far.
        total: Number of items expected overall. A value <= 0 is treated as
            "unknown" and drawn as an empty bar at 0 %.
        prefix: Text printed before the bar.
        suffix: Text printed after the percentage.
        decimals: Number of decimals shown in the percentage.
        length: Width of the bar in characters.
        fill: Character used for the completed part of the bar.

    The line is terminated with a carriage return so the next call redraws
    it in place; a newline is emitted only when ``iteration == total``.
    """
    if total > 0:
        # Clamp so that an overshooting (or negative) counter can neither
        # overflow the bar width nor report more than 100 %.
        done = min(max(iteration, 0), total)
        filled_length = int(length * done // total)
        percent_value = 100 * (done / float(total))
    else:
        # Nothing sensible to divide by: avoid ZeroDivisionError.
        filled_length = 0
        percent_value = 0.0
    percent = f'{percent_value:.{decimals}f}'
    bar = fill * filled_length + '-' * (length - filled_length)
    stdout.write('\x1b[2K')  # ANSI "erase entire line" before redrawing
    print(f'{prefix} |{bar}| {percent}% {suffix}', end='\r')
    if iteration == total:
        print()
def _decode_mtab_escapes(field):
    """Undo the octal escapes used for whitespace and backslash in mtab fields."""
    # The backslash escape is handled last so that a decoded backslash is
    # never re-interpreted as the start of another escape sequence.
    for escaped, plain in (('\\040', ' '), ('\\011', '\t'), ('\\012', '\n'), ('\\134', '\\')):
        field = field.replace(escaped, plain)
    return field


def get_all_ext4_mountpoints(mtab_path='/etc/mtab'):
    """List the mounted filesystems, split into ext4 and everything else.

    Args:
        mtab_path: Mount table to read; each line is expected to hold
            whitespace-separated fields with the mount point second and the
            filesystem type third.

    Returns:
        A ``(ext4_mount_points, other_mount_points)`` tuple: a list of the
        mount points whose type is ``ext4`` (in file order) and a set of all
        remaining mount points.

    Raises:
        OSError: If *mtab_path* cannot be opened.
    """
    ext4_mount_points = []
    other_mount_points = set()
    with open(mtab_path, 'r') as mtab:
        for line in mtab:
            fields = line.split()
            # Tolerate blank, truncated or comment lines instead of failing
            # on a rigid six-field unpack.
            if len(fields) < 3 or fields[0].startswith('#'):
                continue
            # Decode escapes so the result compares equal to real paths.
            mount_point = _decode_mtab_escapes(fields[1])
            if fields[2] == 'ext4':
                ext4_mount_points.append(mount_point)
            else:
                other_mount_points.add(mount_point)
    return ext4_mount_points, other_mount_points
# Bytes of every inode that the estimate treats as already taken; only the
# remainder (inode size minus this value) is considered available for file data.
# NOTE(review): the origin of the value 124 is not shown in this file - confirm
# against the ext4 on-disk inode layout.
TYPICAL_OCCUPIED_SPACE_FOR_INODE = 124
# Candidate inode sizes, in bytes, for which the trade-off is reported.
EXPLORE_INODES_SIZES = (256, 512, 1024, 2048, 4096)
def _collect_file_sizes(mount_point, ext4_mount_points, other_mount_points, to_explore):
    """Walk *mount_point* and return the sizes of the entries stored on ext4."""
    file_sizes = []
    inode_count = 0
    for root, _sub_folders, files in os.walk(mount_point):
        inode_count += 1
        # Resolved once per directory: every entry that is not itself a
        # mount point shares the mount point of its parent directory.
        root_mount = find_mount_point(root)
        if root_mount in other_mount_points:
            continue
        for entry in files:
            try:
                filename = os.path.abspath(os.path.join(root, entry))
                entry_mount = filename if os.path.ismount(filename) else root_mount
                if entry_mount not in ext4_mount_points:
                    continue  # only consider files from mounted ext4 filesystems
                inode_count += 1
                # Anything that is not a regular file counts as empty. Symlinks
                # are excluded explicitly because isfile()/getsize() follow
                # them and would report the size of the link target.
                if os.path.isfile(filename) and not os.path.islink(filename):
                    file_sizes.append(os.path.getsize(filename))
                else:
                    file_sizes.append(0)
                # The inode estimate may be 0; skip the bar rather than
                # dividing by zero.
                if to_explore > 0 and inode_count % 10000 == 0:
                    progress_bar(inode_count, to_explore, prefix='Progress', suffix='Complete')
            except OSError as e:
                print(entry)
                print(e)
                continue  # probably a system file
    return file_sizes


def _summarize_inline_fit(file_sizes):
    """Return ``(results, total_size)`` for the explored inode sizes.

    ``results`` maps each inode size to ``[files_fitting, wasted_bytes]``.
    """
    results = {inode_size: [0, 0] for inode_size in EXPLORE_INODES_SIZES}
    total_size = 0
    for filesize in file_sizes:
        total_size += filesize
        for inode_size in EXPLORE_INODES_SIZES:
            free_space = inode_size - TYPICAL_OCCUPIED_SPACE_FOR_INODE
            if filesize <= free_space:
                results[inode_size][0] += 1
                results[inode_size][1] += free_space - filesize
            else:
                # The file does not fit inline, so the whole free area of
                # the inode is wasted.
                results[inode_size][1] += free_space
    return results, total_size


def main():
    """Command-line entry point.

    Expects exactly one argument, the path to explore. Walks it, keeps the
    entries that live on a mounted ext4 filesystem and prints, for each inode
    size in ``EXPLORE_INODES_SIZES``, the share of files that would fit
    inside the inode and the inode space wasted relative to the total file
    size.

    Raises:
        SystemExit: With -1 on a usage error, and -2 when no ext4
            filesystem is mounted or no relevant file was found.
    """
    # SystemExit is raised directly: the exit() helper is injected by the
    # ``site`` module and is not guaranteed to exist in every interpreter setup.
    if len(argv) != 2:
        print('Syntax : ext4-inlinedata-calculator.py mountpoint')
        raise SystemExit(-1)
    ext4_mount_points, other_mount_points = get_all_ext4_mountpoints()
    if not ext4_mount_points:
        print('No ext4 fs are mounted on your system.')
        # Without any ext4 mount every file would be skipped anyway, so stop
        # here instead of walking the whole tree for nothing.
        raise SystemExit(-2)
    ext4_mount_points = set(ext4_mount_points)  # O(1) membership tests in the walk
    mount_point = argv[1]
    print(f'Exploring {mount_point} ...')
    to_explore = compute_rough_file_count(mount_point)
    print(f'Number of inodes to explore {to_explore:,} ...')
    print()
    print()
    file_sizes = _collect_file_sizes(mount_point, ext4_mount_points, other_mount_points, to_explore)
    results, total_occupied_space = _summarize_inline_fit(file_sizes)
    if total_occupied_space == 0:
        print('No relevant file found')
        raise SystemExit(-2)
    print()
    print('== Results ==')
    print(f'Total file size {total_occupied_space:,}')
    print('Inode size\t\tfiles fit in\t\twasted inode space')
    total_nb = len(file_sizes)
    for size in EXPLORE_INODES_SIZES:
        nb, wasted = results[size]
        print(f'{size}\t\t\t{nb / total_nb:>7.2%}\t\t\t{wasted / total_occupied_space:>15.8%}')
# Run the calculator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
@gbin
Copy link
Author

gbin commented Jan 28, 2020

I am uploading an updated version

@onlyjob
Copy link

onlyjob commented Jan 29, 2020

Thank you.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment