Skip to content

Instantly share code, notes, and snippets.

@gbin
Last active January 3, 2022 09:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save gbin/4998909 to your computer and use it in GitHub Desktop.
Save gbin/4998909 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
#
# This script helps you estimate how much disk space you could save by
# using the new ext4 inline data feature available on
# Linux kernels 3.8.0+:
# "The new Inline Data Support feature allows Ext4 to store files that only consist of a few bytes together with the inode to save storage space and accelerate access"
# see http://www.h-online.com/open/features/What-s-new-in-Linux-3-8-1804240.html for details.
#
# Just run it on your ext4 mountpoints and it will give you the trade-off
# for all your files depending on the inode size you choose.
#
# To get your current inode size you can do:
# $ tune2fs -l /dev/sda | grep Inode
# Inode count: 15040512
# Inodes per group: 8192
# Inode blocks per group: 512
# Inode size: 256
#
# You can set the inode size at creation time with :
# mkfs.ext4 -I inode-size /dev/...
#
import os
from sys import argv, stdout
def find_mount_point(path):
    """Return the closest mount point at or above *path*.

    The path is made absolute first, then each ancestor is tried in turn
    (via ``os.path.dirname``) until ``os.path.ismount`` accepts one.

    Args:
        path: Any filesystem path, absolute or relative.

    Returns:
        The absolute path of the first ancestor (or *path* itself) that
        ``os.path.ismount`` reports as a mount point.
    """
    candidate = os.path.abspath(path)
    while True:
        if os.path.ismount(candidate):
            return candidate
        # Not a mount point yet: climb one level and try again.
        candidate = os.path.dirname(candidate)
def compute_rough_file_count(path):
    """Return the number of inodes currently in use on *path*'s filesystem.

    The figure is the difference between the total (``f_files``) and free
    (``f_ffree``) inode counts reported by ``os.statvfs``.

    Args:
        path: A path located on the filesystem to inspect.

    Returns:
        Total inodes minus free inodes, as an int.

    Raises:
        OSError: If ``os.statvfs`` cannot query *path*.
    """
    fs_stats = os.statvfs(path)
    total_inodes = fs_stats.f_files
    free_inodes = fs_stats.f_ffree
    return total_inodes - free_inodes
def progress_bar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█'):
    """Redraw a single-line text progress bar on standard output.

    The current terminal line is erased (ANSI ``ESC[2K``) and the bar is
    written followed by a carriage return, so the next call overwrites it.

    Args:
        iteration: Amount of work done so far.
        total: Expected amount of work. It may be an estimate: progress is
            clamped to the 0-100% range when *iteration* overshoots it, and
            a non-positive *total* is displayed as a full bar.
        prefix: Text printed before the bar.
        suffix: Text printed after the percentage.
        decimals: Number of decimals shown in the percentage.
        length: Width of the bar in characters.
        fill: Character used for the completed part of the bar.
    """
    if total > 0:
        # Clamp both values so an underestimated total can neither push the
        # percentage past 100 nor make the bar wider than `length`.
        fraction = min(max(iteration / float(total), 0.0), 1.0)
        filled_length = min(max(int(length * iteration // total), 0), length)
    else:
        # Nothing measurable to wait for; also avoids dividing by zero.
        fraction = 1.0
        filled_length = length
    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filled_length + '-' * (length - filled_length)
    # The line ends with '\r' rather than '\n', so it must be flushed
    # explicitly to show up on a line-buffered terminal.
    print(f'\x1b[2K{prefix} |{bar}| {percent}% {suffix}', end='\r', flush=True)
    if iteration == total:
        # Finished: move to the next line so the bar stays visible.
        print()
def _decode_mtab_field(field):
    """Undo the octal escaping applied to whitespace and backslashes in mtab fields."""
    # The backslash escape is handled last so that a decoded backslash can
    # never combine with the following digits into another escape sequence.
    for escaped, plain in (('\\040', ' '), ('\\011', '\t'), ('\\012', '\n'), ('\\134', '\\')):
        field = field.replace(escaped, plain)
    return field


def get_all_ext4_mountpoints(mtab_path='/etc/mtab'):
    """List the mounted filesystems, split into ext4 and everything else.

    Args:
        mtab_path: Mount table to read; each useful line is expected to hold
            whitespace-separated ``device mount_point fs_type ...`` fields.

    Returns:
        A ``(ext4mountedpaths, other_mount_points)`` tuple: a list of the
        mount points whose filesystem type is ``ext4`` and a set of all the
        other mount points. A mount point appears in only one of the two.

    Raises:
        OSError: If *mtab_path* cannot be opened.
    """
    fs_by_mount_point = {}
    with open(mtab_path, 'r') as f:
        for line in f:
            fields = line.split()
            # Skip blank, truncated or commented lines instead of crashing.
            if len(fields) < 3 or fields[0].startswith('#'):
                continue
            mount_point = _decode_mtab_field(fields[1])
            # When something is mounted over an existing mount point, the
            # later line describes what is actually visible there, so it
            # replaces the earlier entry.
            fs_by_mount_point[mount_point] = fields[2]
    ext4mountedpaths = [mp for mp, fs in fs_by_mount_point.items() if fs == 'ext4']
    other_mount_points = {mp for mp, fs in fs_by_mount_point.items() if fs != 'ext4'}
    return ext4mountedpaths, other_mount_points
# Candidate inode sizes, in bytes, for which main() reports the trade-off.
EXPLORE_INODES_SIZES = (256, 512, 1024, 2048, 4096)

# Bytes of each inode assumed to be already taken, so that only the remainder
# (inode size minus this value) is treated as room for inline file data.
# NOTE(review): the origin of the 124 figure is not shown in this file - confirm.
TYPICAL_OCCUPIED_SPACE_FOR_INODE = 124
def _scan_file_sizes(mount_point, ext4_mount_points, other_mount_points, to_explore):
    """Yield one size in bytes per directory entry found on an ext4 filesystem.

    Walks the tree below *mount_point*. Regular files yield their own size;
    any other kind of entry (symlink, device, socket, ...) yields 0.
    Entries that cannot be examined are reported on stdout and skipped.
    A progress bar is refreshed every 10000 inodes when *to_explore* is known.
    """
    import stat  # local import: this module is only needed by the scan

    ext4_lookup = set(ext4_mount_points)  # O(1) membership tests in the loop
    inode_count = 0
    for root, _sub_folders, files in os.walk(mount_point):
        inode_count += 1
        # Resolved once per directory rather than once per file.
        root_mount = find_mount_point(root)
        if root_mount in other_mount_points:
            continue
        for entry in files:
            filename = os.path.join(root, entry)
            # An entry lives on its directory's filesystem unless
            # os.path.ismount reports the entry itself as a mount point.
            if os.path.ismount(filename):
                file_mount = os.path.abspath(filename)
            else:
                file_mount = root_mount
            if file_mount not in ext4_lookup:
                continue  # only consider files from mounted ext4 filesystems
            inode_count += 1
            try:
                # lstat: measure the entry itself, never a symlink's target.
                info = os.lstat(filename)
            except OSError as e:
                print(entry)
                print(e)
                continue  # probably a system file
            # Count every entry that is not a real file as an empty file.
            yield info.st_size if stat.S_ISREG(info.st_mode) else 0
            # No bar when the estimate is 0: there is nothing to scale against.
            if to_explore > 0 and not (inode_count % 10000):
                progress_bar(inode_count, to_explore, prefix='Progress', suffix='Complete')


def _tally_inline_fit(file_sizes):
    """Accumulate, per candidate inode size, how many files fit inline and the unused inline bytes.

    Returns a ``(results, total_occupied_space, file_count)`` tuple where
    ``results`` maps each inode size to ``[files_that_fit, wasted_bytes]``.
    """
    results = {inode_size: [0, 0] for inode_size in EXPLORE_INODES_SIZES}
    total_occupied_space = 0
    file_count = 0
    for filesize in file_sizes:
        file_count += 1
        total_occupied_space += filesize
        for size in EXPLORE_INODES_SIZES:
            inline_room = size - TYPICAL_OCCUPIED_SPACE_FOR_INODE
            if filesize <= inline_room:
                results[size][0] += 1
                results[size][1] += inline_room - filesize
            else:
                # The file does not fit, so the whole inline area stays unused.
                results[size][1] += inline_room
    return results, total_occupied_space, file_count


def main():
    """Report, for several inode sizes, how many files would fit inline in their inode.

    Expects exactly one command-line argument: the directory (normally an
    ext4 mount point) to explore. Results are printed on stdout.

    Raises:
        SystemExit: With status -1 on a usage error or when no ext4
            filesystem is mounted, and -2 when no file data was found.
    """
    if len(argv) != 2:
        print('Syntax : ext4-inlinedata-calculator.py mountpoint')
        raise SystemExit(-1)
    ext4_mount_points, other_mount_points = get_all_ext4_mountpoints()
    if not ext4_mount_points:
        print('No ext4 fs are mounted on your system.')
        # Every file would be filtered out anyway, so skip the walk entirely.
        raise SystemExit(-1)
    mount_point = argv[1]
    print(f'Exploring {mount_point} ...')
    to_explore = compute_rough_file_count(mount_point)
    print(f'Number of inodes to explore {to_explore:,} ...')
    print()
    print()

    # Sizes are consumed as they are discovered, so no path is kept in memory.
    sizes = _scan_file_sizes(mount_point, ext4_mount_points, other_mount_points, to_explore)
    results, total_occupied_space, total_nb = _tally_inline_fit(sizes)

    if total_occupied_space == 0:
        print('No relevant file found')
        raise SystemExit(-2)

    print()
    print('== Results ==')
    print(f'Total file size {total_occupied_space:,}')
    print('Inode size\t\tfiles fit in\t\twasted inode space')
    for size in EXPLORE_INODES_SIZES:
        nb, wasted = results[size]
        print(f'{size}\t\t\t{nb / total_nb:>7.2%}\t\t\t{wasted / total_occupied_space:>15.8%}')
# Run the calculator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
@onlyjob
Copy link

onlyjob commented Jan 28, 2020

It would be great to adjust the script to use the actual size of sparse files.

Also please convert the script to Python3. Thanks.

@gbin
Copy link
Author

gbin commented Jan 28, 2020

I am uploading an updated version

@onlyjob
Copy link

onlyjob commented Jan 29, 2020

Thank you.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment