Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save JayDoubleu/ac363d214869bc7f76cfa054b2c88a2b to your computer and use it in GitHub Desktop.
Save JayDoubleu/ac363d214869bc7f76cfa054b2c88a2b to your computer and use it in GitHub Desktop.
"""
This python script will find flatpak deduplication size stats.
Of course, this checks the regular flatpak installation at /var/lib/flatpak .
If you want to use an alternative path, execute the program with said path.
Made with :heart: by powpingdone#3611, or just powpingdone on github.
Explanation of the output:
'no dedupe': The size that the ostree repository would take up if files were not deduplicated.
'dedupe': The actual size of the ostree repository.
'deduplicated ratio': The ratio of space taken to how much space would be taken if deduplication wasn't in use.
'singlelet file ratio': The ratio of files that are used by exactly one application/runtime (i.e., not actually shared between several).
'orphan file ratio': The ratio of files that are somehow not even referenced by flatpak.
"""
from glob import iglob as glob
from os import path, scandir
from sys import argv, exit
# Root of the flatpak installation to inspect; defaults to the system-wide one.
PATH_ROOT = "/var/lib/flatpak"
if len(argv) > 1:
    # All command-line arguments are re-joined with single spaces and used as
    # the path (presumably so an unquoted path containing spaces still works).
    PATH_ROOT = " ".join(argv[1:])
if not path.exists(PATH_ROOT):
    print(f"{PATH_ROOT} does not exist or is not able to be seen, exiting...")
    exit(1)
# Both the "app" and "runtime" subdirectories must exist, because those are
# the two trees that get walked below.
if not (path.exists(PATH_ROOT + "/app") and path.exists(PATH_ROOT + "/runtime")):
    print(f"{PATH_ROOT} does not point to a \"valid\" flatpak repo, exiting...")
    exit(2)
# Byte totals: the size as if every hard link were a separate copy, and the
# size actually occupied (each inode counted once).
not_deduped_size = deduped_size = 0
# File tallies: link count == 2, link count == 1, and distinct inodes seen.
singlelet_files = orphan_files = all_files = 0
# Inode numbers that have already been counted (used as a set; values unused).
inodes = dict()
def collect_data(globbed):
    """Recursively walk *globbed* and fold its files into the global counters.

    Symlinks are skipped, directories are descended into, and every other
    entry is stat'ed without following symlinks. Each inode is counted
    exactly once, no matter how many hard links to it are encountered during
    the walk; otherwise a file linked into several checkouts would add its
    "not deduplicated" size once per visit and inflate the total.

    Updates the module-level ``not_deduped_size``, ``deduped_size``,
    ``singlelet_files``, ``orphan_files``, ``all_files`` and ``inodes``.

    Args:
        globbed: Path of the directory to scan.
    """
    global not_deduped_size, deduped_size, singlelet_files, orphan_files, all_files, inodes
    # The context manager closes the directory handle even if an error occurs.
    with scandir(globbed) as entries:
        for file in entries:
            if file.is_symlink():
                continue
            if file.is_dir():
                collect_data(file.path)
                continue
            statout = file.stat(follow_symlinks=False)
            # Skip inodes already accounted for via another hard link.
            if statout.st_ino in inodes:
                continue
            inodes[statout.st_ino] = None
            links = statout.st_nlink
            all_files += 1
            deduped_size += statout.st_size
            # One link is presumably the ostree repository object itself, so
            # a file with N links stands for N - 1 copies; a file with a
            # single link still occupies its own size.
            not_deduped_size += (links - 1 if links > 1 else 1) * statout.st_size
            if links == 2:
                singlelet_files += 1
            elif links == 1:
                orphan_files += 1
# Walk every installed application, then every installed runtime, announcing
# each one by the last component of its path.
for app in glob(PATH_ROOT + "/app/*"):
    print(f"collecting app {path.basename(app)}")
    collect_data(app)
for runtime in glob(PATH_ROOT + "/runtime/*"):
    print(f"collecting runtime {path.basename(runtime)}")
    collect_data(runtime)
def to_human_readable_size(num):
    """Format a byte count as a string with one decimal and a unit suffix.

    The value is divided by 1024 until its magnitude drops below 1024 or the
    largest unit is reached; anything beyond gigabytes is reported in TB.

    Args:
        num: Size in bytes (int or float; negative values keep their sign).

    Returns:
        A string such as ``"1.5 KB"``.
    """
    for suffix in ("B", "KB", "MB", "GB"):
        if abs(num) < 1024:
            return f"{num:.1f} {suffix}"
        num /= 1024
    # No larger unit is defined, so everything left over is expressed in TB.
    return f"{num:.1f} TB"
def _percent(part, whole):
    """Return part/whole as a percentage with two decimals, or "n/a" if whole is 0."""
    # Guarding the division keeps the report from crashing with
    # ZeroDivisionError when no files were collected at all.
    return f"{100 * (part / whole):0.2f}" if whole else "n/a"


# Final report: both byte totals (human-readable and exact), then the ratios
# expressed as percentages.
print(f"no dedupe: {to_human_readable_size(not_deduped_size)} ({not_deduped_size} B)")
print(f"dedupe: {to_human_readable_size(deduped_size)} ({deduped_size} B)")
print(f"deduplicated ratio: {_percent(deduped_size, not_deduped_size)}")
print(f"singlelet file ratio: {_percent(singlelet_files, all_files)}")
print(f"orphan file ratio: {_percent(orphan_files, all_files)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment