Skip to content

Instantly share code, notes, and snippets.

@nh2
Created June 3, 2020 12:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nh2/ed3650ff625f586d6011cbfbd89af7f5 to your computer and use it in GitHub Desktop.
Save nh2/ed3650ff625f586d6011cbfbd89af7f5 to your computer and use it in GitHub Desktop.
Small benchmark to deduplicate /nix/store into a bup repository
#! /usr/bin/env python3
import glob
import os
import subprocess
import sys
from itertools import zip_longest
# From https://stackoverflow.com/questions/434287/what-is-the-most-pythonic-way-to-iterate-over-a-list-in-chunks/434411#434411
def grouper(iterable, n):
    """Split *iterable* into consecutive chunks of at most *n* items.

    Args:
        iterable: Any iterable of items to be chunked.
        n: Maximum number of items per chunk.

    Returns:
        A generator that yields one generator per chunk. Every chunk holds
        *n* items except possibly the last, which holds the remainder.
        Nothing is yielded for an empty *iterable*.
    """
    # A private sentinel is used for padding instead of `None`, so that
    # genuine `None` items in the input are not mistaken for padding and
    # silently dropped.
    padding = object()
    # The *same* iterator repeated n times: zip_longest then pulls n
    # consecutive items from it for every tuple it builds.
    args = [iter(iterable)] * n
    # Strip the trailing padding from the (possibly short) last chunk.
    return (
        (item for item in chunk if item is not padding)
        for chunk in zip_longest(*args, fillvalue=padding)
    )
# Every top-level entry of the Nix store is one unit of input.
store_paths = glob.glob('/nix/store/*')

# Pass this many paths to 1 `tar` invocation to reduce `bup` invocations.
num_batch_paths = 100

# Build the child environment once, on a copy, so that this process's own
# `os.environ` is not modified as a side effect.
env = os.environ.copy()
env["BUP_DIR"] = "nix-store-bup-dedup-test-tar"

num_processed = 0
for group in grouper(store_paths, num_batch_paths):
    # Progress output: number of paths handled so far, then this batch.
    print(num_processed)
    paths = list(group)
    for p in paths:
        print(p)

    # Equivalent of `tar c <paths> | bup split -n nix-store`, but run as an
    # argv-list pipeline without a shell, so store paths containing
    # whitespace or shell metacharacters are passed through verbatim.
    with subprocess.Popen(
        ["tar", "c", *paths], stdout=subprocess.PIPE, env=env
    ) as tar:
        with subprocess.Popen(
            ["bup", "split", "-n", "nix-store"], stdin=tar.stdout, env=env
        ) as bup:
            # Drop our copy of the pipe's read end so that `tar` receives
            # SIGPIPE if `bup` exits early, instead of blocking forever.
            tar.stdout.close()
            bup.wait()

    # Leaving the `with` blocks has waited for both processes, so both
    # return codes are available here. Report failures but keep going.
    if tar.returncode != 0 or bup.returncode != 0:
        print(
            "warning: batch failed (tar exit {}, bup exit {})".format(
                tar.returncode, bup.returncode
            ),
            file=sys.stderr,
        )

    print()
    # Count what was actually handled; the final batch may be short.
    num_processed += len(paths)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment