Skip to content

Instantly share code, notes, and snippets.

@Mic92
Last active July 16, 2023 10:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Mic92/401c49f80ed910fd295c470fd64228d0 to your computer and use it in GitHub Desktop.
Save Mic92/401c49f80ed910fd295c470fd64228d0 to your computer and use it in GitHub Desktop.
Evaluation benchmark with different batch sizes
# Batch size: 1000
## Query all packages
elapsed: 24.68 s, heap size: 2610.18 MiB
## Evaluating packages in batches
elapsed: 15.04 s, heap size: 1217.18 MiB
elapsed: 18.31 s, heap size: 1377.18 MiB
elapsed: 9.13 s, heap size: 673.18 MiB
elapsed: 11.24 s, heap size: 769.18 MiB
elapsed: 16.69 s, heap size: 1169.18 MiB
elapsed: 3.50 s, heap size: 384.31 MiB
elapsed: 3.12 s, heap size: 384.31 MiB
elapsed: 3.40 s, heap size: 384.31 MiB
elapsed: 3.83 s, heap size: 384.31 MiB
elapsed: 3.29 s, heap size: 384.31 MiB
elapsed: 10.04 s, heap size: 672.68 MiB
elapsed: 13.84 s, heap size: 1121.18 MiB
elapsed: 8.33 s, heap size: 641.18 MiB
elapsed: 13.48 s, heap size: 881.18 MiB
elapsed: 10.16 s, heap size: 865.18 MiB
elapsed: 12.03 s, heap size: 913.18 MiB
elapsed: 12.38 s, heap size: 849.18 MiB
elapsed: 23.17 s, heap size: 481.18 MiB
elapsed: 15.50 s, heap size: 672.68 MiB
elapsed: 8.07 s, heap size: 432.43 MiB
elapsed: 6.32 s, heap size: 432.43 MiB
elapsed: 9.17 s, heap size: 465.18 MiB
elapsed: 18.98 s, heap size: 897.18 MiB
elapsed: 12.32 s, heap size: 865.18 MiB
elapsed: 11.39 s, heap size: 945.18 MiB
elapsed: 13.37 s, heap size: 753.18 MiB
elapsed: 11.20 s, heap size: 705.18 MiB
elapsed: 3.13 s, heap size: 384.25 MiB
elapsed: 3.06 s, heap size: 384.31 MiB
elapsed: 2.98 s, heap size: 384.31 MiB
elapsed: 16.03 s, heap size: 897.18 MiB
elapsed: 11.64 s, heap size: 801.18 MiB
elapsed: 7.56 s, heap size: 464.68 MiB
elapsed: 7.13 s, heap size: 528.68 MiB
elapsed: 6.10 s, heap size: 432.68 MiB
elapsed: 7.68 s, heap size: 624.68 MiB
elapsed: 17.42 s, heap size: 544.68 MiB
elapsed: 11.08 s, heap size: 496.68 MiB
elapsed: 6.15 s, heap size: 432.68 MiB
elapsed: 11.21 s, heap size: 608.68 MiB
elapsed: 7.94 s, heap size: 448.68 MiB
elapsed: 7.29 s, heap size: 592.68 MiB
elapsed: 5.28 s, heap size: 400.43 MiB
elapsed: 7.35 s, heap size: 592.68 MiB
elapsed: 6.56 s, heap size: 496.68 MiB
elapsed: 13.77 s, heap size: 1121.18 MiB
elapsed: 4.33 s, heap size: 384.25 MiB
elapsed: 11.03 s, heap size: 784.68 MiB
elapsed: 3.91 s, heap size: 384.25 MiB
elapsed: 2.30 s, heap size: 384.25 MiB
elapsed: 3.77 s, heap size: 384.25 MiB
elapsed: 2.28 s, heap size: 384.25 MiB
elapsed: 11.91 s, heap size: 1057.18 MiB
elapsed: 13.30 s, heap size: 929.18 MiB
elapsed: 15.09 s, heap size: 1121.18 MiB
elapsed: 9.03 s, heap size: 496.68 MiB
elapsed: 6.60 s, heap size: 512.68 MiB
elapsed: 18.44 s, heap size: 705.18 MiB
elapsed: 10.54 s, heap size: 849.18 MiB
Total elapsed: 9.92 min
# Batch size: 4000
## Query all packages
elapsed: 27.81 s, heap size: 2610.18 MiB
## Evaluating packages in batches
elapsed: 33.82 s, heap size: 2369.18 MiB
elapsed: 24.12 s, heap size: 1521.18 MiB
elapsed: 20.90 s, heap size: 1697.18 MiB
elapsed: 22.12 s, heap size: 1793.18 MiB
elapsed: 19.50 s, heap size: 1441.18 MiB
elapsed: 35.08 s, heap size: 1313.18 MiB
elapsed: 24.20 s, heap size: 1425.18 MiB
elapsed: 17.53 s, heap size: 1361.18 MiB
elapsed: 14.11 s, heap size: 849.18 MiB
elapsed: 17.97 s, heap size: 1041.18 MiB
elapsed: 12.73 s, heap size: 977.18 MiB
elapsed: 23.20 s, heap size: 1553.18 MiB
elapsed: 5.88 s, heap size: 400.43 MiB
elapsed: 29.77 s, heap size: 2017.18 MiB
elapsed: 20.60 s, heap size: 1265.18 MiB
Total elapsed: 5.83 min
# Batch size: 10000
## Query all packages
elapsed: 25.12 s, heap size: 2610.18 MiB
## Evaluating packages in batches
elapsed: 43.95 s, heap size: 3041.18 MiB
elapsed: 48.64 s, heap size: 2978.18 MiB
elapsed: 38.98 s, heap size: 2225.18 MiB
elapsed: 26.52 s, heap size: 1953.18 MiB
elapsed: 27.96 s, heap size: 2113.18 MiB
elapsed: 45.68 s, heap size: 2545.18 MiB
Total elapsed: 4.29 min
# Batch size: 40000
## Query all packages
elapsed: 23.34 s, heap size: 2594.18 MiB
## Evaluating packages in batches
elapsed: 117.78 s, heap size: 6932.18 MiB
elapsed: 54.00 s, heap size: 3842.18 MiB
Total elapsed: 3.26 min
# Batch size: 100000
## Query all packages
elapsed: 29.31 s, heap size: 2610.18 MiB
## Evaluating packages in batches
elapsed: 154.43 s, heap size: 9140.19 MiB
Total elapsed: 3.08 min
import json
import os
import subprocess
from dataclasses import dataclass
from pathlib import Path
from tempfile import TemporaryDirectory
from timeit import default_timer as timer
from typing import Any
def print_stats(stat_path: Path, time: float) -> None:
    """Print one summary line with the elapsed time and the GC heap size.

    Args:
        stat_path: JSON file containing a ``gc`` object with a ``heapSize``
            entry (the value is divided by 1024 twice and reported as MiB).
        time: Elapsed time in seconds to report alongside the heap size.
    """
    with stat_path.open() as stats_file:
        gc_stats = json.load(stats_file)["gc"]
    bytes_per_mib = 1024 * 1024
    heap_mib = gc_stats["heapSize"] / bytes_per_mib
    print(f"elapsed: {time:.2f} s, heap size: {heap_mib:.2f} MiB")
def batch(iterable, n: int = 1):
    """Yield consecutive slices of *iterable*, each holding at most *n* items.

    Args:
        iterable: A sized, sliceable sequence (list, tuple, str, ...). It must
            support ``len()`` and slice indexing; plain iterators do not work.
        n: Maximum number of items per chunk; must be at least 1.

    Yields:
        Slices of *iterable* in order. The final slice is shorter when the
        length is not a multiple of *n*; an empty sequence yields nothing.

    Raises:
        ValueError: If *n* is smaller than 1. Because this is a generator,
            the error surfaces when iteration starts, not at call time.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject non-positive sizes explicitly.
    if n < 1:
        raise ValueError(f"batch size must be at least 1, got {n}")
    total = len(iterable)
    for start in range(0, total, n):
        # Slicing clamps at the end of the sequence, so no min() is needed.
        yield iterable[start : start + n]
@dataclass
class Package:
    """Simple record pairing a package name with a path."""

    # Name of the package.
    name: str
    # Path belonging to the package.
    # NOTE(review): not used by the visible code; whether this is an attribute
    # path or a store path cannot be told from here — confirm with the author.
    path: str
def nix_env(file: str, extra_args: list[str], tmpdir: Path) -> dict[str, Any]:
    """Run ``nix-env -qaP --json`` against *file* and return the parsed output.

    The command's stdout is captured in ``tmpdir / "packages.json"``. The
    environment variables ``NIX_SHOW_STATS`` and ``NIX_SHOW_STATS_PATH`` are
    set so that statistics end up in ``tmpdir / "stats.json"``, which is then
    summarised on stdout together with the wall-clock time of the call.

    Args:
        file: Value passed to ``nix-env -f`` (a path or an expression such as
            ``<nixpkgs>``).
        extra_args: Additional command-line arguments appended to the call.
        tmpdir: Existing directory used as working directory and as the
            location of the stats and output files.

    Returns:
        The JSON document printed by ``nix-env``, decoded into a dict.

    Raises:
        subprocess.CalledProcessError: If ``nix-env`` exits with a non-zero
            status (``check=True``).
    """
    env = os.environ.copy()
    stats_path = tmpdir / "stats.json"
    env["NIX_SHOW_STATS_PATH"] = str(stats_path)
    env["NIX_SHOW_STATS"] = "1"
    packages_path = tmpdir / "packages.json"
    command = [
        "nix-env",
        "-f",
        file,
        "-qaP",
        "--json",
        "--show-trace",
        *extra_args,
    ]

    start = timer()
    # Use a context manager so the output file is flushed and closed before
    # it is read back below (the handle used to be leaked).
    with open(packages_path, "w") as packages_file:
        subprocess.run(
            command,
            check=True,
            stdout=packages_file,
            cwd=tmpdir,
            env=env,
        )
        end = timer()
    print_stats(stats_path, end - start)
    return json.loads(packages_path.read_text())
def escape_attr(attr: str) -> str:
    """Wrap the last dot-separated component of *attr* in double quotes.

    For example ``a.b.c`` becomes ``a.b."c"``. A string without any dot is
    returned unchanged.
    """
    prefix, dot, last = attr.rpartition(".")
    if not dot:
        # No "." present: nothing to quote.
        return attr
    return f'{prefix}."{last}"'
def eval_batch(packages: list[str], tmpdir: Path) -> dict[str, Any]:
    """Evaluate one batch of package attributes through ``nix_env``.

    Writes ``tmpdir / "subset.nix"``, a Nix expression that imports
    ``<nixpkgs>`` and lists every entry of *packages* as ``pkgs.<attr>`` (with
    the last component quoted by ``escape_attr``), then queries that file with
    ``--out-path``.

    Args:
        packages: Attribute names to include in the generated list.
        tmpdir: Directory that receives the generated expression and the
            files written by ``nix_env``.

    Returns:
        Whatever ``nix_env`` returns for the generated expression.
    """
    expr_file = tmpdir / "subset.nix"
    # One "pkgs.<attr>" entry per line.
    entries = "".join(f"pkgs.{escape_attr(name)}\n" for name in packages)
    header = "\nwith import <nixpkgs> {}; [\n"
    footer = "\n]"
    expr_file.write_text(header + entries + footer)
    return nix_env(str(expr_file), ["--out-path"], tmpdir)
def main() -> None:
    """Benchmark batched evaluation for a fixed sweep of batch sizes.

    For every batch size a fresh temporary directory is created, all packages
    are queried from ``<nixpkgs>``, and the resulting attribute names are then
    evaluated in chunks of that size. Per-call statistics are printed by
    ``nix_env``; the total wall-clock time per batch size is printed here.
    """
    # The sweep is hard-coded; each size is measured independently.
    batch_sizes = (1000, 4000, 10000, 40000, 100000)
    for batch_size in batch_sizes:
        print(f"# Batch size: {batch_size}")
        with TemporaryDirectory() as _tmpdir:
            tmpdir = Path(_tmpdir)
            start = timer()
            print("## Query all packages")
            packages = nix_env("<nixpkgs>", [], tmpdir)
            print("## Evaluating packages in batches")
            # batch() needs len() and slicing, so materialise the keys once.
            for subset in batch(list(packages), batch_size):
                eval_batch(subset, tmpdir)
            end = timer()
            print(f"Total elapsed: {(end - start)/ 60:.2f} min")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment