Skip to content

Instantly share code, notes, and snippets.

@robieta
Created March 30, 2021 04:43
Show Gist options
  • Save robieta/2b5d5b4286d29ef5f09870a924cb0f77 to your computer and use it in GitHub Desktop.
Save robieta/2b5d5b4286d29ef5f09870a924cb0f77 to your computer and use it in GitHub Desktop.
import argparse
import atexit
import multiprocessing
import multiprocessing.dummy
import os
import pickle
import shutil
import subprocess
import tempfile
import uuid
import torch
from torch.utils.benchmark import Measurement, Timer
# =============================================================================
# == PR specific code =========================================================
# =============================================================================
# Template placeholder: the PR-specific pieces must be pasted in before this
# script can run, so fail fast at import time until they are.
raise NotImplementedError(
    "Definitions for `COMMIT_0`, `COMMIT_1`, and `make_timer` should be "
    "copied from the PyTorch issue."
)

# These will be workflow specific
ROOT = os.path.join("/data", "users", os.getenv("USER"), "repos", "pytorch_worktrees")

# One conda env / worktree per commit under comparison.
ENV_0 = os.path.join(ROOT, f"tree-{COMMIT_0}-repo-env")
ENV_1 = os.path.join(ROOT, f"tree-{COMMIT_1}-repo-env")
def measure_main(result_file: str, **kwargs):
    """Worker entry point: benchmark in the current interpreter, pickle results.

    Builds the PR-specific Timer via `make_timer()`, collects Callgrind
    instruction counts and wall-time measurements, and writes the pair to
    `result_file` for the orchestrating process to read back.
    """
    timer = make_timer()

    # Instruction counts are deterministic, so the interpreter baseline run
    # is skipped to save time.
    callgrind_counts = timer.collect_callgrind(number=100, collect_baseline=False)
    wall_times = timer.blocked_autorange(min_run_time=5)

    with open(result_file, "wb") as f:
        pickle.dump([callgrind_counts, wall_times], f)
def map_fn(args):
    """Run one measurement in a subprocess under the conda env for commit `i`.

    `args` is a `(i, result_dir)` tuple where `i` in {0, 1} selects the env
    and `result_dir` is where the worker drops its pickle. Returns the tuple
    `(i, callgrind counts, wall-time measurement)` loaded from that pickle.
    """
    i, result_dir = args
    env = (ENV_0, ENV_1)[i]

    # Unique file name so concurrent workers never collide.
    result_file = os.path.join(result_dir, f"results_{i}_{uuid.uuid4()}.pkl")

    # Re-invoke this very script in "measure" mode inside the selected env.
    cmd = f"source activate {env} && python {os.path.abspath(__file__)} --mode measure --result_file {result_file}"
    subprocess.run(args=cmd, shell=True, executable="/bin/bash", check=True)

    with open(result_file, "rb") as f:
        measured_counts, measured_times = pickle.load(f)
    return i, measured_counts, measured_times
def main(fast: bool, **kwargs):
    """Orchestrate the A/B benchmark between ENV_0 and ENV_1 and print a report.

    Fans measurement jobs out over a thread pool (each job shells out to a
    fresh interpreter via `map_fn`), then merges wall times and diffs the
    denoised instruction counts between the two environments.
    """
    # The orchestrator itself must run out of ENV_1.
    if os.getenv("CONDA_PREFIX") != ENV_1:
        raise ValueError(
            "This script should be called from ENV_1 "
            f"(`conda deactivate; source activate {ENV_1}`), got {os.getenv('CONDA_PREFIX')} instead."
        )

    # Scratch space for worker pickles, cleaned up on exit.
    result_dir = tempfile.mkdtemp()
    atexit.register(shutil.rmtree, path=result_dir)

    repeats = 2 if fast else 20
    num_workers = multiprocessing.cpu_count() // 2  # Don't overload the CPU, as this can distort results

    times = [[], []]
    counts = [[], []]

    # Interleave env-0 and env-1 jobs so both see similar machine conditions.
    map_args = [(0, result_dir), (1, result_dir)] * repeats

    # dummy.Pool is a *thread* pool; the heavy lifting happens in the
    # subprocesses spawned by map_fn, so threads are sufficient here.
    with multiprocessing.dummy.Pool(num_workers) as pool:
        for n_done, (i, count_result, time_result) in enumerate(pool.imap(map_fn, map_args)):
            print(f"\r{n_done + 1} / {len(map_args)}", end="")
            times[i].append(time_result)
            counts[i].append(count_result)
    print()

    t0 = Measurement.merge(times[0])[0]
    t1 = Measurement.merge(times[1])[0]

    # Take the min as any interpreter jitter will increase from the baseline.
    c0 = min(counts[0], key=lambda x: x.counts(denoise=True))
    c1 = min(counts[1], key=lambda x: x.counts(denoise=True))

    torch.set_printoptions(linewidth=200)

    # Strip the env prefixes so the two sides diff cleanly, then denoise.
    delta = (
        c1.as_standardized().stats().transform(lambda l: l.replace(f"{ENV_1}/", "")) -
        c0.as_standardized().stats().transform(lambda l: l.replace(f"{ENV_0}/", ""))
    ).denoise()

    print(t0, "\n")
    print(t1, "\n")
    print(c0.counts(denoise=True))
    print(c1.counts(denoise=True), "\n")
    print(delta)

    # Uncomment to debug:
    # import pdb
    # pdb.set_trace()
# Dispatch table: the command-line `--mode` flag selects which entry point runs.
MODES = {
    "measure": measure_main,
    "main": main,
}


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", choices=list(MODES.keys()), default="main")
    parser.add_argument("--fast", action="store_true")
    parser.add_argument("--result_file", type=str, default=None)
    cli_args = parser.parse_args()
    MODES[cli_args.mode](fast=cli_args.fast, result_file=cli_args.result_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment