@ilia-cher
Created December 10, 2020 08:32
commit edc815cb94e4a1cc501cda87c6e05a73137e4593 (HEAD -> extra_sampling_2, origin/gh/ilia-cher/89/orig)
Author: ilia-cher <iliacher@fb.com>
Date: Wed Dec 9 14:34:44 2020 -0800
(pytorch) iliacher@devgpu083:~/local/pytorch (extra_sampling_2)$ python
Python 3.8.5 (default, Sep 4 2020, 07:30:14)
[GCC 7.3.0] :: Anaconda, Inc. on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from torch.utils.benchmark import Timer
No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
>>> timer = Timer("torch::empty({0});", language="c++")
>>> print(timer.blocked_autorange(min_run_time=5))
<torch.utils.benchmark.utils.common.Measurement object at 0x7fe72452af70>
torch::empty({0});
Median: 293.38 ns
IQR: 12.15 ns (287.75 to 299.90)
170 measurements, 100000 runs per measurement, 1 thread
>>> print(timer.blocked_autorange(min_run_time=5))
<torch.utils.benchmark.utils.common.Measurement object at 0x7fe833367130>
torch::empty({0});
Median: 298.61 ns
IQR: 12.08 ns (291.88 to 303.96)
169 measurements, 100000 runs per measurement, 1 thread
>>> print(timer.blocked_autorange(min_run_time=5))
<torch.utils.benchmark.utils.common.Measurement object at 0x7fe8332b50d0>
torch::empty({0});
Median: 304.55 ns
IQR: 11.57 ns (297.46 to 309.03)
164 measurements, 100000 runs per measurement, 1 thread
>>> print(timer.blocked_autorange(min_run_time=5))
<torch.utils.benchmark.utils.common.Measurement object at 0x7fe72452aa60>
torch::empty({0});
Median: 300.98 ns
IQR: 13.08 ns (293.52 to 306.59)
167 measurements, 100000 runs per measurement, 1 thread
>>> print(timer.collect_callgrind(number=10_000))
<torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7fe72452afd0>
torch::empty({0});
All Noisy symbols removed
Instructions: 18420016 18420016
Baseline: 0 0
10000 runs per measurement, 1 thread
Warning: PyTorch was not built with debug symbols.
Source information may be limited. Rebuild with
REL_WITH_DEB_INFO=1 for more detailed results.
>>> print(timer.collect_callgrind(number=10_000))
<torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7fe72452afd0>
torch::empty({0});
All Noisy symbols removed
Instructions: 18420016 18420016
Baseline: 0 0
10000 runs per measurement, 1 thread
Warning: PyTorch was not built with debug symbols.
Source information may be limited. Rebuild with
REL_WITH_DEB_INFO=1 for more detailed results.
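The `Median` / `IQR` lines in the output above are ordinary order statistics over the per-measurement times. A minimal stdlib-only sketch of that computation (the `times_ns` values here are hypothetical, not taken from the run above, and `torch.utils.benchmark` may differ in quartile convention):

```python
from statistics import median, quantiles

# Hypothetical raw per-measurement times in nanoseconds (illustrative only).
times_ns = [288.0, 290.5, 293.4, 295.1, 299.9, 301.2, 304.0]

# quantiles(..., n=4) returns the three quartile cut points q1, q2, q3;
# the IQR reported by the benchmark output is q3 - q1.
q1, q2, q3 = quantiles(times_ns, n=4)
print(f"Median: {median(times_ns):.2f} ns")
print(f"IQR: {q3 - q1:.2f} ns ({q1:.2f} to {q3:.2f})")
```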
commit fc0a3a1787ce3fcc7846665ce12805b006c18231 (HEAD, origin/gh/ilia-cher/89/base, origin/gh/eellison/130/base)
Author: Peter Bell <peterbell10@live.co.uk>
Date: Wed Dec 9 12:36:34 2020 -0800
(pytorch) iliacher@devgpu083:~/local/pytorch (fc0a3a17)$ python
Python 3.8.5 (default, Sep 4 2020, 07:30:14)
[GCC 7.3.0] :: Anaconda, Inc. on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> from torch.utils.benchmark import Timer
No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'
>>> timer = Timer("torch::empty({0});", language="c++")
>>> print(timer.blocked_autorange(min_run_time=5))
<torch.utils.benchmark.utils.common.Measurement object at 0x7f557f5e9f70>
torch::empty({0});
Median: 358.28 ns
IQR: 7.85 ns (355.62 to 363.47)
140 measurements, 100000 runs per measurement, 1 thread
>>> print(timer.blocked_autorange(min_run_time=5))
<torch.utils.benchmark.utils.common.Measurement object at 0x7f568e1d8130>
torch::empty({0});
Median: 352.08 ns
IQR: 9.81 ns (346.99 to 356.81)
143 measurements, 100000 runs per measurement, 1 thread
>>> print(timer.blocked_autorange(min_run_time=5))
<torch.utils.benchmark.utils.common.Measurement object at 0x7f568e126280>
torch::empty({0});
Median: 369.64 ns
IQR: 11.82 ns (362.66 to 374.48)
135 measurements, 100000 runs per measurement, 1 thread
>>> print(timer.blocked_autorange(min_run_time=5))
<torch.utils.benchmark.utils.common.Measurement object at 0x7f557f5e9310>
torch::empty({0});
Median: 355.79 ns
IQR: 12.84 ns (348.67 to 361.51)
141 measurements, 100000 runs per measurement, 1 thread
>>> print(timer.collect_callgrind(number=10_000))
<torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f568e1261c0>
torch::empty({0});
All Noisy symbols removed
Instructions: 18350016 18350016
Baseline: 0 0
10000 runs per measurement, 1 thread
Warning: PyTorch was not built with debug symbols.
Source information may be limited. Rebuild with
REL_WITH_DEB_INFO=1 for more detailed results.
>>> print(timer.collect_callgrind(number=10_000))
<torch.utils.benchmark.utils.valgrind_wrapper.timer_interface.CallgrindStats object at 0x7f568e1261c0>
torch::empty({0});
All Noisy symbols removed
Instructions: 18350016 18350016
Baseline: 0 0
10000 runs per measurement, 1 thread
Warning: PyTorch was not built with debug symbols.
Source information may be limited. Rebuild with
REL_WITH_DEB_INFO=1 for more detailed results.
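Comparing the two sessions: the medians and instruction counts are copied from the transcript above; the delta arithmetic is a sketch, not output from the runs themselves.

```python
from statistics import median

# Per-run medians (ns) copied from the blocked_autorange outputs above.
head_medians_ns = [293.38, 298.61, 304.55, 300.98]  # edc815cb (extra_sampling_2)
base_medians_ns = [358.28, 352.08, 369.64, 355.79]  # fc0a3a17 (base)

# Wall-time improvement of HEAD over base, taking the median of medians.
speedup_ns = median(base_medians_ns) - median(head_medians_ns)

# Callgrind instruction counts above are totals over 10_000 runs.
head_instr, base_instr, runs = 18_420_016, 18_350_016, 10_000
extra_instr_per_run = (head_instr - base_instr) / runs

print(f"HEAD is ~{speedup_ns:.2f} ns faster per call, "
      f"at the cost of +{extra_instr_per_run:.0f} instructions per call")
```

So the `extra_sampling_2` commit trades 7 extra instructions per `torch::empty({0})` call for a roughly 57 ns faster median wall time in these runs.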