vkuzo/gist:fbe1968d2bbb79b3f6dd776309fbcffc Secret

## gistfile1.txt
# old

(pytorch2) [vasiliy@devgpu108.ash6 ~/local/pytorch/benchmarks/operator_benchmark] python -m pt.quantization_test
# ----------------------------------------
# PyTorch/Caffe2 Operator Micro-benchmarks
# ----------------------------------------
# Tag : short

# Benchmarking PyTorch: original_kernel
# Mode: Eager
# Name: original_kernel_N1_C3_H512_W512_nbits8_cpu
# Input: N: 1, C: 3, H: 512, W: 512, nbits: 8, device: cpu
Forward Execution Time (us) : 512.815

# Benchmarking PyTorch: original_kernel
# Mode: Eager
# Name: original_kernel_N1_C3_H512_W512_nbits8_cuda
# Input: N: 1, C: 3, H: 512, W: 512, nbits: 8, device: cuda
Forward Execution Time (us) : 98.896

# new

(pytorch2) [vasiliy@devgpu108.ash6 ~/local/pytorch/benchmarks/operator_benchmark] python -m pt.quantization_test
# ----------------------------------------
# PyTorch/Caffe2 Operator Micro-benchmarks
# ----------------------------------------
# Tag : short

# Benchmarking PyTorch: original_kernel
# Mode: Eager
# Name: original_kernel_N1_C3_H512_W512_nbits8_cpu
# Input: N: 1, C: 3, H: 512, W: 512, nbits: 8, device: cpu
Forward Execution Time (us) : 750.340

# Benchmarking PyTorch: original_kernel
# Mode: Eager
# Name: original_kernel_N1_C3_H512_W512_nbits8_cuda
# Input: N: 1, C: 3, H: 512, W: 512, nbits: 8, device: cuda
Forward Execution Time (us) : 128.428

	# old

	(pytorch2) [vasiliy@devgpu108.ash6 ~/local/pytorch/benchmarks/operator_benchmark] python -m pt.quantization_test
	# ----------------------------------------
	# PyTorch/Caffe2 Operator Micro-benchmarks
	# ----------------------------------------
	# Tag : short

	# Benchmarking PyTorch: original_kernel
	# Mode: Eager
	# Name: original_kernel_N1_C3_H512_W512_nbits8_cpu
	# Input: N: 1, C: 3, H: 512, W: 512, nbits: 8, device: cpu
	Forward Execution Time (us) : 512.815

	# Benchmarking PyTorch: original_kernel
	# Mode: Eager
	# Name: original_kernel_N1_C3_H512_W512_nbits8_cuda
	# Input: N: 1, C: 3, H: 512, W: 512, nbits: 8, device: cuda
	Forward Execution Time (us) : 98.896

	# new

	(pytorch2) [vasiliy@devgpu108.ash6 ~/local/pytorch/benchmarks/operator_benchmark] python -m pt.quantization_test
	# ----------------------------------------
	# PyTorch/Caffe2 Operator Micro-benchmarks
	# ----------------------------------------
	# Tag : short

	# Benchmarking PyTorch: original_kernel
	# Mode: Eager
	# Name: original_kernel_N1_C3_H512_W512_nbits8_cpu
	# Input: N: 1, C: 3, H: 512, W: 512, nbits: 8, device: cpu
	Forward Execution Time (us) : 750.340

	# Benchmarking PyTorch: original_kernel
	# Mode: Eager
	# Name: original_kernel_N1_C3_H512_W512_nbits8_cuda
	# Input: N: 1, C: 3, H: 512, W: 512, nbits: 8, device: cuda
	Forward Execution Time (us) : 128.428