@vfdev-5
Last active October 25, 2021 09:25
Benchmark torchvision transforms on PIL vs Tensor
Torch config: PyTorch built with:
- GCC 7.3
- C++ Version: 201402
- Intel(R) oneAPI Math Kernel Library Version 2021.3-Product Build 20210617 for Intel(R) 64 architecture applications
- Intel(R) MKL-DNN v2.2.3 (Git Hash 7336ca9f055cf1bfa13efb658fe15dc9b41f0740)
- OpenMP 201511 (a.k.a. OpenMP 4.5)
- LAPACK is enabled (usually provided by MKL)
- NNPACK is enabled
- CPU capability usage: AVX2
- CUDA Runtime 11.1
- NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
- CuDNN 8.0.5
- Magma 2.5.2
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.11.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
Num threads: 1
Torch version: 1.11.0.dev20211013
Torchvision version: 0.12.0a0
PIL version: 8.3.2
[----------------------------- Resize -----------------------------]
                        |  Transform on PIL  |  Transform on Tensor
1 threads: ---------------------------------------------------------
    torch.uint8 / RGB   |       1752.1       |        1839.4
    torch.float32 / F   |        902.9       |         887.2

Times are in microseconds (us).

[---------------------- RandomHorizontalFlip ----------------------]
                        |  Transform on PIL  |  Transform on Tensor
1 threads: ---------------------------------------------------------
    torch.uint8 / RGB   |         62.9       |         390.2
    torch.float32 / F   |         63.1       |          54.2

Times are in microseconds (us).

[----------------------- RandomResizedCrop ------------------------]
                        |  Transform on PIL  |  Transform on Tensor
1 threads: ---------------------------------------------------------
    torch.uint8 / RGB   |       1092.5       |        1396.3
    torch.float32 / F   |        606.9       |         362.1

Times are in microseconds (us).

[-------------------------- RandAugment ---------------------------]
                        |  Transform on PIL  |  Transform on Tensor
1 threads: ---------------------------------------------------------
    torch.uint8 / RGB   |          1.0       |           2.0

Times are in milliseconds (ms).

[------------------------- ImageNet train -------------------------]
                        |  Transform on PIL  |  Transform on Tensor
1 threads: ---------------------------------------------------------
    torch.uint8 / RGB   |          2.5       |           2.0

Times are in milliseconds (ms).
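
For reference, the two code paths compared in each table look roughly like this (a minimal sketch using the Resize settings from the first table; the Image.new dummy input is illustrative only, the full script below builds matching PIL and tensor inputs from the same random data):

from PIL import Image
import torch
import torchvision.transforms as T

resize = T.Resize([256, 256], interpolation=T.InterpolationMode.BILINEAR)

# "Transform on PIL": the input is a PIL.Image, so Pillow does the resampling.
pil_img = Image.new("RGB", (500, 500))
out_pil = resize(pil_img)

# "Transform on Tensor": the same transform applied to a CHW uint8 tensor,
# handled by torchvision's pure-PyTorch kernels.
tensor_img = torch.randint(0, 256, (3, 500, 500), dtype=torch.uint8)
out_tensor = resize(tensor_img)

The full benchmark script follows.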
from collections import namedtuple

import PIL
from PIL import Image

import torch
import torch.utils.benchmark as benchmark

import torchvision
import torchvision.transforms as T
import torchvision.transforms.functional as F


BTransform = namedtuple("BTransform", ["op", "input_size", "name", "supported_dtypes"])

transforms = [
    BTransform(op=T.Resize([256, 256], interpolation=T.InterpolationMode.BILINEAR), input_size=[500, 500], name=None, supported_dtypes=None),
    BTransform(op=T.RandomHorizontalFlip(p=1.0), input_size=[256, 256], name=None, supported_dtypes=None),
    BTransform(op=T.RandomResizedCrop(224), input_size=[500, 500], name=None, supported_dtypes=None),
    BTransform(op=T.autoaugment.RandAugment(), input_size=[224, 224], name=None, supported_dtypes=[torch.uint8, ]),
    # ImageNet train preset:
    BTransform(op=T.Compose([
        T.RandomResizedCrop(224),
        T.RandomHorizontalFlip(p=0.5),
        lambda x: F.pil_to_tensor(x) if isinstance(x, PIL.Image.Image) else x,
        T.ConvertImageDtype(torch.float),
        T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ]), name="ImageNet train", input_size=[500, 500], supported_dtypes=[torch.uint8, ]),
]

def run_bench(t):
    min_run_time = 2
    transform = t.op
    label = transform.__class__.__name__ if t.name is None else t.name
    results = []
    for dtype in [torch.uint8, torch.float32]:
        if t.supported_dtypes is not None and dtype not in t.supported_dtypes:
            continue

        # uint8 inputs are benchmarked as 3-channel RGB, float32 inputs as single-channel "F" mode
        c = 3
        mode = "RGB"
        if dtype == torch.float32:
            c = 1
            mode = "F"

        # Build a random CHW tensor and the matching PIL image from the same data
        size = [c, ] + t.input_size
        tensor = torch.randint(0, 256, size=size, dtype=dtype)
        data = tensor.permute(1, 2, 0).contiguous().cpu().numpy()
        if dtype == torch.float32:
            pil_img = Image.fromarray(data[..., 0], mode=mode)
        else:
            pil_img = Image.fromarray(data, mode=mode)

        sub_label = f"{dtype} / {mode}"
        results += [
            # With Pillow
            benchmark.Timer(
                stmt="t(x)",
                globals={
                    "x": pil_img,
                    "t": transform,
                },
                num_threads=torch.get_num_threads(),
                label=label,
                sub_label=sub_label,
                description="Transform on PIL",
            ).blocked_autorange(min_run_time=min_run_time),
            # With tensor
            benchmark.Timer(
                stmt="t(x)",
                globals={
                    "x": tensor,
                    "t": transform,
                },
                num_threads=torch.get_num_threads(),
                label=label,
                sub_label=sub_label,
                description="Transform on Tensor",
            ).blocked_autorange(min_run_time=min_run_time),
        ]
    return results

def main():
    all_results = []
    for t in transforms:
        all_results += run_bench(t)
    compare = benchmark.Compare(all_results)
    compare.print()


if __name__ == "__main__":
    print(f"Torch config: {torch.__config__.show()}")
    print(f"Num threads: {torch.get_num_threads()}")
    print(f"Torch version: {torch.__version__}")
    print(f"Torchvision version: {torchvision.__version__}")
    print(f"PIL version: {PIL.__version__}")
    main()
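
To cover additional operations, extra entries can be appended to the transforms list before calling main(); a hedged sketch (the GaussianBlur entry is purely illustrative and not part of the benchmark above):

# Illustrative only: any transform that accepts both PIL images and tensors
# can be benchmarked by adding another BTransform entry, e.g. a 5x5 Gaussian
# blur on 256x256 inputs.
transforms.append(
    BTransform(
        op=T.GaussianBlur(kernel_size=5),
        input_size=[256, 256],
        name=None,
        supported_dtypes=None,
    )
)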