{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "184c1cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import torch\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "from torchvision.transforms.v2.functional import resize as tv_resize, InterpolationMode\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1724ecad",
   "metadata": {},
   "outputs": [],
   "source": [
    "Hin, Win = 256, 276\n",
    "\n",
    "def make_uint8_images(Hin, Win):\n",
    "    tensor_img = torch.randint(0, 256, size=(3, Hin, Win), dtype=torch.uint8)\n",
    "    # Note: cv2 arrays are BGR, not RGB, but that doesn't matter for random data\n",
    "    cv2_img = tensor_img.numpy().transpose(1, 2, 0)  # CHW -> HWC\n",
    "    pil_img = Image.fromarray(cv2_img)\n",
    "\n",
    "    return tensor_img, cv2_img, pil_img\n",
    "\n",
    "def to_tensor(x):\n",
    "    if isinstance(x, torch.Tensor):\n",
    "        return x\n",
    "\n",
    "    if isinstance(x, Image.Image):\n",
    "        x = np.asarray(x)\n",
    "    return torch.from_numpy(x).permute(2, 0, 1)\n",
    "\n",
    "\n",
    "def assert_close(a, b, atol=1, allowed_percent=0):\n",
    "    # Assert that a and b differ by more than atol on no more than allowed_percent % of entries.\n",
    "    # allowed_percent is expected in [0, 100].\n",
    "    # When allowed_percent is 0 (default), we assert that all pixels differ by at most atol.\n",
    "\n",
    "    a, b = to_tensor(a), to_tensor(b)\n",
    "\n",
    "    if allowed_percent == 0:\n",
    "        torch.testing.assert_close(a, b, rtol=0, atol=atol)\n",
    "        return\n",
    "\n",
    "    abs_diff = (a.float() - b.float()).abs()\n",
    "    actual_percent = (abs_diff > atol).float().mean()\n",
    "    if actual_percent > allowed_percent / 100:\n",
    "        raise AssertionError(f\"{actual_percent:.1%} pixels differ by more than {atol=}!\")\n",
    "\n",
    "def assert_exactly_equal(a, b, *args, **kwargs):\n",
    "    a, b = to_tensor(a), to_tensor(b)\n",
    "    torch.testing.assert_close(a, b, *args, **kwargs, rtol=0, atol=0)\n",
    "\n",
    "def tv_resize_on_floats(uint8_tensor_img, *args, **kwargs):\n",
    "    # Convert uint8 to float, resize, then round back to uint8.\n",
    "    # This is what the v1 version of resize (torchvision.transforms.functional.resize) does.\n",
    "    # It is mathematically more correct for bicubic mode, but slower.\n",
    "    # In practice it doesn't matter for models; you can just use the faster version on uint8.\n",
    "    return tv_resize(uint8_tensor_img.float(), *args, **kwargs).round().clamp(0, 255).to(torch.uint8)"
   ]
  },
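  {
   "cell_type": "markdown",
   "id": "aa11bb01",
   "metadata": {},
   "source": [
    "Quick sanity check of the tolerance semantics of `assert_close` above. This is a toy example added for illustration; the tensors and thresholds are hypothetical, not part of the library comparisons below.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa11bb02",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Toy example (illustrative only): a uniform off-by-one image passes with atol=1,\n",
    "# and an image where 2% of entries are off by 2 passes only when allowed_percent >= 2.\n",
    "a = torch.full((3, 10, 10), 100, dtype=torch.uint8)\n",
    "b = a + 1\n",
    "assert_close(a, b, atol=1)  # every pixel differs by exactly 1 <= atol\n",
    "\n",
    "c = a.clone()\n",
    "c.view(-1)[:6] += 2  # 6 / 300 = 2% of entries now differ by 2 > atol\n",
    "assert_close(a, c, atol=1, allowed_percent=2)  # passes: exactly 2% exceed atol\n",
    "try:\n",
    "    assert_close(a, c, atol=1, allowed_percent=1)\n",
    "except AssertionError as e:\n",
    "    print(e)  # 2.0% pixels differ by more than atol=1!"
   ]
  },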
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "386a5e19",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bilinear interpolation, no antialiasing\n",
    "# ---------------------------------------\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, cv2_img, _ = make_uint8_images(Hin, Win)\n",
    "\n",
    "    out_cv2_linear = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_LINEAR)\n",
    "    out_cv2_linear_exact = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_LINEAR_EXACT)\n",
    "    out_tv = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=False)\n",
    "    out_tv_float = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=False)\n",
    "\n",
    "    assert_close(out_cv2_linear, out_cv2_linear_exact, atol=1)\n",
    "    assert_close(out_cv2_linear, out_cv2_linear_exact, atol=0, allowed_percent=20)\n",
    "    assert_close(out_tv, out_cv2_linear, atol=1)\n",
    "    assert_close(out_tv, out_cv2_linear, atol=0, allowed_percent=15)\n",
    "    assert_close(out_tv, out_cv2_linear_exact, atol=1)\n",
    "    assert_close(out_tv, out_cv2_linear_exact, atol=0, allowed_percent=15)\n",
    "    assert_exactly_equal(out_tv, out_tv_float)"
   ]
  },
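  {
   "cell_type": "markdown",
   "id": "aa11bb03",
   "metadata": {},
   "source": [
    "Side check, based on the torchvision docs' statement that `antialias` only has an effect when downsampling: for upsampling, `antialias=True` and `antialias=False` are expected to match. This cell is an added sketch, not part of the original comparisons; it prints the max difference rather than asserting an equivalence that hasn't been verified here.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa11bb04",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Upsampling only: antialiasing should make no difference (expected max diff: 0).\n",
    "tensor_img, _, _ = make_uint8_images(Hin, Win)\n",
    "up_aa = tv_resize(tensor_img, size=(500, 480), interpolation=InterpolationMode.BILINEAR, antialias=True)\n",
    "up_no_aa = tv_resize(tensor_img, size=(500, 480), interpolation=InterpolationMode.BILINEAR, antialias=False)\n",
    "print((up_aa.int() - up_no_aa.int()).abs().max().item())"
   ]
  },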
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2dbfc5bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bilinear interpolation, with antialiasing\n",
    "# -----------------------------------------\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, _, pil_img = make_uint8_images(Hin, Win)\n",
    "\n",
    "    out_pil = pil_img.resize((Wout, Hout), Image.BILINEAR)\n",
    "    out_tv = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=True)\n",
    "    out_tv_float = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=True)\n",
    "\n",
    "    assert_close(out_tv, out_pil, atol=1)\n",
    "    assert_close(out_tv, out_pil, atol=0, allowed_percent=20)\n",
    "    assert_exactly_equal(out_tv, out_tv_float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0b1f6374",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bicubic interpolation, no antialiasing\n",
    "# --------------------------------------\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, cv2_img, _ = make_uint8_images(Hin, Win)\n",
    "\n",
    "    out_cv2 = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_CUBIC)\n",
    "    out_tv = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=False)\n",
    "    out_tv_float = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=False)\n",
    "\n",
    "    assert_close(out_tv, out_cv2, atol=1, allowed_percent=7)\n",
    "    assert_close(out_tv, out_cv2, atol=0, allowed_percent=30)\n",
    "    assert_close(out_tv, out_tv_float, atol=1, allowed_percent=7)\n",
    "    assert_close(out_tv, out_tv_float, atol=0, allowed_percent=30)\n",
    "    assert_close(out_tv_float, out_cv2, atol=1)\n",
    "    assert_close(out_tv_float, out_cv2, atol=0, allowed_percent=1)"
   ]
  },
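  {
   "cell_type": "markdown",
   "id": "aa11bb05",
   "metadata": {},
   "source": [
    "Why `tv_resize_on_floats` needs the `.clamp(0, 255)`: bicubic kernels have negative lobes, so resizing a float image with a sharp edge can overshoot outside [0, 255]. The sketch below is an added illustration; the exact overshoot values depend on the kernel parameters.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa11bb06",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bicubic overshoot demo: upsample a sharp white square on black.\n",
    "img = torch.zeros(1, 8, 8)\n",
    "img[:, 3:5, 3:5] = 255.0\n",
    "out = tv_resize(img, size=(16, 16), interpolation=InterpolationMode.BICUBIC, antialias=False)\n",
    "print(out.min().item(), out.max().item())  # expect values below 0 and above 255"
   ]
  },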
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8cf1e6db",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bicubic interpolation, with antialiasing\n",
    "# ----------------------------------------\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, _, pil_img = make_uint8_images(Hin, Win)\n",
    "\n",
    "    out_pil = pil_img.resize((Wout, Hout), Image.BICUBIC)\n",
    "    out_tv = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=True)\n",
    "    out_tv_float = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=True)\n",
    "\n",
    "    assert_close(out_tv, out_pil, atol=2)\n",
    "    assert_close(out_tv, out_pil, atol=1, allowed_percent=0.5)\n",
    "    assert_close(out_tv, out_pil, atol=0, allowed_percent=1)\n",
    "    assert_close(out_tv, out_tv_float, atol=1, allowed_percent=2)\n",
    "    assert_close(out_tv, out_tv_float, atol=0, allowed_percent=25)\n",
    "    assert_close(out_tv_float, out_pil, atol=2, allowed_percent=1)\n",
    "    assert_close(out_tv_float, out_pil, atol=1, allowed_percent=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e46ab5c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Nearest and nearest-exact interpolation\n",
    "# ---------------------------------------\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, cv2_img, pil_img = make_uint8_images(Hin, Win)\n",
    "\n",
    "    out_cv2_nearest = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_NEAREST)\n",
    "    out_cv2_nearest_exact = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_NEAREST_EXACT)\n",
    "\n",
    "    out_pil = pil_img.resize((Wout, Hout), Image.NEAREST)\n",
    "\n",
    "    out_tv_nearest = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST)\n",
    "    out_tv_float_nearest = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST)\n",
    "    out_tv_nearest_exact = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST_EXACT)\n",
    "    out_tv_float_nearest_exact = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST_EXACT)\n",
    "\n",
    "    assert_exactly_equal(out_tv_nearest, out_cv2_nearest)\n",
    "    assert_exactly_equal(out_tv_nearest, out_tv_float_nearest)\n",
    "\n",
    "    assert_close(out_cv2_nearest_exact, out_pil, atol=0, allowed_percent=5)\n",
    "    assert_close(out_tv_nearest_exact, out_pil, atol=0, allowed_percent=5)\n",
    "    assert_close(out_tv_nearest_exact, out_cv2_nearest_exact, atol=0, allowed_percent=7)\n",
    "    assert_exactly_equal(out_tv_nearest_exact, out_tv_float_nearest_exact)"
   ]
  },
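  {
   "cell_type": "markdown",
   "id": "aa11bb07",
   "metadata": {},
   "source": [
    "Rough timing sketch for the \"slower\" claim in `tv_resize_on_floats`. Added for illustration; the numbers vary by machine and torchvision version, and the output size chosen here is arbitrary.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "aa11bb08",
   "metadata": {},
   "outputs": [],
   "source": [
    "import timeit\n",
    "\n",
    "tensor_img, _, _ = make_uint8_images(Hin, Win)\n",
    "n = 100\n",
    "t_uint8 = timeit.timeit(lambda: tv_resize(tensor_img, size=(120, 128), interpolation=InterpolationMode.BICUBIC, antialias=True), number=n)\n",
    "t_float = timeit.timeit(lambda: tv_resize_on_floats(tensor_img, size=(120, 128), interpolation=InterpolationMode.BICUBIC, antialias=True), number=n)\n",
    "print(f\"uint8 path: {t_uint8 / n * 1e3:.2f} ms/iter\")\n",
    "print(f\"float path: {t_float / n * 1e3:.2f} ms/iter\")"
   ]
  },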
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7fe96b3a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}