NicolasHug/resize_opencv_vs_pil_vs_pytorch.ipynb

## resize_opencv_vs_pil_vs_pytorch.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "184c1cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import torch\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "from torchvision.transforms.v2.functional import resize as tv_resize, InterpolationMode\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "1724ecad",
   "metadata": {},
   "outputs": [],
   "source": [
    "Hin, Win = 256, 276\n",
    "\n",
    "def make_uint8_images(Hin, Win):\n",
    "    # Build one random uint8 image and return it in the three containers compared\n",
    "    # in this notebook: a CHW torch tensor, an HWC numpy array (for cv2) and a PIL image.\n",
    "    chw = torch.randint(0, 256, size=(3, Hin, Win), dtype=torch.uint8)\n",
    "    # cv2 would interpret the channels as BGR rather than RGB; that is irrelevant here\n",
    "    hwc = chw.permute(1, 2, 0).numpy()  # CHW -> HWC\n",
    "    return chw, hwc, Image.fromarray(hwc)\n",
    "\n",
    "def to_tensor(x):\n",
    "    # Bring any of the image containers used here to a common tensor form.\n",
    "    # Tensors pass through untouched; PIL images are first converted to numpy arrays,\n",
    "    # and numpy arrays are wrapped as tensors with axes reordered HWC -> CHW.\n",
    "    if not isinstance(x, torch.Tensor):\n",
    "        hwc = np.asarray(x) if isinstance(x, Image.Image) else x\n",
    "        x = torch.from_numpy(hwc).permute(2, 0, 1)\n",
    "    return x\n",
    "\n",
    "\n",
    "def assert_close(a, b, atol=1, allowed_percent=0):\n",
    "    # Assert that a and b differ by more than atol on no more than allowed_percent % of entries.\n",
    "    # allowed_percent is expected in [0, 100].\n",
    "    # When allowed_percent is 0 (default), we assert that all entries differ by at most atol.\n",
    "    # a and b are passed through to_tensor first. Raises AssertionError on failure.\n",
    "\n",
    "    a, b = to_tensor(a), to_tensor(b)\n",
    "\n",
    "    if allowed_percent == 0:\n",
    "        torch.testing.assert_close(a, b, rtol=0, atol=atol)\n",
    "        return\n",
    "\n",
    "    # The strict branch above rejects mismatched shapes; do the same here rather than\n",
    "    # letting the subtraction below silently broadcast.\n",
    "    if a.shape != b.shape:\n",
    "        raise AssertionError(f\"Shape mismatch: {tuple(a.shape)} != {tuple(b.shape)}\")\n",
    "\n",
    "    abs_diff = (a.float() - b.float()).abs()\n",
    "    differing_fraction = (abs_diff > atol).float().mean()\n",
    "    if differing_fraction > allowed_percent / 100:\n",
    "        raise AssertionError(f\"{differing_fraction:.1%} pixels differ by more than {atol=}!\")\n",
    "\n",
    "def assert_exactly_equal(a, b, *args, **kwargs):\n",
    "    # Zero-tolerance comparison (rtol=0, atol=0) of a and b after to_tensor conversion.\n",
    "    # Any extra arguments are forwarded unchanged to torch.testing.assert_close.\n",
    "    actual = to_tensor(a)\n",
    "    expected = to_tensor(b)\n",
    "    torch.testing.assert_close(actual, expected, *args, rtol=0, atol=0, **kwargs)\n",
    "\n",
    "def tv_resize_on_floats(uint8_tensor_img, *args, **kwargs):\n",
    "    # Resize in floating point instead of directly on uint8: cast to float, resize,\n",
    "    # then round and clamp to [0, 255] before casting back to uint8.\n",
    "    # This is what the v1 resize (torchvision.transforms.functional.resize) does.\n",
    "    # It is mathematically more correct for bicubic mode, but slower; it doesn't matter\n",
    "    # in practice for models, where the faster uint8 version can be used.\n",
    "    resized = tv_resize(uint8_tensor_img.float(), *args, **kwargs)\n",
    "    return resized.round().clamp(0, 255).to(torch.uint8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "386a5e19",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bilinear interpolation, no antialiasing\n",
    "# ---------------------------------------\n",
    "# Compares torchvision (uint8 and float paths) against cv2 INTER_LINEAR / INTER_LINEAR_EXACT.\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, cv2_img, _ = make_uint8_images(Hin, Win)\n",
    "    tv_kwargs = dict(size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=False)\n",
    "\n",
    "    # cv2 takes the target size as (width, height)\n",
    "    out_cv2_linear = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_LINEAR)\n",
    "    out_cv2_linear_exact = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_LINEAR_EXACT)\n",
    "    out_tv = tv_resize(tensor_img, **tv_kwargs)\n",
    "    out_tv_float = tv_resize_on_floats(tensor_img, **tv_kwargs)\n",
    "\n",
    "    # The two cv2 variants must stay within 1 of each other, and differ on at most 20% of entries\n",
    "    assert_close(out_cv2_linear, out_cv2_linear_exact, atol=1)\n",
    "    assert_close(out_cv2_linear, out_cv2_linear_exact, atol=0, allowed_percent=20)\n",
    "    # torchvision on uint8 must stay within 1 of either cv2 variant, differing on at most 15% of entries\n",
    "    for cv2_ref in (out_cv2_linear, out_cv2_linear_exact):\n",
    "        assert_close(out_tv, cv2_ref, atol=1)\n",
    "        assert_close(out_tv, cv2_ref, atol=0, allowed_percent=15)\n",
    "    # The uint8 and float torchvision paths must give identical results\n",
    "    assert_exactly_equal(out_tv, out_tv_float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2dbfc5bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bilinear interpolation, with antialiasing\n",
    "# -----------------------------------------\n",
    "# Compares torchvision (uint8 and float paths) against PIL's BILINEAR resize.\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, _, pil_img = make_uint8_images(Hin, Win)\n",
    "    tv_kwargs = dict(size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=True)\n",
    "\n",
    "    # PIL takes the target size as (width, height)\n",
    "    out_pil = pil_img.resize((Wout, Hout), Image.BILINEAR)\n",
    "    out_tv = tv_resize(tensor_img, **tv_kwargs)\n",
    "    out_tv_float = tv_resize_on_floats(tensor_img, **tv_kwargs)\n",
    "\n",
    "    # torchvision on uint8 must stay within 1 of PIL, differing on at most 20% of entries\n",
    "    assert_close(out_tv, out_pil, atol=1)\n",
    "    assert_close(out_tv, out_pil, atol=0, allowed_percent=20)\n",
    "    # The uint8 and float torchvision paths must give identical results\n",
    "    assert_exactly_equal(out_tv, out_tv_float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0b1f6374",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bicubic interpolation, no antialiasing\n",
    "# --------------------------------------\n",
    "# Compares torchvision (uint8 and float paths) against cv2 INTER_CUBIC.\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, cv2_img, _ = make_uint8_images(Hin, Win)\n",
    "    tv_kwargs = dict(size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=False)\n",
    "\n",
    "    # cv2 takes the target size as (width, height)\n",
    "    out_cv2 = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_CUBIC)\n",
    "    out_tv = tv_resize(tensor_img, **tv_kwargs)\n",
    "    out_tv_float = tv_resize_on_floats(tensor_img, **tv_kwargs)\n",
    "\n",
    "    # The uint8 path is held to the same loose bounds against cv2 and against the float path\n",
    "    for reference in (out_cv2, out_tv_float):\n",
    "        assert_close(out_tv, reference, atol=1, allowed_percent=7)\n",
    "        assert_close(out_tv, reference, atol=0, allowed_percent=30)\n",
    "    # The float path is held to much tighter bounds against cv2\n",
    "    assert_close(out_tv_float, out_cv2, atol=1)\n",
    "    assert_close(out_tv_float, out_cv2, atol=0, allowed_percent=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8cf1e6db",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bicubic interpolation, with antialiasing\n",
    "# ----------------------------------------\n",
    "# Compares torchvision (uint8 and float paths) against PIL's BICUBIC resize.\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, _, pil_img = make_uint8_images(Hin, Win)\n",
    "    tv_kwargs = dict(size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=True)\n",
    "\n",
    "    # PIL takes the target size as (width, height)\n",
    "    out_pil = pil_img.resize((Wout, Hout), Image.BICUBIC)\n",
    "    out_tv = tv_resize(tensor_img, **tv_kwargs)\n",
    "    out_tv_float = tv_resize_on_floats(tensor_img, **tv_kwargs)\n",
    "\n",
    "    # torchvision on uint8 vs PIL\n",
    "    assert_close(out_tv, out_pil, atol=2)\n",
    "    assert_close(out_tv, out_pil, atol=1, allowed_percent=0.5)\n",
    "    assert_close(out_tv, out_pil, atol=0, allowed_percent=1)\n",
    "    # torchvision on uint8 vs torchvision on floats\n",
    "    assert_close(out_tv, out_tv_float, atol=1, allowed_percent=2)\n",
    "    assert_close(out_tv, out_tv_float, atol=0, allowed_percent=25)\n",
    "    # torchvision on floats vs PIL\n",
    "    assert_close(out_tv_float, out_pil, atol=2, allowed_percent=1)\n",
    "    assert_close(out_tv_float, out_pil, atol=1, allowed_percent=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "e46ab5c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Nearest and nearest-exact interpolation\n",
    "# ---------------------------------------\n",
    "# Compares torchvision (uint8 and float paths) against cv2 and PIL nearest-neighbour resizes.\n",
    "for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
    "    tensor_img, cv2_img, pil_img = make_uint8_images(Hin, Win)\n",
    "\n",
    "    # cv2 and PIL take the target size as (width, height)\n",
    "    out_cv2_nearest = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_NEAREST)\n",
    "    out_cv2_nearest_exact = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_NEAREST_EXACT)\n",
    "\n",
    "    out_pil = pil_img.resize((Wout, Hout), Image.NEAREST)\n",
    "\n",
    "    # The float variants must go through tv_resize_on_floats: calling tv_resize on the uint8\n",
    "    # tensor again would make the uint8-vs-float assertions below compare a result with itself.\n",
    "    out_tv_nearest = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST)\n",
    "    out_tv_float_nearest = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST)\n",
    "    out_tv_nearest_exact = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST_EXACT)\n",
    "    out_tv_float_nearest_exact = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST_EXACT)\n",
    "\n",
    "    # NEAREST: torchvision must match cv2 INTER_NEAREST and its own float path exactly\n",
    "    assert_exactly_equal(out_tv_nearest, out_cv2_nearest)\n",
    "    assert_exactly_equal(out_tv_nearest, out_tv_float_nearest)\n",
    "\n",
    "    # NEAREST_EXACT: the three libraries may disagree on a small percentage of entries\n",
    "    assert_close(out_cv2_nearest_exact, out_pil, atol=0, allowed_percent=5)\n",
    "    assert_close(out_tv_nearest_exact, out_pil, atol=0, allowed_percent=5)\n",
    "    assert_close(out_tv_nearest_exact, out_cv2_nearest_exact, atol=0, allowed_percent=7)\n",
    "    assert_exactly_equal(out_tv_nearest_exact, out_tv_float_nearest_exact)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7fe96b3a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 8,
	"id": "184c1cd2",
	"metadata": {},
	"outputs": [],
	"source": [
	"import cv2\n",
	"import torch\n",
	"import numpy as np\n",
	"from PIL import Image\n",
	"from torchvision.transforms.v2.functional import resize as tv_resize, InterpolationMode\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"id": "1724ecad",
	"metadata": {},
	"outputs": [],
	"source": [
	"Hin, Win = 256, 276\n",
	"\n",
	"def make_uint8_images(Hin, Win):\n",
	"    # Build one random uint8 image and return it in the three containers compared\n",
	"    # in this notebook: a CHW torch tensor, an HWC numpy array (for cv2) and a PIL image.\n",
	"    tensor_img = torch.randint(0, 256, size=(3, Hin, Win), dtype=torch.uint8)\n",
	"    # Note: cv2 arrays are BGR, not RGB, but we don't care\n",
	"    cv2_img = tensor_img.numpy().transpose(1, 2, 0)  # CHW -> HWC\n",
	"    pil_img = Image.fromarray(cv2_img)\n",
	"\n",
	"    return tensor_img, cv2_img, pil_img\n",
	"\n",
	"def to_tensor(x):\n",
	"    # Bring any of the image containers used here to a common tensor form.\n",
	"    # Tensors pass through untouched; PIL images are first converted to numpy arrays,\n",
	"    # and numpy arrays are wrapped as tensors with axes reordered HWC -> CHW.\n",
	"    if isinstance(x, torch.Tensor):\n",
	"        return x\n",
	"\n",
	"    if isinstance(x, Image.Image):\n",
	"        x = np.asarray(x)\n",
	"    return torch.from_numpy(x).permute(2, 0, 1)\n",
	"\n",
	"\n",
	"def assert_close(a, b, atol=1, allowed_percent=0):\n",
	"    # Assert that a and b differ by more than atol on no more than allowed_percent % of entries.\n",
	"    # allowed_percent is expected in [0, 100].\n",
	"    # When allowed_percent is 0 (default), we assert that all entries differ by at most atol.\n",
	"    # a and b are passed through to_tensor first. Raises AssertionError on failure.\n",
	"\n",
	"    a, b = to_tensor(a), to_tensor(b)\n",
	"\n",
	"    if allowed_percent == 0:\n",
	"        torch.testing.assert_close(a, b, rtol=0, atol=atol)\n",
	"        return\n",
	"\n",
	"    # The strict branch above rejects mismatched shapes; do the same here rather than\n",
	"    # letting the subtraction below silently broadcast.\n",
	"    if a.shape != b.shape:\n",
	"        raise AssertionError(f\"Shape mismatch: {tuple(a.shape)} != {tuple(b.shape)}\")\n",
	"\n",
	"    abs_diff = (a.float() - b.float()).abs()\n",
	"    differing_fraction = (abs_diff > atol).float().mean()\n",
	"    if differing_fraction > allowed_percent / 100:\n",
	"        raise AssertionError(f\"{differing_fraction:.1%} pixels differ by more than {atol=}!\")\n",
	"\n",
	"def assert_exactly_equal(a, b, *args, **kwargs):\n",
	"    # Zero-tolerance comparison (rtol=0, atol=0) of a and b after to_tensor conversion.\n",
	"    # Any extra arguments are forwarded unchanged to torch.testing.assert_close.\n",
	"    a, b = to_tensor(a), to_tensor(b)\n",
	"    torch.testing.assert_close(a, b, *args, **kwargs, rtol=0, atol=0)\n",
	"\n",
	"def tv_resize_on_floats(uint8_tensor_img, *args, **kwargs):\n",
	"    # Convert uint8 to float, resize, then round and clamp to [0, 255] before casting back to uint8.\n",
	"    # This is what the v1 version of resize (torchvision.transforms.functional.resize) does\n",
	"    # This is mathematically more correct for bicubic mode, but slower.\n",
	"    # It doesn't matter in practice for models, you can just use the faster version on uint8\n",
	"    # Extra positional and keyword arguments are forwarded to tv_resize unchanged.\n",
	"    return tv_resize(uint8_tensor_img.float(), *args, **kwargs).round().clamp(0, 255).to(torch.uint8)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"id": "386a5e19",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Bilinear interpolation, no antialiasing\n",
	"# ---------------------------------------\n",
	"# Compares torchvision (uint8 and float paths) against cv2 INTER_LINEAR / INTER_LINEAR_EXACT.\n",
	"for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
	"    tensor_img, cv2_img, _ = make_uint8_images(Hin, Win)\n",
	"\n",
	"    # cv2 takes the target size as (width, height)\n",
	"    out_cv2_linear = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_LINEAR)\n",
	"    out_cv2_linear_exact = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_LINEAR_EXACT)\n",
	"    out_tv = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=False)\n",
	"    out_tv_float = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=False)\n",
	"\n",
	"    # The two cv2 variants must stay within 1 of each other, and differ on at most 20% of entries\n",
	"    assert_close(out_cv2_linear, out_cv2_linear_exact, atol=1)\n",
	"    assert_close(out_cv2_linear, out_cv2_linear_exact, atol=0, allowed_percent=20)\n",
	"    # torchvision on uint8 must stay within 1 of either cv2 variant, differing on at most 15% of entries\n",
	"    assert_close(out_tv, out_cv2_linear, atol=1)\n",
	"    assert_close(out_tv, out_cv2_linear, atol=0, allowed_percent=15)\n",
	"    assert_close(out_tv, out_cv2_linear_exact, atol=1)\n",
	"    assert_close(out_tv, out_cv2_linear_exact, atol=0, allowed_percent=15)\n",
	"    # The uint8 and float torchvision paths must give identical results\n",
	"    assert_exactly_equal(out_tv, out_tv_float)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"id": "2dbfc5bd",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Bilinear interpolation, with antialiasing\n",
	"# -----------------------------------------\n",
	"# Compares torchvision (uint8 and float paths) against PIL's BILINEAR resize.\n",
	"for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
	"    tensor_img, _, pil_img = make_uint8_images(Hin, Win)\n",
	"\n",
	"    # PIL takes the target size as (width, height)\n",
	"    out_pil = pil_img.resize((Wout, Hout), Image.BILINEAR)\n",
	"    out_tv = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=True)\n",
	"    out_tv_float = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BILINEAR, antialias=True)\n",
	"\n",
	"    # torchvision on uint8 must stay within 1 of PIL, differing on at most 20% of entries\n",
	"    assert_close(out_tv, out_pil, atol=1)\n",
	"    assert_close(out_tv, out_pil, atol=0, allowed_percent=20)\n",
	"    # The uint8 and float torchvision paths must give identical results\n",
	"    assert_exactly_equal(out_tv, out_tv_float)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"id": "0b1f6374",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Bicubic interpolation, no antialiasing\n",
	"# --------------------------------------\n",
	"# Compares torchvision (uint8 and float paths) against cv2 INTER_CUBIC.\n",
	"for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
	"    tensor_img, cv2_img, _ = make_uint8_images(Hin, Win)\n",
	"\n",
	"    # cv2 takes the target size as (width, height)\n",
	"    out_cv2 = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_CUBIC)\n",
	"    out_tv = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=False)\n",
	"    out_tv_float = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=False)\n",
	"\n",
	"    # The uint8 path is held to the same loose bounds against cv2 and against the float path\n",
	"    assert_close(out_tv, out_cv2, atol=1, allowed_percent=7)\n",
	"    assert_close(out_tv, out_cv2, atol=0, allowed_percent=30)\n",
	"    assert_close(out_tv, out_tv_float, atol=1, allowed_percent=7)\n",
	"    assert_close(out_tv, out_tv_float, atol=0, allowed_percent=30)\n",
	"    # The float path is held to much tighter bounds against cv2\n",
	"    assert_close(out_tv_float, out_cv2, atol=1)\n",
	"    assert_close(out_tv_float, out_cv2, atol=0, allowed_percent=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"id": "8cf1e6db",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Bicubic interpolation, with antialiasing\n",
	"# ----------------------------------------\n",
	"# Compares torchvision (uint8 and float paths) against PIL's BICUBIC resize.\n",
	"for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
	"    tensor_img, _, pil_img = make_uint8_images(Hin, Win)\n",
	"\n",
	"    # PIL takes the target size as (width, height)\n",
	"    out_pil = pil_img.resize((Wout, Hout), Image.BICUBIC)\n",
	"    out_tv = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=True)\n",
	"    out_tv_float = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.BICUBIC, antialias=True)\n",
	"\n",
	"    # torchvision on uint8 vs PIL\n",
	"    assert_close(out_tv, out_pil, atol=2)\n",
	"    assert_close(out_tv, out_pil, atol=1, allowed_percent=0.5)\n",
	"    assert_close(out_tv, out_pil, atol=0, allowed_percent=1)\n",
	"    # torchvision on uint8 vs torchvision on floats\n",
	"    assert_close(out_tv, out_tv_float, atol=1, allowed_percent=2)\n",
	"    assert_close(out_tv, out_tv_float, atol=0, allowed_percent=25)\n",
	"    # torchvision on floats vs PIL\n",
	"    assert_close(out_tv_float, out_pil, atol=2, allowed_percent=1)\n",
	"    assert_close(out_tv_float, out_pil, atol=1, allowed_percent=2)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"id": "e46ab5c1",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Nearest and nearest-exact interpolation\n",
	"# ---------------------------------------\n",
	"# Compares torchvision (uint8 and float paths) against cv2 and PIL nearest-neighbour resizes.\n",
	"for Hout, Wout in [(120, 128), (500, 480)]:  # Downsampling, upsampling\n",
	"    tensor_img, cv2_img, pil_img = make_uint8_images(Hin, Win)\n",
	"\n",
	"    # cv2 and PIL take the target size as (width, height)\n",
	"    out_cv2_nearest = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_NEAREST)\n",
	"    out_cv2_nearest_exact = cv2.resize(cv2_img, (Wout, Hout), interpolation=cv2.INTER_NEAREST_EXACT)\n",
	"\n",
	"    out_pil = pil_img.resize((Wout, Hout), Image.NEAREST)\n",
	"\n",
	"    # The float variants must go through tv_resize_on_floats: calling tv_resize on the uint8\n",
	"    # tensor again would make the uint8-vs-float assertions below compare a result with itself.\n",
	"    out_tv_nearest = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST)\n",
	"    out_tv_float_nearest = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST)\n",
	"    out_tv_nearest_exact = tv_resize(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST_EXACT)\n",
	"    out_tv_float_nearest_exact = tv_resize_on_floats(tensor_img, size=(Hout, Wout), interpolation=InterpolationMode.NEAREST_EXACT)\n",
	"\n",
	"    # NEAREST: torchvision must match cv2 INTER_NEAREST and its own float path exactly\n",
	"    assert_exactly_equal(out_tv_nearest, out_cv2_nearest)\n",
	"    assert_exactly_equal(out_tv_nearest, out_tv_float_nearest)\n",
	"\n",
	"    # NEAREST_EXACT: the three libraries may disagree on a small percentage of entries\n",
	"    assert_close(out_cv2_nearest_exact, out_pil, atol=0, allowed_percent=5)\n",
	"    assert_close(out_tv_nearest_exact, out_pil, atol=0, allowed_percent=5)\n",
	"    assert_close(out_tv_nearest_exact, out_cv2_nearest_exact, atol=0, allowed_percent=7)\n",
	"    assert_exactly_equal(out_tv_nearest_exact, out_tv_float_nearest_exact)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "7fe96b3a",
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.9.16"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}