Skip to content

Instantly share code, notes, and snippets.

@lagru
Created April 23, 2024 08:53
Show Gist options
  • Save lagru/c541a0080ea2baa9642e34c6271f8657 to your computer and use it in GitHub Desktop.
Save lagru/c541a0080ea2baa9642e34c6271f8657 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "653fb84c-df84-42c7-9e1a-a73ca118dbe1",
"metadata": {},
"source": [
"See also & sources:\n",
"\n",
"- https://stackoverflow.com/questions/57591990/line-profiling-with-cython-in-jupyter-notebook\n",
"- https://stackoverflow.com/questions/28301931/how-to-profile-cython-functions-line-by-line\n",
"- https://github.com/mesonbuild/meson/issues/13111"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "14579ae2-b6a0-4370-866d-e33c8b54b68a",
"metadata": {},
"outputs": [],
"source": [
"%load_ext cython\n",
"%load_ext line_profiler\n",
"\n",
"import sys\n",
"sys.path.append(\"/home/lg/Res/scikit-image/\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "30863b73-9a80-4777-ba3f-f5c4f7e5a2dd",
"metadata": {},
"outputs": [],
"source": [
"%%cython\n",
"#cython: cdivision=True\n",
"#cython: boundscheck=False\n",
"#cython: nonecheck=False\n",
"#cython: wraparound=False\n",
"#cython: linetrace=True\n",
"#cython: binding=True\n",
"#distutils: define_macros=CYTHON_TRACE_NOGIL=1\n",
"\n",
"cimport numpy as cnp\n",
"\n",
"from skimage._shared.fused_numerics cimport np_anyint\n",
"\n",
"\n",
"def cy_remove_near_objects(\n",
" np_anyint[::1] out not None,\n",
" Py_ssize_t[::1] border_indices not None,\n",
" Py_ssize_t[::1] inner_indices not None,\n",
" kdtree,\n",
" cnp.float64_t p_norm,\n",
" cnp.float64_t min_distance,\n",
" tuple shape,\n",
"):\n",
" cdef:\n",
" Py_ssize_t i_indices, j_indices # Loop variables to index `indices`\n",
" Py_ssize_t i_out # Loop variable to index `out`\n",
" np_anyint object_id, other_id\n",
" list neighborhood\n",
" set remembered_ids\n",
"\n",
" remembered_ids = set()\n",
" for i_indices in range(border_indices.shape[0]):\n",
" i_out = border_indices[i_indices]\n",
" object_id = out[i_out]\n",
" # Skip if sample is part of a removed object\n",
" if object_id == 0:\n",
" continue\n",
"\n",
" neighborhood = kdtree.query_ball_point(\n",
" kdtree.data[i_indices, ...],\n",
" r=min_distance,\n",
" p=p_norm,\n",
" )\n",
" for j_indices in neighborhood:\n",
" # Check object IDs in neighborhood\n",
" other_id = out[border_indices[j_indices]]\n",
" if other_id != 0 and other_id != object_id:\n",
" # If neighbor ID wasn't already removed or is the current one\n",
" # remove the boundary and remember the ID\n",
" _remove_object(out, border_indices, j_indices)\n",
" remembered_ids.add(other_id)\n",
"\n",
" # Delete inner parts of remembered objects\n",
" for j_indices in range(inner_indices.shape[0]):\n",
" object_id = out[inner_indices[j_indices]]\n",
" if object_id != 0 and object_id in remembered_ids:\n",
" _remove_object(out, inner_indices, j_indices)\n",
"\n",
"\n",
"cdef inline _remove_object(\n",
" np_anyint[::1] out,\n",
" Py_ssize_t[::1] indices,\n",
" Py_ssize_t start\n",
"):\n",
" cdef:\n",
" Py_ssize_t k_indices, k_labels\n",
" np_anyint remove_id\n",
"\n",
" with nogil:\n",
" remove_id = out[indices[start]]\n",
"\n",
" for k_indices in range(start, -1, -1):\n",
" k_labels = indices[k_indices]\n",
" if remove_id == out[k_labels]:\n",
" out[k_labels] = 0\n",
" else:\n",
" break\n",
" for k_indices in range(start + 1, indices.shape[0]):\n",
" k_labels = indices[k_indices]\n",
" if remove_id == out[k_labels]:\n",
" out[k_labels] = 0\n",
" else:\n",
" break\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "cd58fe00-3141-4c20-af45-b2b62b34ccbc",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import functools\n",
"from scipy import ndimage as ndi\n",
"from scipy.spatial import cKDTree\n",
"\n",
"\n",
"def remove_near_objects(\n",
" label_image,\n",
" min_distance,\n",
" *,\n",
" priority=None,\n",
" p_norm=2,\n",
" spacing=None,\n",
" out=None,\n",
"):\n",
" if min_distance < 0:\n",
" raise ValueError(f\"min_distance must be >= 0, was {min_distance}\")\n",
" if not np.issubdtype(label_image.dtype, np.integer):\n",
" raise ValueError(\n",
" f\"`label_image` must be of integer dtype, got {label_image.dtype}\"\n",
" )\n",
" if out is None:\n",
" out = label_image.copy(order=\"C\")\n",
" elif out is not label_image:\n",
" out[:] = label_image\n",
" # May create a copy if order is not C, account for that later\n",
" out_raveled = out.ravel(order=\"C\")\n",
"\n",
" if spacing is not None:\n",
" spacing = np.array(spacing)\n",
" if spacing.shape != (out.ndim,) or spacing.min() <= 0:\n",
" raise ValueError(\n",
" \"`spacing` must contain exactly one positive factor \"\n",
" \"for each dimension of `label_image`\"\n",
" )\n",
"\n",
" indices = np.nonzero(out_raveled)[0]\n",
" # Optimization: Split indices into those on the object boundaries and inner\n",
" # ones. The KDTree is built only from the boundary indices, which reduces\n",
" # the size of the critical loop significantly! Remaining indices are only\n",
" # used to remove the inner parts of objects as well.\n",
" if (spacing is None or np.all(spacing[0] == spacing)) and p_norm <= 2:\n",
" # For unity spacing we can make the borders more sparse by using a\n",
" # lower connectivity\n",
" footprint = ndi.generate_binary_structure(out.ndim, 1)\n",
" else:\n",
" footprint = ndi.generate_binary_structure(out.ndim, out.ndim)\n",
" border = (\n",
" ndi.maximum_filter(out, footprint=footprint)\n",
" != ndi.minimum_filter(out, footprint=footprint)\n",
" ).ravel()[indices]\n",
" border_indices = indices[border]\n",
" inner_indices = indices[~border]\n",
"\n",
" if border_indices.size == 0:\n",
" # Image without any or only one object, return early\n",
" return out\n",
"\n",
" # Sort by label ID first, so that IDs of the same object are contiguous\n",
" # in the sorted index. This allows fast discovery of the whole object by\n",
" # simple iteration up or down the index!\n",
" border_indices = border_indices[np.argsort(out_raveled[border_indices])]\n",
" inner_indices = inner_indices[np.argsort(out_raveled[inner_indices])]\n",
"\n",
" if priority is None:\n",
" priority = np.bincount(out_raveled)\n",
" # `priority` can only be indexed by positive object IDs,\n",
" # `border_indices` contains all unique sorted IDs so check the lowest / first\n",
" smallest_id = out_raveled[border_indices[0]]\n",
" if smallest_id < 0:\n",
" raise ValueError(f\"found object with negative ID {smallest_id!r}\")\n",
"\n",
" try:\n",
" # Sort by priority second using a stable sort to preserve the contiguous\n",
" # sorting of objects. Because each pixel in an object has the same\n",
" # priority we don't need to worry about separating objects.\n",
" border_indices = border_indices[\n",
" np.argsort(priority[out_raveled[border_indices]], kind=\"stable\")[::-1]\n",
" ]\n",
" except IndexError as error:\n",
" # Use np.amax only for the exception path to provide a nicer error message\n",
" expected_shape = (np.amax(out_raveled) + 1,)\n",
" if priority.shape != expected_shape:\n",
" raise ValueError(\n",
" \"shape of `priority` must be (np.amax(label_image) + 1,), \"\n",
" f\"expected {expected_shape}, got {priority.shape} instead\"\n",
" ) from error\n",
" else:\n",
" raise\n",
"\n",
" # Construct kd-tree from unraveled border indices (optionally scale by `spacing`)\n",
" unraveled_indices = np.unravel_index(border_indices, out.shape)\n",
" if spacing is not None:\n",
" unraveled_indices = tuple(\n",
" unraveled_indices[dim] * spacing[dim] for dim in range(out.ndim)\n",
" )\n",
" kdtree = cKDTree(data=np.asarray(unraveled_indices, dtype=np.float64).T)\n",
"\n",
" cy_remove_near_objects(\n",
" out=out_raveled,\n",
" border_indices=border_indices,\n",
" inner_indices=inner_indices,\n",
" kdtree=kdtree,\n",
" min_distance=min_distance,\n",
" p_norm=p_norm,\n",
" shape=label_image.shape,\n",
" )\n",
"\n",
" if out_raveled.base is not out:\n",
" # `out_raveled` is a copy, re-assign\n",
" out[:] = out_raveled.reshape(out.shape)\n",
" return out"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "257b2656-2ce3-40e6-b554-3e0cc7630f56",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timer unit: 1e-09 s"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import skimage as ski\n",
"\n",
"# Extract foreground by thresholding an image taken by the Hubble Telescope\n",
"image = ski.color.rgb2gray(ski.data.hubble_deep_field())\n",
"foreground = image > ski.filters.threshold_li(image)\n",
"objects = ski.measure.label(foreground)\n",
"\n",
"%lprun -f cy_remove_near_objects remove_near_objects(objects, min_distance=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a7e921cc-3886-4699-86c6-b7963572d0d8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment