rfezzani/get_crop.py

## get_crop.py
from tifffile import TiffFile
import numpy as np


def get_crop(page, i0, j0, h, w):
    """Extract a crop from a TIFF image file directory (IFD).

    Only the tiles englobing the crop area are loaded and not the whole page.
    This is usefull for large Whole slide images that can't fit int RAM.

    Parameters
    ----------
    page : TiffPage
        TIFF image file directory (IFD) from which the crop must be extracted.
    i0, j0: int
        Coordinates of the top left corner of the desired crop.
    h: int
        Desired crop height.
    w: int
        Desired crop width.

    Returns
    -------
    out : ndarray of shape (imagedepth, h, w, sampleperpixel)
        Extracted crop.

    """

    if not page.is_tiled:
        raise ValueError("Input page must be tiled.")

    im_width = page.imagewidth
    im_height = page.imagelength

    if h < 1 or w < 1:
        raise ValueError("h and w must be strictly positive.")

    if i0 < 0 or j0 < 0 or i0 + h >= im_height or j0 + w >= im_width:
        raise ValueError("Requested crop area is out of image bounds.")

    tile_width, tile_height = page.tilewidth, page.tilelength
    i1, j1 = i0 + h, j0 + w

    tile_i0, tile_j0 = i0 // tile_height, j0 // tile_width
    tile_i1, tile_j1 = np.ceil([i1 / tile_height, j1 / tile_width]).astype(int)

    tile_per_line = int(np.ceil(im_width / tile_width))

    out = np.empty((page.imagedepth,
                    (tile_i1 - tile_i0) * tile_height,
                    (tile_j1 - tile_j0) * tile_width,
                    page.samplesperpixel), dtype=page.dtype)

    fh = page.parent.filehandle

    jpegtables = page.tags.get('JPEGTables', None)
    if jpegtables is not None:
        jpegtables = jpegtables.value

    for i in range(tile_i0, tile_i1):
        for j in range(tile_j0, tile_j1):
            index = int(i * tile_per_line + j)

            offset = page.dataoffsets[index]
            bytecount = page.databytecounts[index]

            fh.seek(offset)
            data = fh.read(bytecount)
            tile, indices, shape = page.decode(data, index, jpegtables,
                                               jpegheader=page.jpegheader)

            im_i = (i - tile_i0) * tile_height
            im_j = (j - tile_j0) * tile_width
            out[:, im_i: im_i + tile_height, im_j: im_j + tile_width, :] = tile

    im_i0 = i0 - tile_i0 * tile_height
    im_j0 = j0 - tile_j0 * tile_width

    return out[:, im_i0: im_i0 + h, im_j0: im_j0 + w, :]
	from tifffile import TiffFile
	import numpy as np


	def get_crop(page, i0, j0, h, w):
	"""Extract a crop from a TIFF image file directory (IFD).

	Only the tiles englobing the crop area are loaded and not the whole page.
	This is usefull for large Whole slide images that can't fit int RAM.

	Parameters
	----------
	page : TiffPage
	TIFF image file directory (IFD) from which the crop must be extracted.
	i0, j0: int
	Coordinates of the top left corner of the desired crop.
	h: int
	Desired crop height.
	w: int
	Desired crop width.

	Returns
	-------
	out : ndarray of shape (imagedepth, h, w, sampleperpixel)
	Extracted crop.

	"""

	if not page.is_tiled:
	raise ValueError("Input page must be tiled.")

	im_width = page.imagewidth
	im_height = page.imagelength

	if h < 1 or w < 1:
	raise ValueError("h and w must be strictly positive.")

	if i0 < 0 or j0 < 0 or i0 + h >= im_height or j0 + w >= im_width:
	raise ValueError("Requested crop area is out of image bounds.")

	tile_width, tile_height = page.tilewidth, page.tilelength
	i1, j1 = i0 + h, j0 + w

	tile_i0, tile_j0 = i0 // tile_height, j0 // tile_width
	tile_i1, tile_j1 = np.ceil([i1 / tile_height, j1 / tile_width]).astype(int)

	tile_per_line = int(np.ceil(im_width / tile_width))

	out = np.empty((page.imagedepth,
	(tile_i1 - tile_i0) * tile_height,
	(tile_j1 - tile_j0) * tile_width,
	page.samplesperpixel), dtype=page.dtype)

	fh = page.parent.filehandle

	jpegtables = page.tags.get('JPEGTables', None)
	if jpegtables is not None:
	jpegtables = jpegtables.value

	for i in range(tile_i0, tile_i1):
	for j in range(tile_j0, tile_j1):
	index = int(i * tile_per_line + j)

	offset = page.dataoffsets[index]
	bytecount = page.databytecounts[index]

	fh.seek(offset)
	data = fh.read(bytecount)
	tile, indices, shape = page.decode(data, index, jpegtables,
	jpegheader=page.jpegheader)

	im_i = (i - tile_i0) * tile_height
	im_j = (j - tile_j0) * tile_width
	out[:, im_i: im_i + tile_height, im_j: im_j + tile_width, :] = tile

	im_i0 = i0 - tile_i0 * tile_height
	im_j0 = j0 - tile_j0 * tile_width

	return out[:, im_i0: im_i0 + h, im_j0: im_j0 + w, :]