gatoatigrado/gist:96374dbafd3082f0b019

## gistfile1.py
# -*- coding: utf-8 -*-
"""
Testing tiff processing on the gpu
"""
from __future__ import absolute_import
from __future__ import print_function

import math


__author__ = 'gatoatigrado'


import numpy
# This library is really, really fast and supports 16-bit images.
# You should use it instead of PIL.
import tifffile
import pycuda.driver as cuda
import pycuda.compiler as compiler
import pycuda.autoinit


__pyflakes_ignore__ = [pycuda.autoinit]


kernel = compiler.SourceModule("""
texture<ushort4, cudaTextureType2D, cudaReadModeNormalizedFloat> tex;

__device__ inline unsigned short float_to_ushort(const float v) {
    return (unsigned short)(65535.0 * fminf(1.0, fmaxf(0.0, v)));
}

__device__ ushort4 float4_to_ushort4(const float4 v) {
    return make_ushort4(
        float_to_ushort(v.x),
        float_to_ushort(v.y),
        float_to_ushort(v.z),
        float_to_ushort(v.w)
    );
}

__device__ float4 read_texture(
    int w,
    int h,
    int x,
    int y
) {
    return tex2D(tex, x, y);
}

// blurs the red channel of an image
__global__ void blur_red(
    int w,
    int h,
    ushort4* output
) {
    const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
    const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;

    if ((x < w) && (y < h)) {
        float4 center = read_texture(w, h, x, y);
        float4 north = read_texture(w, h, x, y - 1);
        float4 east = read_texture(w, h, x + 1, y);
        float4 south = read_texture(w, h, x, y + 1);
        float4 west = read_texture(w, h, x, y - 1);
        center.x = (
           0.6 * center.x +
           0.1 * north.x +
           0.1 * east.x +
           0.1 * south.x +
           0.1 * west.x
        );
        output[y * w + x] = float4_to_ushort4(center);
    }
}
""")


texref = kernel.get_texref('tex')


kernel_fcn = kernel.get_function('blur_red')


def rgb_to_rgba(data):
    """
    Converts an RGB image to an RGBA one with 1's in the alpha channel.
    This is necessary for copying to a CUDA array, since CUDA arrays
    can only have 1, 2, or 4 channels (not 3).
    """
    height, width, num_channels = data.shape
    assert data.dtype == numpy.uint16, "Expected 16-bit image."
    if num_channels == 3:
        alpha_channel = (2 ** 16 - 1) * numpy.ones((height, width, 1), dtype=numpy.uint16)
        rgba = numpy.concatenate((data, alpha_channel), axis=2)
        assert rgba.shape == (height, width, num_channels + 1)
        return rgba
    else:
        assert num_channels == 4
        return data


def read_16_bit_image(filename):
    """Reads a numpy array with tifffile, and converts it to RGBA"""
    with tifffile.TIFFfile(filename) as tif:
        return rgb_to_rgba(tif.asarray())


def write_image(data, filename):
    """Writes an RGBA image to `filename`."""
    with tifffile.TiffWriter(filename=filename) as out:
        out.save(data)


def get_covering_grid_kernel_args(w, h):
    """Returns a grid (block and grid dimensions) that will cover
    an image of a given width and height."""
    ceil_div = lambda x, y: int(math.ceil(float(x) / float(y)))
    assert ceil_div(17, 16) == 2
    return {
        'block': (16, 16, 1),
        'grid': (ceil_div(w, 16), ceil_div(h, 16))
    }


def call_kernel(image, w, h):
    """Calls the blur_red() kernel. Does a bunch of texture setup.
    Modify with care!!

    :param image:
        CUDA array from a h x w x 4 image (4 for 4 channels, RGBA)
    :param w: image width
    :param h: image height
    """
    output = numpy.zeros((h, w, 4), dtype=numpy.uint16, order='C')
    texref.set_array(image)
    texref.set_address_mode(0, cuda.address_mode.CLAMP)
    texref.set_address_mode(1, cuda.address_mode.CLAMP)
    texref.set_filter_mode(cuda.filter_mode.LINEAR)
    # Note: If you use cudaReadModeElementType, you *need*
    # to do this!!
    # texref.set_flags(cuda.TRSF_READ_AS_INTEGER)
    kernel_fcn(
        numpy.int32(w),
        numpy.int32(h),
        cuda.InOut(output),
        texrefs=[texref],
        **get_covering_grid_kernel_args(w, h)
    )
    return output


def main():
    data = read_16_bit_image(u'test.tif')
    h, w, _ = data.shape
    device_array = cuda.make_multichannel_2d_array(data, 'C')

    output = call_kernel(device_array, w, h)
    assert output.dtype == data.dtype
    assert output.shape == data.shape
    write_image(data[:, :, 0:3], "original_input.tif")
    write_image(output, "output.tif")


if __name__ == "__main__":
    main()
	# -- coding: utf-8 --
	"""
	Testing tiff processing on the gpu
	"""
	from __future__ import absolute_import
	from __future__ import print_function

	import math


	__author__ = 'gatoatigrado'


	import numpy
	# This library is really, really fast and supports 16-bit images.
	# You should use it instead of PIL.
	import tifffile
	import pycuda.driver as cuda
	import pycuda.compiler as compiler
	import pycuda.autoinit


	__pyflakes_ignore__ = [pycuda.autoinit]


	kernel = compiler.SourceModule("""
	texture<ushort4, cudaTextureType2D, cudaReadModeNormalizedFloat> tex;

	__device__ inline unsigned short float_to_ushort(const float v) {
	return (unsigned short)(65535.0 * fminf(1.0, fmaxf(0.0, v)));
	}

	__device__ ushort4 float4_to_ushort4(const float4 v) {
	return make_ushort4(
	float_to_ushort(v.x),
	float_to_ushort(v.y),
	float_to_ushort(v.z),
	float_to_ushort(v.w)
	);
	}

	__device__ float4 read_texture(
	int w,
	int h,
	int x,
	int y
	) {
	return tex2D(tex, x, y);
	}

	// blurs the red channel of an image
	__global__ void blur_red(
	int w,
	int h,
	ushort4* output
	) {
	const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
	const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;

	if ((x < w) && (y < h)) {
	float4 center = read_texture(w, h, x, y);
	float4 north = read_texture(w, h, x, y - 1);
	float4 east = read_texture(w, h, x + 1, y);
	float4 south = read_texture(w, h, x, y + 1);
	float4 west = read_texture(w, h, x, y - 1);
	center.x = (
	0.6 * center.x +
	0.1 * north.x +
	0.1 * east.x +
	0.1 * south.x +
	0.1 * west.x
	);
	output[y * w + x] = float4_to_ushort4(center);
	}
	}
	""")


	texref = kernel.get_texref('tex')


	kernel_fcn = kernel.get_function('blur_red')


	def rgb_to_rgba(data):
	"""
	Converts an RGB image to an RGBA one with 1's in the alpha channel.
	This is necessary for copying to a CUDA array, since CUDA arrays
	can only have 1, 2, or 4 channels (not 3).
	"""
	height, width, num_channels = data.shape
	assert data.dtype == numpy.uint16, "Expected 16-bit image."
	if num_channels == 3:
	alpha_channel = (2 ** 16 - 1) * numpy.ones((height, width, 1), dtype=numpy.uint16)
	rgba = numpy.concatenate((data, alpha_channel), axis=2)
	assert rgba.shape == (height, width, num_channels + 1)
	return rgba
	else:
	assert num_channels == 4
	return data


	def read_16_bit_image(filename):
	"""Reads a numpy array with tifffile, and converts it to RGBA"""
	with tifffile.TIFFfile(filename) as tif:
	return rgb_to_rgba(tif.asarray())


	def write_image(data, filename):
	"""Writes an RGBA image to `filename`."""
	with tifffile.TiffWriter(filename=filename) as out:
	out.save(data)


	def get_covering_grid_kernel_args(w, h):
	"""Returns a grid (block and grid dimensions) that will cover
	an image of a given width and height."""
	ceil_div = lambda x, y: int(math.ceil(float(x) / float(y)))
	assert ceil_div(17, 16) == 2
	return {
	'block': (16, 16, 1),
	'grid': (ceil_div(w, 16), ceil_div(h, 16))
	}


	def call_kernel(image, w, h):
	"""Calls the blur_red() kernel. Does a bunch of texture setup.
	Modify with care!!

	:param image:
	CUDA array from a h x w x 4 image (4 for 4 channels, RGBA)
	:param w: image width
	:param h: image height
	"""
	output = numpy.zeros((h, w, 4), dtype=numpy.uint16, order='C')
	texref.set_array(image)
	texref.set_address_mode(0, cuda.address_mode.CLAMP)
	texref.set_address_mode(1, cuda.address_mode.CLAMP)
	texref.set_filter_mode(cuda.filter_mode.LINEAR)
	# Note: If you use cudaReadModeElementType, you need
	# to do this!!
	# texref.set_flags(cuda.TRSF_READ_AS_INTEGER)
	kernel_fcn(
	numpy.int32(w),
	numpy.int32(h),
	cuda.InOut(output),
	texrefs=[texref],
	**get_covering_grid_kernel_args(w, h)
	)
	return output


	def main():
	data = read_16_bit_image(u'test.tif')
	h, w, _ = data.shape
	device_array = cuda.make_multichannel_2d_array(data, 'C')

	output = call_kernel(device_array, w, h)
	assert output.dtype == data.dtype
	assert output.shape == data.shape
	write_image(data[:, :, 0:3], "original_input.tif")
	write_image(output, "output.tif")


	if __name__ == "__main__":
	main()