Skip to content

Instantly share code, notes, and snippets.

@gatoatigrado
Last active August 29, 2015 14:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gatoatigrado/96374dbafd3082f0b019 to your computer and use it in GitHub Desktop.
Save gatoatigrado/96374dbafd3082f0b019 to your computer and use it in GitHub Desktop.
Tiff processing on the GPU [ ntung.com/hacks/tiff-gpu-processing ]
# -*- coding: utf-8 -*-
"""
Testing tiff processing on the gpu
"""
from __future__ import absolute_import
from __future__ import print_function
import math
__author__ = 'gatoatigrado'
import numpy
# This library is really, really fast and supports 16-bit images.
# You should use it instead of PIL.
import tifffile
import pycuda.driver as cuda
import pycuda.compiler as compiler
import pycuda.autoinit
__pyflakes_ignore__ = [pycuda.autoinit]
kernel = compiler.SourceModule("""
texture<ushort4, cudaTextureType2D, cudaReadModeNormalizedFloat> tex;
__device__ inline unsigned short float_to_ushort(const float v) {
return (unsigned short)(65535.0 * fminf(1.0, fmaxf(0.0, v)));
}
__device__ ushort4 float4_to_ushort4(const float4 v) {
return make_ushort4(
float_to_ushort(v.x),
float_to_ushort(v.y),
float_to_ushort(v.z),
float_to_ushort(v.w)
);
}
__device__ float4 read_texture(
int w,
int h,
int x,
int y
) {
return tex2D(tex, x, y);
}
// blurs the red channel of an image
__global__ void blur_red(
int w,
int h,
ushort4* output
) {
const unsigned int x = blockDim.x * blockIdx.x + threadIdx.x;
const unsigned int y = blockDim.y * blockIdx.y + threadIdx.y;
if ((x < w) && (y < h)) {
float4 center = read_texture(w, h, x, y);
float4 north = read_texture(w, h, x, y - 1);
float4 east = read_texture(w, h, x + 1, y);
float4 south = read_texture(w, h, x, y + 1);
float4 west = read_texture(w, h, x, y - 1);
center.x = (
0.6 * center.x +
0.1 * north.x +
0.1 * east.x +
0.1 * south.x +
0.1 * west.x
);
output[y * w + x] = float4_to_ushort4(center);
}
}
""")
texref = kernel.get_texref('tex')
kernel_fcn = kernel.get_function('blur_red')
def rgb_to_rgba(data):
"""
Converts an RGB image to an RGBA one with 1's in the alpha channel.
This is necessary for copying to a CUDA array, since CUDA arrays
can only have 1, 2, or 4 channels (not 3).
"""
height, width, num_channels = data.shape
assert data.dtype == numpy.uint16, "Expected 16-bit image."
if num_channels == 3:
alpha_channel = (2 ** 16 - 1) * numpy.ones((height, width, 1), dtype=numpy.uint16)
rgba = numpy.concatenate((data, alpha_channel), axis=2)
assert rgba.shape == (height, width, num_channels + 1)
return rgba
else:
assert num_channels == 4
return data
def read_16_bit_image(filename):
"""Reads a numpy array with tifffile, and converts it to RGBA"""
with tifffile.TIFFfile(filename) as tif:
return rgb_to_rgba(tif.asarray())
def write_image(data, filename):
"""Writes an RGBA image to `filename`."""
with tifffile.TiffWriter(filename=filename) as out:
out.save(data)
def get_covering_grid_kernel_args(w, h):
"""Returns a grid (block and grid dimensions) that will cover
an image of a given width and height."""
ceil_div = lambda x, y: int(math.ceil(float(x) / float(y)))
assert ceil_div(17, 16) == 2
return {
'block': (16, 16, 1),
'grid': (ceil_div(w, 16), ceil_div(h, 16))
}
def call_kernel(image, w, h):
"""Calls the blur_red() kernel. Does a bunch of texture setup.
Modify with care!!
:param image:
CUDA array from a h x w x 4 image (4 for 4 channels, RGBA)
:param w: image width
:param h: image height
"""
output = numpy.zeros((h, w, 4), dtype=numpy.uint16, order='C')
texref.set_array(image)
texref.set_address_mode(0, cuda.address_mode.CLAMP)
texref.set_address_mode(1, cuda.address_mode.CLAMP)
texref.set_filter_mode(cuda.filter_mode.LINEAR)
# Note: If you use cudaReadModeElementType, you *need*
# to do this!!
# texref.set_flags(cuda.TRSF_READ_AS_INTEGER)
kernel_fcn(
numpy.int32(w),
numpy.int32(h),
cuda.InOut(output),
texrefs=[texref],
**get_covering_grid_kernel_args(w, h)
)
return output
def main():
data = read_16_bit_image(u'test.tif')
h, w, _ = data.shape
device_array = cuda.make_multichannel_2d_array(data, 'C')
output = call_kernel(device_array, w, h)
assert output.dtype == data.dtype
assert output.shape == data.shape
write_image(data[:, :, 0:3], "original_input.tif")
write_image(output, "output.tif")
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment