Centrinia/ImageFilter2D.cl

## ImageFilter2D.cl

// Gaussian filter of image

__kernel void gaussian_filter(__read_only image2d_t srcImg,
                              __write_only image2d_t dstImg,
                              sampler_t sampler,
                              int width, int height)
{
    // Gaussian Kernel is:
    // 1  2  1
    // 2  4  2
    // 1  2  1
    float kernelWeights[9] = { 1.0f, 2.0f, 1.0f,
                               2.0f, 4.0f, 2.0f,
                               1.0f, 2.0f, 1.0f };

    int2 startImageCoord = (int2) (get_global_id(0) - 1, get_global_id(1) - 1);
    int2 endImageCoord   = (int2) (get_global_id(0) + 1, get_global_id(1) + 1);
    int2 outImageCoord = (int2) (get_global_id(0), get_global_id(1));
    printf("%d %d : %d %d\n", width, height, outImageCoord.x, outImageCoord.y);

    if (outImageCoord.x < width && outImageCoord.y < height)
    {
        int weight = 0;
        float4 outColor = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
        for( int y = startImageCoord.y; y <= endImageCoord.y; y++)
        {
            for( int x = startImageCoord.x; x <= endImageCoord.x; x++)
            {
                outColor += (read_imagef(srcImg, sampler, (int2)(x, y)) * (kernelWeights[weight] / 16.0f));
                weight += 1;
            }
        }
        float4 centerColor = read_imagef(srcImg, sampler, outImageCoord);

        // Write the output value to image
        write_imagef(dstImg, outImageCoord, -(outColor-centerColor)*16);
    }
}

## ImageFilter2D.py
#
# Book:      OpenCL(R) Programming Guide
# Authors:   Aaftab Munshi, Benedict Gaster, Timothy Mattson, James Fung, Dan Ginsburg
# ISBN-10:   0-321-74964-2
# ISBN-13:   978-0-321-74964-2
# Publisher: Addison-Wesley Professional
# URLs:      http://safari.informit.com/9780132488006/
#            http://www.openclprogrammingguide.com
#

# ImageFilter2D.py
#
#    This example demonstrates performing gaussian filtering on a 2D image using
#    OpenCL.  This is the same as the OpenCL C example in Chapter 8, but ported to
#    Python


import pyopencl as cl
import sys
import Image # Python Image Library (PIL)
import numpy

#
#  Create an OpenCL context on the first available platform using
#  either a GPU or CPU depending on what is available.
#
def CreateContext():
    platforms = cl.get_platforms();
    if len(platforms) == 0:
        print "Failed to find any OpenCL platforms."
        return None

    # Next, create an OpenCL context on the first platform.  Attempt to
    # create a GPU-based context, and if that fails, try to create
    # a CPU-based context.
    devices = platforms[0].get_devices(cl.device_type.GPU)
    if len(devices) == 0:
        print "Could not find GPU device, trying CPU..."
        devices = platforms[0].get_devices(cl.device_type.CPU)
        if len(devices) == 0:
            print "Could not find OpenCL GPU or CPU device."
            return None

    # Create a context using the first device
    context = cl.Context([devices[0]])
    return context, devices[0]

#
#  Create an OpenCL program from the kernel source file
#
def CreateProgram(context, device, fileName):
    kernelFile = open(fileName, 'r')
    kernelStr = kernelFile.read()

    # Load the program source
    program = cl.Program(context, kernelStr)

    # Build the program and check for errors
    program.build(devices=[device])

    return program


#
#  Load an image using the Python Image Library and create an OpenCL
#  image out of it
#
def LoadImage(context, fileName):
    im = Image.open(fileName)
    # Make sure the image is RGBA formatted
    if im.mode != "RGBA":
        im = im.convert("RGBA")


    # Convert to uint8 buffer
    buffer = im.tostring()
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNORM_INT8)

    clImage = cl.Image(context,
                       cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                       clImageFormat,
                       im.size,
                       None,
                       buffer
                       )

    return clImage, im.size

#
#  Save an image using the Python Image Library (PIL)
#
def SaveImage(fileName, buffer, imgSize):
    im = Image.fromstring("RGBA", imgSize, buffer.tostring())
    im.save(fileName)

#
#  Round up to the nearest multiple of the group size
#
def RoundUp(groupSize, globalSize):
    r = globalSize % groupSize;
    if r == 0:
        return globalSize;
    else:
        return globalSize + groupSize - r;


def main():

    imageObjects = [ 0, 0 ]

    # Main
    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1


    # Create an OpenCL context on first available platform
    context, device = CreateContext();
    if context == None:
        print "Failed to create OpenCL context."
        return 1

    # Create a command-queue on the first device available
    # on the created context
    commandQueue = cl.CommandQueue(context, device)

    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print "OpenCL device does not support images."
        return 1

    # Load input image from file and load it into
    # an OpenCL image object
    imageObjects[0], imgSize = LoadImage(context, sys.argv[1])

    # Create ouput image object
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNORM_INT8)
    imageObjects[1] = cl.Image(context,
                               cl.mem_flags.WRITE_ONLY,
                               clImageFormat,
                               imgSize)

    # Create sampler for sampling image object
    sampler = cl.Sampler(context,
                         False, #  Non-normalized coordinates
                         cl.addressing_mode.CLAMP_TO_EDGE,
                         cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = CreateProgram(context, device, "ImageFilter2D.cl")

    # Call the kernel directly
    localWorkSize = ( 16, 16 )
    globalWorkSize = ( RoundUp(localWorkSize[0], imgSize[0]),
                       RoundUp(localWorkSize[1], imgSize[1]) )

    program.gaussian_filter(commandQueue,
                            globalWorkSize,
                            localWorkSize,
                            imageObjects[0],
                            imageObjects[1],
                            sampler,
                            numpy.int32(imgSize[0]),
                            numpy.int32(imgSize[1]))

    # Read the output buffer back to the Host
    buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
    origin = ( 0, 0, 0 )
    region = ( imgSize[0], imgSize[1], 1 )

    cl.enqueue_read_image(commandQueue, imageObjects[1],
                          origin, region, buffer).wait()

    print "Executed program succesfully."

    # Save the image to disk
    SaveImage(sys.argv[2], buffer, imgSize)

main()

## ImageFilter2D3.py
#
# Book:      OpenCL(R) Programming Guide
# Authors:   Aaftab Munshi, Benedict Gaster, Timothy Mattson, James Fung, Dan Ginsburg
# ISBN-10:   0-321-74964-2
# ISBN-13:   978-0-321-74964-2
# Publisher: Addison-Wesley Professional
# URLs:      http://safari.informit.com/9780132488006/
#            http://www.openclprogrammingguide.com
#

# ImageFilter2D.py
#
#    This example demonstrates performing gaussian filtering on a 2D image using
#    OpenCL.  This is the same as the OpenCL C example in Chapter 8, but ported to
#    Python


import pyopencl as cl
import sys
import scipy.misc
import numpy

#
#  Create an OpenCL context on the first available platform using
#  either a GPU or CPU depending on what is available.
#
def CreateContext():
    platforms = cl.get_platforms();
    if len(platforms) == 0:
        print("Failed to find any OpenCL platforms.")
        return None

    # Next, create an OpenCL context on the first platform.  Attempt to
    # create a GPU-based context, and if that fails, try to create
    # a CPU-based context.
    devices = platforms[0].get_devices(cl.device_type.GPU)
    if len(devices) == 0:
        print("Could not find GPU device, trying CPU...")
        devices = platforms[0].get_devices(cl.device_type.CPU)
        if len(devices) == 0:
            print("Could not find OpenCL GPU or CPU device.")
            return None

    # Create a context using the first device
    context = cl.Context([devices[0]])
    return context, devices[0]

#
#  Create an OpenCL program from the kernel source file
#
def CreateProgram(context, device, fileName):
    kernelFile = open(fileName, 'r')
    kernelStr = kernelFile.read()

    # Load the program source
    program = cl.Program(context, kernelStr)

    # Build the program and check for errors
    program.build(devices=[device])

    return program


#
#  Load an image using the Python Image Library and create an OpenCL
#  image out of it
#
def LoadImage(context, fileName):
    im = scipy.misc.imread(fileName)
    # Make sure the image is RGBA formatted
    if im.shape[2] < 4:
        im = numpy.resize(im, im.shape[:2] + (4,))
        im[:,:,3] = 0


    # Convert to uint8 buffer
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNORM_INT8)

    size = im.shape[:2]
    clImage = cl.Image(context,
                       cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                       clImageFormat,
                       size,
                       None,
                       hostbuf=im
                       )

    return clImage, size

#
#  Save an image using the Python Image Library (PIL)
#
def SaveImage(fileName, buffer, imgSize):
    scipy.misc.imsave(fileName, buffer)

#
#  Round up to the nearest multiple of the group size
#
def RoundUp(groupSize, globalSize):
    r = globalSize % groupSize;
    if r == 0:
        return globalSize;
    else:
        return globalSize + groupSize - r;


def main():

    imageObjects = [ 0, 0 ]

    # Main
    if len(sys.argv) != 3:
        print("USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>")
        return 1


    # Create an OpenCL context on first available platform
    context, device = CreateContext();
    if context == None:
        print("Failed to create OpenCL context.")
        return 1

    # Create a command-queue on the first device available
    # on the created context
    commandQueue = cl.CommandQueue(context, device)

    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print("OpenCL device does not support images.")
        return 1

    # Load input image from file and load it into
    # an OpenCL image object
    imageObjects[0], imgSize = LoadImage(context, sys.argv[1])

    # Create ouput image object
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNORM_INT8)
    imageObjects[1] = cl.Image(context,
                               cl.mem_flags.WRITE_ONLY,
                               clImageFormat,
                               imgSize)

    # Create sampler for sampling image object
    sampler = cl.Sampler(context,
                         False, #  Non-normalized coordinates
                         cl.addressing_mode.CLAMP_TO_EDGE,
                         cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = CreateProgram(context, device, "ImageFilter2D.cl")

    # Call the kernel directly
    localWorkSize = ( 16, 16 )
    globalWorkSize = ( RoundUp(localWorkSize[0], imgSize[0]),
                       RoundUp(localWorkSize[1], imgSize[1]) )

    program.gaussian_filter(commandQueue,
                            globalWorkSize,
                            localWorkSize,
                            imageObjects[0],
                            imageObjects[1],
                            sampler,
                            numpy.int32(imgSize[0]),
                            numpy.int32(imgSize[1]))

    # Read the output buffer back to the Host
    buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
    origin = ( 0, 0, 0 )
    region = ( imgSize[0], imgSize[1], 1 )

    cl.enqueue_read_image(commandQueue, imageObjects[1],
                          origin, region, buffer).wait()

    print("Executed program succesfully.")

    # Save the image to disk
    SaveImage(sys.argv[2], buffer, imgSize)

main()

## log
exc@lambda ~/src/examples/opencl/ex $ optirun python ImageFilter2D.py ~/images.jpg foobar.jpg | head -n 20
225 225 : 0 232
225 225 : 1 232
225 225 : 2 232
225 225 : 3 232
225 225 : 4 232
225 225 : 5 232
225 225 : 6 232
225 225 : 7 232
225 225 : 8 232
225 225 : 9 232
225 225 : 10 232
225 225 : 11 232
225 225 : 12 232
225 225 : 13 232
225 225 : 14 232
225 225 : 15 232
225 225 : 0 233
225 225 : 1 233
225 225 : 2 233
225 225 : 3 233
close failed in file object destructor:
sys.excepthook is missing
lost sys.stderr
exc@lambda ~/src/examples/opencl/ex $ optirun python3 ImageFilter2D3.py ~/images.jpg foobar.jpg | head -n 20
0 0 : 0 228
0 0 : 1 228
0 0 : 2 228
0 0 : 3 228
0 0 : 4 228
0 0 : 5 228
0 0 : 6 228
0 0 : 7 228
0 0 : 8 228
0 0 : 9 228
0 0 : 10 228
0 0 : 11 228
0 0 : 12 228
0 0 : 13 228
0 0 : 14 228
0 0 : 15 228
0 0 : 0 229
0 0 : 1 229
0 0 : 2 229
0 0 : 3 229
Traceback (most recent call last):
  File "ImageFilter2D3.py", line 182, in <module>
    main()
  File "ImageFilter2D3.py", line 180, in main
    SaveImage(sys.argv[2], buffer, imgSize)
  File "ImageFilter2D3.py", line 96, in SaveImage
    scipy.misc.imsave(fileName, buffer)
  File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 166, in imsave
    im = toimage(arr)
  File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 235, in toimage
    raise ValueError("'arr' does not have a suitable array shape for any mode.")
ValueError: 'arr' does not have a suitable array shape for any mode.
Exception ignored in: <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
BrokenPipeError: [Errno 32] Broken pipe
exc@lambda ~/src/examples/opencl/ex $ optirun python3 ImageFilter2D3.py ~/images.jpg foobar.jpg | head -n 20
0 0 : 32 230
0 0 : 33 230
0 0 : 34 230
0 0 : 35 230
0 0 : 36 230
0 0 : 37 230
0 0 : 38 230
0 0 : 39 230
0 0 : 40 230
0 0 : 41 230
0 0 : 42 230
0 0 : 43 230
0 0 : 44 230
0 0 : 45 230
0 0 : 46 230
0 0 : 47 230
0 0 : 32 231
0 0 : 33 231
0 0 : 34 231
0 0 : 35 231
Traceback (most recent call last):
  File "ImageFilter2D3.py", line 182, in <module>
    main()
  File "ImageFilter2D3.py", line 180, in main
    SaveImage(sys.argv[2], buffer, imgSize)
  File "ImageFilter2D3.py", line 96, in SaveImage
    scipy.misc.imsave(fileName, buffer)
  File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 166, in imsave
    im = toimage(arr)
  File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 235, in toimage
    raise ValueError("'arr' does not have a suitable array shape for any mode.")
ValueError: 'arr' does not have a suitable array shape for any mode.
Exception ignored in: <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
BrokenPipeError: [Errno 32] Broken pipe
exc@lambda ~/src/examples/opencl/ex $ optirun python3 ImageFilter2D3.py ~/images.jpg foobar.jpg | head -n 20
0 0 : 144 208
0 0 : 145 208
0 0 : 146 208
0 0 : 147 208
0 0 : 148 208
0 0 : 149 208
0 0 : 150 208
0 0 : 151 208
0 0 : 152 208
0 0 : 153 208
0 0 : 154 208
0 0 : 155 208
0 0 : 156 208
0 0 : 157 208
0 0 : 158 208
0 0 : 159 208
0 0 : 144 209
0 0 : 145 209
0 0 : 146 209
0 0 : 147 209
Traceback (most recent call last):
  File "ImageFilter2D3.py", line 182, in <module>
    main()
  File "ImageFilter2D3.py", line 180, in main
    SaveImage(sys.argv[2], buffer, imgSize)
  File "ImageFilter2D3.py", line 96, in SaveImage
    scipy.misc.imsave(fileName, buffer)
  File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 166, in imsave
    im = toimage(arr)
  File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 235, in toimage
    raise ValueError("'arr' does not have a suitable array shape for any mode.")
ValueError: 'arr' does not have a suitable array shape for any mode.
Exception ignored in: <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
BrokenPipeError: [Errno 32] Broken pipe
exc@lambda ~/src/examples/opencl/ex $ optirun python3 ImageFilter2D3.py ~/images.jpg foobar.jpg | head -n 20
51638640 51638640 : 80 12
51638640 51638640 : 81 12
51638640 51638640 : 82 12
51638640 51638640 : 83 12
51638640 51638640 : 84 12
51638640 51638640 : 85 12
51638640 51638640 : 86 12
51638640 51638640 : 87 12
51638640 51638640 : 88 12
51638640 51638640 : 89 12
51638640 51638640 : 90 12
51638640 51638640 : 91 12
51638640 51638640 : 92 12
51638640 51638640 : 93 12
51638640 51638640 : 94 12
51638640 51638640 : 95 12
51638640 51638640 : 80 13
51638640 51638640 : 81 13
51638640 51638640 : 82 13
51638640 51638640 : 83 13
Traceback (most recent call last):
  File "ImageFilter2D3.py", line 182, in <module>
    main()
  File "ImageFilter2D3.py", line 175, in main
    origin, region, buffer).wait()
  File "/usr/local/lib/python3.4/dist-packages/pyopencl/__init__.py", line 1351, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.4/dist-packages/pyopencl/cffi_cl.py", line 1479, in _enqueue_read_image
    bool(is_blocking), NannyEvent._handle(hostbuf)))
  File "/usr/local/lib/python3.4/dist-packages/pyopencl/cffi_cl.py", line 549, in _handle_error
    raise e
pyopencl.cffi_cl.RuntimeError: clenqueuereadimage failed: OUT_OF_RESOURCES

## patch
--- ImageFilter2D.py	2016-01-15 09:00:53.932296609 -0500
+++ ImageFilter2D3.py	2016-01-15 09:13:38.436274867 -0500
@@ -17,7 +17,7 @@

 import pyopencl as cl
 import sys
-import Image # Python Image Library (PIL)
+import scipy.misc
 import numpy

 #
@@ -27,7 +27,7 @@
 def CreateContext():
     platforms = cl.get_platforms();
     if len(platforms) == 0:
-        print "Failed to find any OpenCL platforms."
+        print("Failed to find any OpenCL platforms.")
         return None

     # Next, create an OpenCL context on the first platform.  Attempt to
@@ -35,10 +35,10 @@
     # a CPU-based context.
     devices = platforms[0].get_devices(cl.device_type.GPU)
     if len(devices) == 0:
-        print "Could not find GPU device, trying CPU..."
+        print("Could not find GPU device, trying CPU...")
         devices = platforms[0].get_devices(cl.device_type.CPU)
         if len(devices) == 0:
-            print "Could not find OpenCL GPU or CPU device."
+            print("Could not find OpenCL GPU or CPU device.")
             return None

     # Create a context using the first device
@@ -67,33 +67,33 @@
 #  image out of it
 #
 def LoadImage(context, fileName):
-    im = Image.open(fileName)
+    im = scipy.misc.imread(fileName)
     # Make sure the image is RGBA formatted
-    if im.mode != "RGBA":
-        im = im.convert("RGBA")
-
+    if im.shape[2] < 4:
+        im = numpy.resize(im, im.shape[:2] + (4,))
+        im[:,:,3] = 0
+

     # Convert to uint8 buffer
-    buffer = im.tostring()
     clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                    cl.channel_type.UNORM_INT8)

+    size = im.shape[:2]
     clImage = cl.Image(context,
                        cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                        clImageFormat,
-                       im.size,
+                       size,
                        None,
-                       buffer
+                       hostbuf=im
                        )

-    return clImage, im.size
+    return clImage, size

 #
 #  Save an image using the Python Image Library (PIL)
 #
 def SaveImage(fileName, buffer, imgSize):
-    im = Image.fromstring("RGBA", imgSize, buffer.tostring())
-    im.save(fileName)
+    scipy.misc.imsave(fileName, buffer)

 #
 #  Round up to the nearest multiple of the group size
@@ -112,14 +112,14 @@

     # Main
     if len(sys.argv) != 3:
-        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
+        print("USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>")
         return 1


     # Create an OpenCL context on first available platform
     context, device = CreateContext();
     if context == None:
-        print "Failed to create OpenCL context."
+        print("Failed to create OpenCL context.")
         return 1

     # Create a command-queue on the first device available
@@ -128,7 +128,7 @@

     # Make sure the device supports images, otherwise exit
     if not device.get_info(cl.device_info.IMAGE_SUPPORT):
-        print "OpenCL device does not support images."
+        print("OpenCL device does not support images.")
         return 1

     # Load input image from file and load it into
@@ -174,7 +174,7 @@
     cl.enqueue_read_image(commandQueue, imageObjects[1],
                           origin, region, buffer).wait()

-    print "Executed program succesfully."
+    print("Executed program succesfully.")

     # Save the image to disk
     SaveImage(sys.argv[2], buffer, imgSize)

	// Gaussian filter of image

	__kernel void gaussian_filter(__read_only image2d_t srcImg,
	__write_only image2d_t dstImg,
	sampler_t sampler,
	int width, int height)
	{
	// Gaussian Kernel is:
	// 1 2 1
	// 2 4 2
	// 1 2 1
	float kernelWeights[9] = { 1.0f, 2.0f, 1.0f,
	2.0f, 4.0f, 2.0f,
	1.0f, 2.0f, 1.0f };

	int2 startImageCoord = (int2) (get_global_id(0) - 1, get_global_id(1) - 1);
	int2 endImageCoord = (int2) (get_global_id(0) + 1, get_global_id(1) + 1);
	int2 outImageCoord = (int2) (get_global_id(0), get_global_id(1));
	printf("%d %d : %d %d\n", width, height, outImageCoord.x, outImageCoord.y);

	if (outImageCoord.x < width && outImageCoord.y < height)
	{
	int weight = 0;
	float4 outColor = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
	for( int y = startImageCoord.y; y <= endImageCoord.y; y++)
	{
	for( int x = startImageCoord.x; x <= endImageCoord.x; x++)
	{
	outColor += (read_imagef(srcImg, sampler, (int2)(x, y)) * (kernelWeights[weight] / 16.0f));
	weight += 1;
	}
	}
	float4 centerColor = read_imagef(srcImg, sampler, outImageCoord);

	// Write the output value to image
	write_imagef(dstImg, outImageCoord, -(outColor-centerColor)*16);
	}
	}
	#
	# Book: OpenCL(R) Programming Guide
	# Authors: Aaftab Munshi, Benedict Gaster, Timothy Mattson, James Fung, Dan Ginsburg
	# ISBN-10: 0-321-74964-2
	# ISBN-13: 978-0-321-74964-2
	# Publisher: Addison-Wesley Professional
	# URLs: http://safari.informit.com/9780132488006/
	# http://www.openclprogrammingguide.com
	#

	# ImageFilter2D.py
	#
	# This example demonstrates performing gaussian filtering on a 2D image using
	# OpenCL. This is the same as the OpenCL C example in Chapter 8, but ported to
	# Python


	import pyopencl as cl
	import sys
	import Image # Python Image Library (PIL)
	import numpy

	#
	# Create an OpenCL context on the first available platform using
	# either a GPU or CPU depending on what is available.
	#
	def CreateContext():
	platforms = cl.get_platforms();
	if len(platforms) == 0:
	print "Failed to find any OpenCL platforms."
	return None

	# Next, create an OpenCL context on the first platform. Attempt to
	# create a GPU-based context, and if that fails, try to create
	# a CPU-based context.
	devices = platforms[0].get_devices(cl.device_type.GPU)
	if len(devices) == 0:
	print "Could not find GPU device, trying CPU..."
	devices = platforms[0].get_devices(cl.device_type.CPU)
	if len(devices) == 0:
	print "Could not find OpenCL GPU or CPU device."
	return None

	# Create a context using the first device
	context = cl.Context([devices[0]])
	return context, devices[0]

	#
	# Create an OpenCL program from the kernel source file
	#
	def CreateProgram(context, device, fileName):
	kernelFile = open(fileName, 'r')
	kernelStr = kernelFile.read()

	# Load the program source
	program = cl.Program(context, kernelStr)

	# Build the program and check for errors
	program.build(devices=[device])

	return program



	#
	# Load an image using the Python Image Library and create an OpenCL
	# image out of it
	#
	def LoadImage(context, fileName):
	im = Image.open(fileName)
	# Make sure the image is RGBA formatted
	if im.mode != "RGBA":
	im = im.convert("RGBA")


	# Convert to uint8 buffer
	buffer = im.tostring()
	clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
	cl.channel_type.UNORM_INT8)

	clImage = cl.Image(context,
	cl.mem_flags.READ_ONLY \| cl.mem_flags.COPY_HOST_PTR,
	clImageFormat,
	im.size,
	None,
	buffer
	)

	return clImage, im.size

	#
	# Save an image using the Python Image Library (PIL)
	#
	def SaveImage(fileName, buffer, imgSize):
	im = Image.fromstring("RGBA", imgSize, buffer.tostring())
	im.save(fileName)

	#
	# Round up to the nearest multiple of the group size
	#
	def RoundUp(groupSize, globalSize):
	r = globalSize % groupSize;
	if r == 0:
	return globalSize;
	else:
	return globalSize + groupSize - r;


	def main():

	imageObjects = [ 0, 0 ]

	# Main
	if len(sys.argv) != 3:
	print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
	return 1


	# Create an OpenCL context on first available platform
	context, device = CreateContext();
	if context == None:
	print "Failed to create OpenCL context."
	return 1

	# Create a command-queue on the first device available
	# on the created context
	commandQueue = cl.CommandQueue(context, device)

	# Make sure the device supports images, otherwise exit
	if not device.get_info(cl.device_info.IMAGE_SUPPORT):
	print "OpenCL device does not support images."
	return 1

	# Load input image from file and load it into
	# an OpenCL image object
	imageObjects[0], imgSize = LoadImage(context, sys.argv[1])

	# Create ouput image object
	clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
	cl.channel_type.UNORM_INT8)
	imageObjects[1] = cl.Image(context,
	cl.mem_flags.WRITE_ONLY,
	clImageFormat,
	imgSize)

	# Create sampler for sampling image object
	sampler = cl.Sampler(context,
	False, # Non-normalized coordinates
	cl.addressing_mode.CLAMP_TO_EDGE,
	cl.filter_mode.NEAREST)

	# Create OpenCL program
	program = CreateProgram(context, device, "ImageFilter2D.cl")

	# Call the kernel directly
	localWorkSize = ( 16, 16 )
	globalWorkSize = ( RoundUp(localWorkSize[0], imgSize[0]),
	RoundUp(localWorkSize[1], imgSize[1]) )

	program.gaussian_filter(commandQueue,
	globalWorkSize,
	localWorkSize,
	imageObjects[0],
	imageObjects[1],
	sampler,
	numpy.int32(imgSize[0]),
	numpy.int32(imgSize[1]))

	# Read the output buffer back to the Host
	buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
	origin = ( 0, 0, 0 )
	region = ( imgSize[0], imgSize[1], 1 )

	cl.enqueue_read_image(commandQueue, imageObjects[1],
	origin, region, buffer).wait()

	print "Executed program succesfully."

	# Save the image to disk
	SaveImage(sys.argv[2], buffer, imgSize)

	main()
	exc@lambda ~/src/examples/opencl/ex $ optirun python ImageFilter2D.py ~/images.jpg foobar.jpg \| head -n 20
	225 225 : 0 232
	225 225 : 1 232
	225 225 : 2 232
	225 225 : 3 232
	225 225 : 4 232
	225 225 : 5 232
	225 225 : 6 232
	225 225 : 7 232
	225 225 : 8 232
	225 225 : 9 232
	225 225 : 10 232
	225 225 : 11 232
	225 225 : 12 232
	225 225 : 13 232
	225 225 : 14 232
	225 225 : 15 232
	225 225 : 0 233
	225 225 : 1 233
	225 225 : 2 233
	225 225 : 3 233
	close failed in file object destructor:
	sys.excepthook is missing
	lost sys.stderr
	exc@lambda ~/src/examples/opencl/ex $ optirun python3 ImageFilter2D3.py ~/images.jpg foobar.jpg \| head -n 20
	0 0 : 0 228
	0 0 : 1 228
	0 0 : 2 228
	0 0 : 3 228
	0 0 : 4 228
	0 0 : 5 228
	0 0 : 6 228
	0 0 : 7 228
	0 0 : 8 228
	0 0 : 9 228
	0 0 : 10 228
	0 0 : 11 228
	0 0 : 12 228
	0 0 : 13 228
	0 0 : 14 228
	0 0 : 15 228
	0 0 : 0 229
	0 0 : 1 229
	0 0 : 2 229
	0 0 : 3 229
	Traceback (most recent call last):
	File "ImageFilter2D3.py", line 182, in <module>
	main()
	File "ImageFilter2D3.py", line 180, in main
	SaveImage(sys.argv[2], buffer, imgSize)
	File "ImageFilter2D3.py", line 96, in SaveImage
	scipy.misc.imsave(fileName, buffer)
	File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 166, in imsave
	im = toimage(arr)
	File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 235, in toimage
	raise ValueError("'arr' does not have a suitable array shape for any mode.")
	ValueError: 'arr' does not have a suitable array shape for any mode.
	Exception ignored in: <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
	BrokenPipeError: [Errno 32] Broken pipe
	exc@lambda ~/src/examples/opencl/ex $ optirun python3 ImageFilter2D3.py ~/images.jpg foobar.jpg \| head -n 20
	0 0 : 32 230
	0 0 : 33 230
	0 0 : 34 230
	0 0 : 35 230
	0 0 : 36 230
	0 0 : 37 230
	0 0 : 38 230
	0 0 : 39 230
	0 0 : 40 230
	0 0 : 41 230
	0 0 : 42 230
	0 0 : 43 230
	0 0 : 44 230
	0 0 : 45 230
	0 0 : 46 230
	0 0 : 47 230
	0 0 : 32 231
	0 0 : 33 231
	0 0 : 34 231
	0 0 : 35 231
	Traceback (most recent call last):
	File "ImageFilter2D3.py", line 182, in <module>
	main()
	File "ImageFilter2D3.py", line 180, in main
	SaveImage(sys.argv[2], buffer, imgSize)
	File "ImageFilter2D3.py", line 96, in SaveImage
	scipy.misc.imsave(fileName, buffer)
	File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 166, in imsave
	im = toimage(arr)
	File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 235, in toimage
	raise ValueError("'arr' does not have a suitable array shape for any mode.")
	ValueError: 'arr' does not have a suitable array shape for any mode.
	Exception ignored in: <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
	BrokenPipeError: [Errno 32] Broken pipe
	exc@lambda ~/src/examples/opencl/ex $ optirun python3 ImageFilter2D3.py ~/images.jpg foobar.jpg \| head -n 20
	0 0 : 144 208
	0 0 : 145 208
	0 0 : 146 208
	0 0 : 147 208
	0 0 : 148 208
	0 0 : 149 208
	0 0 : 150 208
	0 0 : 151 208
	0 0 : 152 208
	0 0 : 153 208
	0 0 : 154 208
	0 0 : 155 208
	0 0 : 156 208
	0 0 : 157 208
	0 0 : 158 208
	0 0 : 159 208
	0 0 : 144 209
	0 0 : 145 209
	0 0 : 146 209
	0 0 : 147 209
	Traceback (most recent call last):
	File "ImageFilter2D3.py", line 182, in <module>
	main()
	File "ImageFilter2D3.py", line 180, in main
	SaveImage(sys.argv[2], buffer, imgSize)
	File "ImageFilter2D3.py", line 96, in SaveImage
	scipy.misc.imsave(fileName, buffer)
	File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 166, in imsave
	im = toimage(arr)
	File "/home/exc/.local/lib/python3.4/site-packages/scipy/misc/pilutil.py", line 235, in toimage
	raise ValueError("'arr' does not have a suitable array shape for any mode.")
	ValueError: 'arr' does not have a suitable array shape for any mode.
	Exception ignored in: <_io.TextIOWrapper name='<stdout>' mode='w' encoding='UTF-8'>
	BrokenPipeError: [Errno 32] Broken pipe
	exc@lambda ~/src/examples/opencl/ex $ optirun python3 ImageFilter2D3.py ~/images.jpg foobar.jpg \| head -n 20
	51638640 51638640 : 80 12
	51638640 51638640 : 81 12
	51638640 51638640 : 82 12
	51638640 51638640 : 83 12
	51638640 51638640 : 84 12
	51638640 51638640 : 85 12
	51638640 51638640 : 86 12
	51638640 51638640 : 87 12
	51638640 51638640 : 88 12
	51638640 51638640 : 89 12
	51638640 51638640 : 90 12
	51638640 51638640 : 91 12
	51638640 51638640 : 92 12
	51638640 51638640 : 93 12
	51638640 51638640 : 94 12
	51638640 51638640 : 95 12
	51638640 51638640 : 80 13
	51638640 51638640 : 81 13
	51638640 51638640 : 82 13
	51638640 51638640 : 83 13
	Traceback (most recent call last):
	File "ImageFilter2D3.py", line 182, in <module>
	main()
	File "ImageFilter2D3.py", line 175, in main
	origin, region, buffer).wait()
	File "/usr/local/lib/python3.4/dist-packages/pyopencl/__init__.py", line 1351, in new_func
	return func(args, *kwargs)
	File "/usr/local/lib/python3.4/dist-packages/pyopencl/cffi_cl.py", line 1479, in _enqueue_read_image
	bool(is_blocking), NannyEvent._handle(hostbuf)))
	File "/usr/local/lib/python3.4/dist-packages/pyopencl/cffi_cl.py", line 549, in _handle_error
	raise e
	pyopencl.cffi_cl.RuntimeError: clenqueuereadimage failed: OUT_OF_RESOURCES
	--- ImageFilter2D.py 2016-01-15 09:00:53.932296609 -0500
	+++ ImageFilter2D3.py 2016-01-15 09:13:38.436274867 -0500
	@@ -17,7 +17,7 @@

	import pyopencl as cl
	import sys
	-import Image # Python Image Library (PIL)
	+import scipy.misc
	import numpy

	#
	@@ -27,7 +27,7 @@
	def CreateContext():
	platforms = cl.get_platforms();
	if len(platforms) == 0:
	- print "Failed to find any OpenCL platforms."
	+ print("Failed to find any OpenCL platforms.")
	return None

	# Next, create an OpenCL context on the first platform. Attempt to
	@@ -35,10 +35,10 @@
	# a CPU-based context.
	devices = platforms[0].get_devices(cl.device_type.GPU)
	if len(devices) == 0:
	- print "Could not find GPU device, trying CPU..."
	+ print("Could not find GPU device, trying CPU...")
	devices = platforms[0].get_devices(cl.device_type.CPU)
	if len(devices) == 0:
	- print "Could not find OpenCL GPU or CPU device."
	+ print("Could not find OpenCL GPU or CPU device.")
	return None

	# Create a context using the first device
	@@ -67,33 +67,33 @@
	# image out of it
	#
	def LoadImage(context, fileName):
	- im = Image.open(fileName)
	+ im = scipy.misc.imread(fileName)
	# Make sure the image is RGBA formatted
	- if im.mode != "RGBA":
	- im = im.convert("RGBA")
	-
	+ if im.shape[2] < 4:
	+ im = numpy.resize(im, im.shape[:2] + (4,))
	+ im[:,:,3] = 0
	+

	# Convert to uint8 buffer
	- buffer = im.tostring()
	clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
	cl.channel_type.UNORM_INT8)

	+ size = im.shape[:2]
	clImage = cl.Image(context,
	cl.mem_flags.READ_ONLY \| cl.mem_flags.COPY_HOST_PTR,
	clImageFormat,
	- im.size,
	+ size,
	None,
	- buffer
	+ hostbuf=im
	)

	- return clImage, im.size
	+ return clImage, size

	#
	# Save an image using the Python Image Library (PIL)
	#
	def SaveImage(fileName, buffer, imgSize):
	- im = Image.fromstring("RGBA", imgSize, buffer.tostring())
	- im.save(fileName)
	+ scipy.misc.imsave(fileName, buffer)

	#
	# Round up to the nearest multiple of the group size
	@@ -112,14 +112,14 @@

	# Main
	if len(sys.argv) != 3:
	- print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
	+ print("USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>")
	return 1


	# Create an OpenCL context on first available platform
	context, device = CreateContext();
	if context == None:
	- print "Failed to create OpenCL context."
	+ print("Failed to create OpenCL context.")
	return 1

	# Create a command-queue on the first device available
	@@ -128,7 +128,7 @@

	# Make sure the device supports images, otherwise exit
	if not device.get_info(cl.device_info.IMAGE_SUPPORT):
	- print "OpenCL device does not support images."
	+ print("OpenCL device does not support images.")
	return 1

	# Load input image from file and load it into
	@@ -174,7 +174,7 @@
	cl.enqueue_read_image(commandQueue, imageObjects[1],
	origin, region, buffer).wait()

	- print "Executed program succesfully."
	+ print("Executed program succesfully.")

	# Save the image to disk
	SaveImage(sys.argv[2], buffer, imgSize)