Skip to content

Instantly share code, notes, and snippets.

@maweigert
Created July 29, 2021 21:19
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save maweigert/f8ac56aa6e01db9bf8fc393188ed4b3e to your computer and use it in GitHub Desktop.
Save maweigert/f8ac56aa6e01db9bf8fc393188ed4b3e to your computer and use it in GitHub Desktop.
Average-Downsampling example in OpenCL (via gputools)
# Average-Downsampling example in OpenCL (via gputools)
# please install the required packages first via
# pip install gputools scikit-image
import numpy as np
from gputools import OCLProgram, OCLArray, get_device
from timeit import default_timer
from skimage.transform import downscale_local_mean
# OpenCL kernel: every work item (i, j) averages one BLOCK x BLOCK tile of the
# input image and stores the result in output pixel (i, j).  BLOCK is supplied
# at build time through a -D compiler define (see build_options below).
#
# The buffers are declared `ushort` so that they match the np.uint16 host
# arrays; a signed `short` would misinterpret values above 32767.
kernel = """
__kernel void downsample2d(__global ushort * input,
                           __global ushort * output){
  // the global work size equals the (width, height) of the output image
  int i = get_global_id(0);
  int j = get_global_id(1);
  int Nx = get_global_size(0);
  int Ny = get_global_size(1);

  // accumulate in a wider integer so the tile sum cannot wrap at 16 bit
  int res = 0;

  // the input row stride is BLOCK*Nx, i.e. the input width has to be
  // exactly BLOCK times the output width
  for (int m = 0; m < BLOCK; ++m)
    for (int n = 0; n < BLOCK; ++n)
      res += input[BLOCK*Nx*(BLOCK*j+m)+BLOCK*i+n];

  // truncating integer mean of the tile
  output[Nx*j+i] = (ushort)(res/BLOCK/BLOCK);
}
"""

downsample_factor = 4

# compile the kernel with the tile size baked in as a preprocessor constant
prog = OCLProgram(
    src_str=kernel,
    build_options=['-D', f'BLOCK={downsample_factor}'],
)

shape = (10000,) * 2

# the kernel derives the input row stride from the output width, which is
# only valid when the image divides evenly into BLOCK x BLOCK tiles
if any(s % downsample_factor for s in shape):
    raise ValueError(
        f'image shape {shape} must be divisible by '
        f'the downsample factor {downsample_factor}'
    )

x = np.random.randint(0, 1000, shape).astype(np.uint16)

# flush device queue to not bias timings
get_device().queue.finish()

# the actual downsampling; the timed section covers the host->device upload,
# the kernel launch and the device->host download
t = default_timer()
x_g = OCLArray.from_array(x)
y_g = OCLArray.empty(tuple(s // downsample_factor for s in x.shape), x.dtype)
# numpy shapes are (rows, cols) whereas the kernel treats global id 0 as the
# column index, hence the reversed shape as global work size
prog.run_kernel('downsample2d', y_g.shape[::-1], None, x_g.data, y_g.data)
y = y_g.get()
t = default_timer() - t

print(f'image shape {x.shape}')
print(f'downsample factor {downsample_factor}')
print(f'runtime {1000*t:.2f} ms')
print(f'throughput {x.nbytes/1e6/t:.2f} MB/s')

# compare with scikit image (its float block mean is truncated to the input
# dtype, matching the integer division done in the kernel)
y0 = downscale_local_mean(x, (downsample_factor,) * x.ndim).astype(x.dtype)
print(f'\nclose to skimage? {np.allclose(y,y0)}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment