// Gist by @fede-vaccaro, created December 4, 2018, 15:51
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "cuda_runtime_api.h"
// Useful defines
#ifndef USEFULDEFINES
#define USEFULDEFINES
#define Mask_width 3
#define Mask_radius (Mask_width / 2)
#define TILE_WIDTH 16
// Shared-memory tile edge: the output tile plus a halo of Mask_radius
// pixels on each side.
#define w (TILE_WIDTH + Mask_width - 1)
// Clamp to [0, 1] with the single-precision intrinsics, avoiding
// float/double overload ambiguity in device code.
#define clamp(x) (fminf(fmaxf((x), 0.0f), 1.0f))
#endif // !USEFULDEFINES
// CUDA convolution kernel for two-dimensional, multi-channel images.
// It stages each input tile in shared memory. Launch it with blocks of
// w x w threads: the inner TILE_WIDTH x TILE_WIDTH threads each produce
// one output pixel, while the remaining threads only help load the halo.
__global__ void convolution(const float *I, const float *__restrict__ M,
                            float *P, int channels, int width, int height) {
    __shared__ float localPattern[w][w];
    int tx = threadIdx.x;
    int ty = threadIdx.y;
    // Map this thread to an input pixel, shifted by Mask_radius so the
    // tile carries a zero-padded border around the output region.
    int col = blockIdx.x * TILE_WIDTH + tx - Mask_radius;
    int row = blockIdx.y * TILE_WIDTH + ty - Mask_radius;
    // Only threads whose stencil lies fully inside the tile write output.
    bool isWriter = (tx >= Mask_radius && tx < TILE_WIDTH + Mask_radius) &&
                    (ty >= Mask_radius && ty < TILE_WIDTH + Mask_radius);
    for (int k = 0; k < channels; k++) {
        // Load the tile of channel k (halo included) into shared memory,
        // substituting zeros outside the image.
        if (col >= 0 && col < width && row >= 0 && row < height) {
            localPattern[ty][tx] = I[(row * width + col) * channels + k];
        } else {
            localPattern[ty][tx] = 0.0f;
        }
        __syncthreads();
        // Accumulate the mask over the neighbourhood centred on (ty, tx);
        // (row, col) already names the pixel under the stencil centre, so
        // the result is stored at the same coordinates.
        if (isWriter) {
            float value = 0.0f;
            for (int i = 0; i < Mask_width; ++i) {
                for (int j = 0; j < Mask_width; ++j) {
                    value += localPattern[ty + i - Mask_radius][tx + j - Mask_radius] *
                             M[i * Mask_width + j];
                }
            }
            if (row < height && col < width) {
                P[(row * width + col) * channels + k] = clamp(value);
            }
        }
        // Wait until every thread has finished reading localPattern before
        // the next channel iteration overwrites it.
        __syncthreads();
    }
}
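
// A minimal host-side launch sketch, assuming a 512x512 RGB image and a
// 3x3 box-blur mask; the buffer names and sizes below are illustrative,
// not part of the kernel's contract. What the kernel does require is the
// launch configuration: blocks of w x w threads, each covering one
// TILE_WIDTH x TILE_WIDTH tile of the output.
#include <cstdio>
#include <cstdlib>

int main() {
    const int width = 512, height = 512, channels = 3;
    const size_t imgBytes = (size_t)width * height * channels * sizeof(float);
    const size_t maskBytes = Mask_width * Mask_width * sizeof(float);

    // Host buffers: a constant dummy image and a normalized box-blur mask.
    float *h_input = (float *)malloc(imgBytes);
    float *h_output = (float *)malloc(imgBytes);
    float h_mask[Mask_width * Mask_width];
    for (int i = 0; i < width * height * channels; ++i)
        h_input[i] = 0.5f;
    for (int i = 0; i < Mask_width * Mask_width; ++i)
        h_mask[i] = 1.0f / (Mask_width * Mask_width);

    // Device buffers.
    float *d_input, *d_mask, *d_output;
    cudaMalloc(&d_input, imgBytes);
    cudaMalloc(&d_mask, maskBytes);
    cudaMalloc(&d_output, imgBytes);
    cudaMemcpy(d_input, h_input, imgBytes, cudaMemcpyHostToDevice);
    cudaMemcpy(d_mask, h_mask, maskBytes, cudaMemcpyHostToDevice);

    // Each block has w x w threads (so the halo can be loaded) but
    // produces only a TILE_WIDTH x TILE_WIDTH patch of the output.
    dim3 block(w, w);
    dim3 grid((width + TILE_WIDTH - 1) / TILE_WIDTH,
              (height + TILE_WIDTH - 1) / TILE_WIDTH);
    convolution<<<grid, block>>>(d_input, d_mask, d_output,
                                 channels, width, height);
    cudaDeviceSynchronize();

    cudaMemcpy(h_output, d_output, imgBytes, cudaMemcpyDeviceToHost);
    // Corner pixel: only 4 of the 9 taps fall inside the zero padding,
    // so the expected value is 4 * 0.5 / 9, roughly 0.2222.
    printf("P[0] = %f\n", h_output[0]);

    cudaFree(d_input);
    cudaFree(d_mask);
    cudaFree(d_output);
    free(h_input);
    free(h_output);
    return 0;
}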