Skip to content

Instantly share code, notes, and snippets.

@yousefhamza
Last active December 3, 2015 11:52
Show Gist options
  • Save yousefhamza/eae794e47bd941b7b932 to your computer and use it in GitHub Desktop.
Save yousefhamza/eae794e47bd941b7b932 to your computer and use it in GitHub Desktop.
Soble algorithm in Cuda
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#define BLOCKSIZE 2
#define THREADSIZE 3
void cpu_convolve(float *image, float *filter, float *result, unsigned int N) {
float sum;
for (unsigned int i = 1; i < N - 1; i++)
for (unsigned int j = 1; j < N - 1; j++) {
sum = 0;
sum += image[(i - 1) * N + j - 1] * filter[0] + image[(i - 1) * N + j] * filter[1] + image[(i - 1) * N + j + 1] * filter[2]
+ image[i * N + j - 1] * filter[3] + image[i * N + j] * filter[4] + image[i * N + j + 1] * filter[5]
+ image[(i + 1) * N + j - 1] * filter[6] + image[(i + 1) * N + j] * filter[7] + image[(i + 1) * N + j + 1] * filter[8];
result[i * N + j] = sum;
}
}
__global__ void gpu_convolve(float *image, float *filter, float *result, unsigned int N) {
__shared__ float s_filter[9];
for (unsigned char i = 0; i < 9; ++i) s_filter[i] = filter[i];
__syncthreads();
float sum = 0;
int j = threadIdx.x + blockDim.x * blockIdx.x + 1;
int i = threadIdx.y + blockDim.y * blockIdx.y + 1;
/* Add image to shared memory */
__shared__ float s_image[BLOCKSIZE + 2][BLOCKSIZE + 2];
int ty = threadIdx.x + 1; int tx = threadIdx.y + 1;
s_image[tx][ty] = image[i * N + j];
// Handling corners
if (threadIdx.x == 0 &&
threadIdx.y == 0) s_image[tx - 1][ty - 1] = image[(i - 1) * N + j - 1];
else if (threadIdx.x == THREADSIZE - 1 &&
threadIdx.y == THREADSIZE - 1) s_image[tx + 1][ty + 1] = image[(i + 1) * N + j + 1];
else if (threadIdx.x == 0 &&
threadIdx.y == THREADSIZE - 1) s_image[tx + 1][ty - 1] = image[(i + 1) * N + j - 1];
else if (threadIdx.y == 0 &&
threadIdx.x == THREADSIZE - 1) s_image[tx - 1][ty + 1] = image[(i - 1) * N + j + 1];
// Handling edges
if (threadIdx.x == 0) s_image[tx][ty - 1] = image[i * N + j - 1];
if (threadIdx.y == 0) s_image[tx - 1][ty] = image[(i - 1) * N + j];
if (threadIdx.x == THREADSIZE - 1) s_image[tx][ty + 1] = image[i * N + j + 1];
if (threadIdx.y == THREADSIZE - 1) s_image[tx + 1][ty] = image[(i + 1) * N + j];
__syncthreads();
sum = s_image[i - 1][j - 1] * s_filter[0] + s_image[i - 1][j] * s_filter[1] + s_image[i - 1][j + 1] * s_filter[2]
+ s_image[i][j - 1] * s_filter[3] + s_image[i][j] * s_filter[4] + s_image[i][j + 1] * s_filter[5]
+ s_image[i + 1][j - 1] * s_filter[6] + s_image[i + 1][j] * s_filter[7] + s_image[i + 1][j + 1] * s_filter[8];
result[i * N + j] = sum;
}
// decearing the cuda Constant filter of size 9
__constant__ float dev_filter[9];
__global__ void gpu_convolve_constant_filter(float *image, float *result, unsigned int N) {
float sum = 0;
int j = threadIdx.x + blockDim.x * blockIdx.x + 1;
int i = threadIdx.y + blockDim.y * blockIdx.y + 1;
/* Add image to shared memory */
__shared__ float s_image[BLOCKSIZE + 2][BLOCKSIZE + 2];
int ty = threadIdx.x + 1; int tx = threadIdx.y + 1;
s_image[tx][ty] = image[i * N + j];
// Handling corners
if (threadIdx.x == 0 &&
threadIdx.y == 0) s_image[tx - 1][ty - 1] = image[(i - 1) * N + j - 1];
else if (threadIdx.x == THREADSIZE - 1 &&
threadIdx.y == THREADSIZE - 1) s_image[tx + 1][ty + 1] = image[(i + 1) * N + j + 1];
else if (threadIdx.x == 0 &&
threadIdx.y == THREADSIZE - 1) s_image[tx + 1][ty - 1] = image[(i + 1) * N + j - 1];
else if (threadIdx.y == 0 &&
threadIdx.x == THREADSIZE - 1) s_image[tx - 1][ty + 1] = image[(i - 1) * N + j + 1];
// Handling edges
if (threadIdx.x == 0) s_image[tx][ty - 1] = image[i * N + j - 1];
if (threadIdx.y == 0) s_image[tx - 1][ty] = image[(i - 1) * N + j];
if (threadIdx.x == THREADSIZE - 1) s_image[tx][ty + 1] = image[i * N + j + 1];
if (threadIdx.y == THREADSIZE - 1) s_image[tx + 1][ty] = image[(i + 1) * N + j];
__syncthreads();
sum = s_image[i - 1][j - 1] * dev_filter[0] + s_image[i - 1][j] * dev_filter[1] + s_image[i - 1][j + 1] * dev_filter[2]
+ s_image[i][j - 1] * dev_filter[3] + s_image[i][j] * dev_filter[4] + s_image[i][j + 1] * dev_filter[5]
+ s_image[i + 1][j - 1] * dev_filter[6] + s_image[i + 1][j] * dev_filter[7] + s_image[i + 1][j + 1] * dev_filter[8];
result[i * N + j] = sum;
}
void print_matrix(float arr[], unsigned int N) {
for (unsigned int i = 1; i < N - 1; ++i) {
for (unsigned int j = 1; j < N - 1; ++j) {
printf("%10f", arr[i * N + j]);
}
printf("\n");
}
}
int main(int argc, char *argv[]) {
int Grid_Dim_x = BLOCKSIZE, Grid_Dim_y = BLOCKSIZE;
int Block_Dim_x = THREADSIZE, Block_Dim_y = THREADSIZE;
const unsigned int N = 4;
float image[] = { 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 0,
0, 1, 1, 1, 1, 0,
0, 1, 1, 1, 1, 0,
0, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0 };
// host copy for the filter
float filter[] = { 1, 1, 1,
1, 1, 1,
1, 1, 1 };
float result[(N + 2) * (N + 2)];
float d_result_cpu[(N + 2) * (N + 2)];
dim3 Grid(Grid_Dim_x, Grid_Dim_y);
dim3 Block(Block_Dim_x, Block_Dim_y);
float *d_image, *d_filter, *d_result;
cudaMalloc((void**)&d_image, sizeof(image));
cudaMalloc((void**)&d_filter, sizeof(filter));
cudaMalloc((void**)&d_result, sizeof(result));
cudaMemcpy(d_image, image, sizeof(image), cudaMemcpyHostToDevice);
cudaMemcpy(d_filter, filter, sizeof(filter), cudaMemcpyHostToDevice);
cpu_convolve(image, filter, result, N + 2);
// normal gpu_convolve function
gpu_convolve << <Grid, Block >> >(d_image, d_filter, d_result, N + 2);
cudaMemcpy(d_result_cpu, d_result, sizeof(d_result_cpu), cudaMemcpyDeviceToHost);
print_matrix(result, N + 2);
printf("\n");
print_matrix(d_result_cpu, N + 2);
// prepare filter to send to constant memory
cudaMemcpyToSymbol(dev_filter, filter, sizeof(filter));
// gpu_convolve_constant_filter function with constant filter @author Moataz_Farid
// gpu_convolve_constant_filter << <Grid, Block >> >(d_image, d_result, N + 2);
//cuda copy result
cudaMemcpy(d_result_cpu, d_result, sizeof(d_result_cpu), cudaMemcpyDeviceToHost);
//print result
print_matrix(d_result_cpu, N + 2);
cudaFree(d_image);
cudaFree(d_filter);
cudaFree(d_result);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment