@ehzawad
Created August 12, 2023 06:57
CUDA programming: element-wise multiplication of two 3-D arrays
#include <stdio.h>
#include <cuda_runtime.h>

#define X 3
#define Y 6
#define Z 10

// Element-wise product of two X*Y*Z arrays stored in row-major order
// (despite the name, this is not a matrix-matrix multiplication).
__global__ void multiplyMatrices(int *a, int *b, int *c) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int z = blockIdx.z * blockDim.z + threadIdx.z;
    if (x < X && y < Y && z < Z) {
        int idx = x * Y * Z + y * Z + z;
        c[idx] = a[idx] * b[idx];
    }
}

int main() {
    int a[X * Y * Z];
    int b[X * Y * Z];
    int c[X * Y * Z];

    // Initialize a and b with the values 1 .. X*Y*Z
    for (int i = 0; i < X * Y * Z; i++) {
        a[i] = i + 1;
        b[i] = i + 1;
    }

    int *d_a, *d_b, *d_c;
    cudaMalloc((void**)&d_a, X * Y * Z * sizeof(int));
    cudaMalloc((void**)&d_b, X * Y * Z * sizeof(int));
    cudaMalloc((void**)&d_c, X * Y * Z * sizeof(int));

    cudaMemcpy(d_a, a, X * Y * Z * sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_b, b, X * Y * Z * sizeof(int), cudaMemcpyHostToDevice);

    // The block extents must match the index mapping in the kernel:
    // blockDim.x covers the X dimension, blockDim.y covers Y, blockDim.z
    // covers Z. (X, Y, Z) = (3, 6, 10) is 180 threads, which fits in a
    // single block, so a 1x1x1 grid is enough here.
    dim3 threadsPerBlock(X, Y, Z);
    dim3 numBlocks(1, 1, 1);
    multiplyMatrices<<<numBlocks, threadsPerBlock>>>(d_a, d_b, d_c);

    // cudaMemcpy on the default stream waits for the kernel to finish
    // before copying the result back to the host.
    cudaMemcpy(c, d_c, X * Y * Z * sizeof(int), cudaMemcpyDeviceToHost);

    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    // Print the result
    printf("Result (3D Matrix):\n");
    for (int x = 0; x < X; x++) {
        for (int y = 0; y < Y; y++) {
            for (int z = 0; z < Z; z++) {
                printf("%d ", c[x * Y * Z + y * Z + z]);
            }
            printf("\n");
        }
        printf("\n");
    }

    return 0;
}
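The gist ignores the return values of the CUDA runtime calls, so a failed allocation or a bad launch configuration would go unnoticed. Below is a minimal error-checking sketch; the macro name CUDA_CHECK and the usage lines are my own additions rather than part of the original gist, while cudaError_t, cudaGetErrorString, cudaGetLastError, and cudaDeviceSynchronize are standard CUDA runtime APIs.

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

// Hypothetical helper macro: evaluates a runtime call, prints the error
// string, and exits if the call did not return cudaSuccess.
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        cudaError_t err_ = (call);                                        \
        if (err_ != cudaSuccess) {                                        \
            fprintf(stderr, "CUDA error %s at %s:%d\n",                   \
                    cudaGetErrorString(err_), __FILE__, __LINE__);        \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
    } while (0)

// Example usage against the calls in the program above:
//   CUDA_CHECK(cudaMalloc((void**)&d_a, X * Y * Z * sizeof(int)));
//   multiplyMatrices<<<numBlocks, threadsPerBlock>>>(d_a, d_b, d_c);
//   CUDA_CHECK(cudaGetLastError());       // catches launch-configuration errors
//   CUDA_CHECK(cudaDeviceSynchronize());  // catches errors raised while the kernel runs

The whole file builds with nvcc as usual, e.g. nvcc multiply.cu -o multiply, assuming the source is saved as multiply.cu.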