Skip to content

Instantly share code, notes, and snippets.

@Bhavya031
Created September 11, 2023 04:34
Show Gist options
  • Save Bhavya031/8932c375424b9751a58610d0018d3095 to your computer and use it in GitHub Desktop.
Save Bhavya031/8932c375424b9751a58610d0018d3095 to your computer and use it in GitHub Desktop.
CUDA matrix multiplication (host driver + kernel)
#include <stdio.h>
#include <stdlib.h>
// Matrix dimensions
#define N 4
#define M 4
#define P 4
// CUDA kernel for matrix multiplication
// Computes C = A * B for row-major integer matrices, one output element per
// thread. A is N x M, B is M x P, C is N x P (compile-time constants above).
// Expects a 2D launch where (blockIdx.x, threadIdx.x) index columns and
// (blockIdx.y, threadIdx.y) index rows; the grid may overshoot the matrix,
// so out-of-range threads exit via the guard below.
__global__ void matrixMul(int *a, int *b, int *c) {
  const int outRow = blockIdx.y * blockDim.y + threadIdx.y;
  const int outCol = blockIdx.x * blockDim.x + threadIdx.x;
  // Guard clause: threads outside the N x P output do nothing.
  if (outRow >= N || outCol >= P) return;
  int acc = 0;
  for (int i = 0; i < M; ++i)
    acc += a[outRow * M + i] * b[i * P + outCol];
  c[outRow * P + outCol] = acc;
}
// Abort with a readable message if a CUDA runtime call failed.
static void checkCuda(cudaError_t err, const char *what) {
  if (err != cudaSuccess) {
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
    exit(EXIT_FAILURE);
  }
}

// Host driver: builds N x M and M x P integer matrices, multiplies them on
// the GPU with matrixMul, copies back the N x P product and verifies it
// against a CPU reference. Returns 0 on success, nonzero on any failure.
int main() {
  int *a, *b, *c;       // host matrices (row-major)
  int *d_a, *d_b, *d_c; // device copies

  const size_t bytesA = N * M * sizeof(int);
  const size_t bytesB = M * P * sizeof(int);
  const size_t bytesC = N * P * sizeof(int);

  // Allocate memory on the CPU (and actually check the result).
  a = (int *)malloc(bytesA);
  b = (int *)malloc(bytesB);
  c = (int *)malloc(bytesC);
  if (!a || !b || !c) {
    fprintf(stderr, "host allocation failed\n");
    return EXIT_FAILURE;
  }

  // Initialize matrices a and b with small deterministic values.
  // (The original copied uninitialized host memory to the device.)
  for (int i = 0; i < N * M; ++i) a[i] = i % 7;
  for (int i = 0; i < M * P; ++i) b[i] = i % 5;

  // Allocate memory on the GPU.
  checkCuda(cudaMalloc((void **)&d_a, bytesA), "cudaMalloc d_a");
  checkCuda(cudaMalloc((void **)&d_b, bytesB), "cudaMalloc d_b");
  checkCuda(cudaMalloc((void **)&d_c, bytesC), "cudaMalloc d_c");

  // Copy inputs from CPU to GPU.
  checkCuda(cudaMemcpy(d_a, a, bytesA, cudaMemcpyHostToDevice), "copy a");
  checkCuda(cudaMemcpy(d_b, b, bytesB, cudaMemcpyHostToDevice), "copy b");

  // Launch configuration: 16x16 thread blocks, grid rounded UP so every
  // output element gets a thread. The original N/16 truncated to a (0,0)
  // grid for N,P < 16, so the kernel never ran at all. x maps to columns
  // (P) and y to rows (N), matching the kernel's indexing.
  dim3 dimBlock(16, 16);
  dim3 dimGrid((P + dimBlock.x - 1) / dimBlock.x,
               (N + dimBlock.y - 1) / dimBlock.y);
  matrixMul<<<dimGrid, dimBlock>>>(d_a, d_b, d_c);
  checkCuda(cudaGetLastError(), "kernel launch");        // bad-config errors
  checkCuda(cudaDeviceSynchronize(), "kernel execution"); // async exec errors

  // Copy the result from GPU to CPU (cudaMemcpy also synchronizes).
  checkCuda(cudaMemcpy(c, d_c, bytesC, cudaMemcpyDeviceToHost), "copy c");

  // Verify against a straightforward CPU reference (exact integer match).
  int status = EXIT_SUCCESS;
  for (int row = 0; row < N && status == EXIT_SUCCESS; ++row) {
    for (int col = 0; col < P; ++col) {
      int expect = 0;
      for (int k = 0; k < M; ++k)
        expect += a[row * M + k] * b[k * P + col];
      if (c[row * P + col] != expect) {
        fprintf(stderr, "mismatch at (%d,%d): got %d, want %d\n",
                row, col, c[row * P + col], expect);
        status = EXIT_FAILURE;
        break;
      }
    }
  }

  // Free GPU memory.
  cudaFree(d_a);
  cudaFree(d_b);
  cudaFree(d_c);
  // Free CPU memory.
  free(a);
  free(b);
  free(c);
  return status;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment