Skip to content

Instantly share code, notes, and snippets.

@Bhavya031
Created September 11, 2023 04:36
Show Gist options
  • Save Bhavya031/ea2abc4edc4903f3674720494990e77e to your computer and use it in GitHub Desktop.
Save Bhavya031/ea2abc4edc4903f3674720494990e77e to your computer and use it in GitHub Desktop.
CUDA matrix multiplication
#include <stdio.h>
#include <cuda.h>
#define N 3 // Matrix size (3x3)
// Computes C = A * B for square N x N matrices stored row-major in global memory.
//
// Expected launch layout: one thread per output element, i.e. the grid/block
// product must cover at least N x N threads (the gist launches
// <<<dim3(1,1), dim3(N,N)>>>). Threads outside the matrix bounds exit early,
// so over-provisioned launches are safe.
__global__ void matrixMultiply(const int *__restrict__ a,
                               const int *__restrict__ b,
                               int *__restrict__ c)
{
    int row = blockIdx.y * blockDim.y + threadIdx.y;
    int col = blockIdx.x * blockDim.x + threadIdx.x;
    // Bounds guard: without it, any launch config larger than N x N
    // (e.g. a padded block size) would read and write out of bounds.
    if (row < N && col < N) {
        int sum = 0;
        // Dot product of row `row` of A with column `col` of B.
        for (int i = 0; i < N; i++) {
            sum += a[row * N + i] * b[i * N + col];
        }
        c[row * N + col] = sum;
    }
}
// Abort with a diagnostic if a CUDA runtime call fails. Kernel launches do not
// return an error directly, so the launch is checked separately below via
// cudaGetLastError() + cudaDeviceSynchronize().
#define CUDA_CHECK(call)                                                      \
    do {                                                                      \
        cudaError_t err_ = (call);                                            \
        if (err_ != cudaSuccess) {                                            \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,     \
                    cudaGetErrorString(err_));                                \
            return 1;                                                         \
        }                                                                     \
    } while (0)

// Reads two N x N integer matrices from stdin, multiplies them on the GPU,
// and prints the N x N product. Returns 0 on success, 1 on input or CUDA error.
int main()
{
    int a[N][N], b[N][N], c[N][N];
    int *d_a, *d_b, *d_c;
    const size_t bytes = N * N * sizeof(int);

    // Read matrices A and B from stdin, rejecting malformed input instead of
    // silently multiplying uninitialized garbage.
    printf("Enter values for matrix A (3x3):\n");
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (scanf("%d", &a[i][j]) != 1) {
                fprintf(stderr, "Invalid input for matrix A\n");
                return 1;
            }
        }
    }
    printf("Enter values for matrix B (3x3):\n");
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (scanf("%d", &b[i][j]) != 1) {
                fprintf(stderr, "Invalid input for matrix B\n");
                return 1;
            }
        }
    }

    // Allocate device memory for the operands and the result.
    CUDA_CHECK(cudaMalloc((void **)&d_a, bytes));
    CUDA_CHECK(cudaMalloc((void **)&d_b, bytes));
    CUDA_CHECK(cudaMalloc((void **)&d_c, bytes));

    // Copy matrices A and B from host to device.
    CUDA_CHECK(cudaMemcpy(d_a, a, bytes, cudaMemcpyHostToDevice));
    CUDA_CHECK(cudaMemcpy(d_b, b, bytes, cudaMemcpyHostToDevice));

    // One thread per output element: a single N x N block covers the matrix.
    dim3 dimGrid(1, 1);
    dim3 dimBlock(N, N);
    matrixMultiply<<<dimGrid, dimBlock>>>(d_a, d_b, d_c);
    // A launch returns no error itself: catch bad-config errors immediately,
    // then synchronize to surface any in-kernel execution fault.
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());

    // Copy the result matrix C back to the host.
    CUDA_CHECK(cudaMemcpy(c, d_c, bytes, cudaMemcpyDeviceToHost));

    // Print the result matrix C.
    printf("Result matrix C (3x3):\n");
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            printf("%d\t", c[i][j]);
        }
        printf("\n");
    }

    // Free device memory.
    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_b));
    CUDA_CHECK(cudaFree(d_c));
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment