Skip to content

Instantly share code, notes, and snippets.

@mickeyouyou
Created March 10, 2020 10:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mickeyouyou/26807675e6c413bc957d5517eec3db1c to your computer and use it in GitHub Desktop.
Save mickeyouyou/26807675e6c413bc957d5517eec3db1c to your computer and use it in GitHub Desktop.
Naive CUDA square-matrix multiplication example (one thread per output row).
#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
// Launch configuration: BLOCK_NUM blocks of THREAD_NUM threads, one thread
// per matrix row.
#define THREAD_NUM 256
#define BLOCK_NUM 32
// Parenthesize expression macros so they expand safely inside any larger
// expression (e.g. `x / R_SIZE` or `M_SIZE - 1` would mis-expand otherwise).
#define R_SIZE (THREAD_NUM * BLOCK_NUM)  // matrix dimension (rows == cols)
#define M_SIZE (R_SIZE * R_SIZE)         // total element count per matrix
// Naive square matrix multiply: result = mat1 x mat2, all three R_SIZE x R_SIZE,
// stored row-major in flat device arrays.
// Expected launch: <<<BLOCK_NUM, THREAD_NUM>>> (1-D grid, 1-D blocks), so that
// BLOCK_NUM * THREAD_NUM == R_SIZE and each thread computes one full output row.
// No shared memory; no bounds guard needed because the grid exactly covers R_SIZE rows.
__global__ void mat_mul(int* mat1, int* mat2, int* result) {
    int bid = blockIdx.x;
    int tid = threadIdx.x;
    // This thread's output row (original had typo THRREAD_NUM).
    int index = bid * THREAD_NUM + tid;
    // Original outer loop declared `c` but tested/incremented undeclared `i`.
    for (int c = 0; c < R_SIZE; c++) {
        // Accumulate in a register: the output buffer comes from cudaMalloc
        // and is uninitialized, so `+=` into it would produce garbage.
        int sum = 0;
        for (int n = 0; n < R_SIZE; n++) {
            sum += mat1[index * R_SIZE + n] * mat2[n * R_SIZE + c];
        }
        result[index * R_SIZE + c] = sum;
    }
}
// Host driver: allocate host and device matrices, copy inputs to the GPU,
// run the multiply kernel, and copy the product back.
// Returns 0 on success, 1 on allocation or launch failure.
int main() {
    int* mat1;
    int* mat2;
    int* result;
    int* g_mat1;
    int* g_mat2;
    int* g_mat_result;
    const size_t bytes = (size_t)M_SIZE * sizeof(int);

    // Host buffers. calloc zero-fills so the demo computes a deterministic
    // (all-zero) product instead of multiplying uninitialized memory.
    mat1 = (int*)calloc(M_SIZE, sizeof(int));
    mat2 = (int*)calloc(M_SIZE, sizeof(int));
    result = (int*)malloc(bytes);
    if (mat1 == NULL || mat2 == NULL || result == NULL) {
        fprintf(stderr, "host allocation failed\n");
        return 1;
    }

    // Device buffers — the original only allocated g_mat1 ("same to..."),
    // leaving g_mat2/g_mat_result as wild pointers passed to the kernel.
    cudaMalloc((void**)&g_mat1, bytes);
    cudaMalloc((void**)&g_mat2, bytes);
    cudaMalloc((void**)&g_mat_result, bytes);

    // Copy inputs host -> device (enum is cudaMemcpyHostToDevice, CamelCase).
    cudaMemcpy(g_mat1, mat1, bytes, cudaMemcpyHostToDevice);
    cudaMemcpy(g_mat2, mat2, bytes, cudaMemcpyHostToDevice);

    // Kernel launch needs triple angle brackets; grid covers exactly R_SIZE rows.
    mat_mul<<<BLOCK_NUM, THREAD_NUM>>>(g_mat1, g_mat2, g_mat_result);
    cudaError_t err = cudaGetLastError();  // launch-config errors surface here
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Blocking device->host copy also synchronizes with the kernel, so
    // `result` is complete once this returns.
    cudaMemcpy(result, g_mat_result, bytes, cudaMemcpyDeviceToHost);

    // Release device and host resources (original leaked everything).
    cudaFree(g_mat1);
    cudaFree(g_mat2);
    cudaFree(g_mat_result);
    free(mat1);
    free(mat2);
    free(result);
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment