Skip to content

Instantly share code, notes, and snippets.

@krk

krk/matMul3.cu Secret

Created June 27, 2017 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save krk/27a835d34ae47d8dbc224eef53176206 to your computer and use it in GitHub Desktop.
Save krk/27a835d34ae47d8dbc224eef53176206 to your computer and use it in GitHub Desktop.
// matMul3: shared-memory staged multiply-accumulate kernel.
//
// Each thread keeps one running sum. For every one of the n / BLOCK_SIZE
// passes it stores one value fetched from d_A and one fetched from d_B into
// two BLOCK_SIZE x BLOCK_SIZE __shared__ buffers, then adds BLOCK_SIZE
// products read back from those buffers to its sum. The sum is stored to
// d_C exactly once, at offset (x-index * m + y-index).
//
// Parameters:
//   d_C  output buffer; one float is written per thread.
//   d_A  first input buffer, read through loc() with n as its second argument.
//   d_B  second input buffer, read through loc() with n as its second argument.
//   m    multiplier applied to the x-derived index when addressing d_C.
//   n    passed to loc(); n / BLOCK_SIZE (integer division, any remainder is
//        dropped) is the number of passes.
//   r    not used by this kernel.
//
// Launch assumptions (not checked here):
//   - assumes blockDim.x == blockDim.y == BLOCK_SIZE, since indices are built
//     from BLOCK_SIZE rather than blockDim -- TODO confirm at the launch site.
//   - there is no bounds guard, so every launched thread reads through loc()
//     and writes d_C; the grid presumably has to match the data exactly.
//
// NOTE(review): loc() is defined outside this snippet. It is handed the pass
// counter itself, not an element offset derived from it, and the per-thread
// indices depend on only one thread coordinate each. Unless loc() consults
// threadIdx/BLOCK_SIZE internally, the value staged from d_A does not vary
// with threadIdx.y and the value staged from d_B does not vary with
// threadIdx.x -- verify against loc()'s definition before relying on the
// result being a full matrix product.
__global__
void matMul3(
    float* d_C,
    float* d_A,
    float* d_B,
    int m,
    int n,
    int r)
{
    // Per-block staging buffers for the values fetched from d_A and d_B.
    __shared__ float tileA[BLOCK_SIZE][BLOCK_SIZE];
    __shared__ float tileB[BLOCK_SIZE][BLOCK_SIZE];

    const int tx = threadIdx.x;
    const int ty = threadIdx.y;

    // Per-thread indices: one derived from the x coordinates, one from y.
    const int gx = blockIdx.x * BLOCK_SIZE + tx;
    const int gy = blockIdx.y * BLOCK_SIZE + ty;

    // Number of staging passes; truncating division.
    const int passCount = n / BLOCK_SIZE;

    // Running partial sum for this thread.
    float acc = 0.0f;

    for (int pass = 0; pass < passCount; ++pass)
    {
        // Load the elements that will be used into shared memory.
        tileA[ty][tx] = *loc(d_A, n, gx, pass);
        tileB[ty][tx] = *loc(d_B, n, pass, gy);

        // Every thread's stores must be visible before any thread reads.
        __syncthreads();

        int t = 0;
        while (t < BLOCK_SIZE)
        {
            acc += tileA[ty][t] * tileB[t][tx];
            ++t;
        }

        // Wait until all threads are done with the buffers before the next
        // pass overwrites them.
        __syncthreads();
    }

    // The output is written to C a single time.
    d_C[gx * m + gy] = acc;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment