Created
November 5, 2019 04:02
-
-
Save hayunjong83/b7069b64bbbb7ba60530727eb9676547 to your computer and use it in GitHub Desktop.
CUDA SDK sample code: 0_Simple/matrixMul.cu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <assert.h> | |
#include <cuda_runtime.h> | |
#include <helper_function.h> | |
#include <helper_cuda.h> | |
template <int BLOCK_SIZE> __global__ void MatrixMulCUDA(float *C, float *A, | |
float *B, int wA, int wB){ | |
//Block index | |
int bx = blockIdx.x; | |
int by = blockIdx.y; | |
//Thread index | |
int tx = threadIdx.x; | |
int ty = threadIdx.y; | |
// Index of the first sub-matrix of A processed by the block | |
int aBegin = wA * BLOCK_SIZE * by; | |
// Index of the last sub-matrix of A processed by the block | |
int aEnd = aBegin + wA - 1; | |
// step size used to iterate through the sub-matrix of A | |
int aStep = BLOCK_SIZE; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment