Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
__global__ void kMartixByMatrixElementwise(const float *m1, const float *m2, float *output) {
/* Computes the elementwise product of two arrays: output[i] = m1[i] * m2[i].
Inputs:
m1: array (first factor)
m2: array (second factor)
output: array, the results of the multiplication are stored here
Launch layout:
Only the x dimension of the grid and block is used for indexing, so launch
a 1D grid of 1D blocks. There is no bounds check (the kernel receives no
length), so gridDim.x * blockDim.x must not exceed the number of elements
in any of the three arrays; every launched thread reads and writes exactly
one element.
*/
    // Widen before multiplying: blockIdx.x * blockDim.x is a 32-bit unsigned
    // product and would wrap (or turn negative once narrowed to int) for
    // grids that cover more than 2^31 elements.
    const size_t id = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    output[id] = m1[id] * m2[id];
}
/* Device-side launcher for kMartixByMatrixElementwise.
Multiplies the first width * height elements of m1 and m2 elementwise and
stores the products in output, then waits for the child kernel(s) to finish.
Inputs:
m1, m2: input arrays holding at least width * height floats each
output: array receiving the width * height products
width, height: dimensions; only their product (the element count) matters
Returns:
output on success (also when width or height is not positive, in which case
nothing is launched); NULL if a launch or the synchronisation reports an
error.
Notes:
- Launching a kernel from device code requires CUDA dynamic parallelism.
- Every thread that calls this function issues its own launch(es).
- Device-side cudaDeviceSynchronize() is deprecated in recent CUDA releases
  and removed in later ones; it is kept here because callers rely on the
  results being complete when this function returns.
*/
__device__ float* dMartixByMatrixElementwise(const float *m1, const float *m2, float *output, const int width, const int height){
    // An empty problem needs no launch; a zero-sized grid or block would be
    // rejected as an invalid configuration anyway.
    if (width <= 0 || height <= 0) {
        return output;
    }

    // Fixed block size instead of using `height` as the block dimension,
    // which fails as soon as height exceeds the per-block thread limit.
    const unsigned int kThreadsPerBlock = 256;
    const size_t kMaxGridX = 0x7FFFFFFF;

    // Widen before multiplying so the element count cannot overflow int.
    const size_t total = static_cast<size_t>(width) * static_cast<size_t>(height);
    const size_t fullBlocks = total / kThreadsPerBlock;
    const unsigned int tail = static_cast<unsigned int>(total % kThreadsPerBlock);

    if (fullBlocks > kMaxGridX) {
        return NULL;
    }

    // The kernel has no bounds check, so every launch must cover exactly the
    // elements it is given: first all complete blocks ...
    if (fullBlocks > 0) {
        kMartixByMatrixElementwise <<< static_cast<unsigned int>(fullBlocks), kThreadsPerBlock >>> ( m1, m2, output );
        if (cudaGetLastError() != cudaSuccess) {
            return NULL;
        }
    }

    // ... then one partial block for the remainder, on pointers advanced past
    // the part already handled.
    if (tail > 0) {
        const size_t offset = fullBlocks * kThreadsPerBlock;
        kMartixByMatrixElementwise <<< 1, tail >>> ( m1 + offset, m2 + offset, output + offset );
        if (cudaGetLastError() != cudaSuccess) {
            return NULL;
        }
    }

    // Wait for the child grids so the products are ready on return.
    if (cudaDeviceSynchronize() != cudaSuccess) {
        return NULL;
    }
    return output;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment