sbugrov/MartixByMatrixElementwise.cu

## MartixByMatrixElementwise.cu
__global__ void kMartixByMatrixElementwise(const float *m1, const float *m2, float *output) {
  /*  Computes the element-wise product of two arrays:
        output[i] = m1[i] * m2[i]
      where i is the flat 1D global thread index.
   Inputs:
   m1: array (first factor), read only
   m2: array (second factor), read only
   output: array, the results of the multiplication are stored here
   Launch contract:
   - 1D grid of 1D blocks; only the .x components are used.
   - There is NO bounds check (the signature carries no element count), so
     gridDim.x * blockDim.x must equal the number of elements exactly.
     Launching more threads than elements reads and writes out of bounds.
  */

  // Widen before multiplying: blockIdx.x * blockDim.x is otherwise evaluated
  // in 32 bits, which can wrap (or become negative once narrowed to int)
  // for very large launches.
  const unsigned long long id =
      static_cast<unsigned long long>(blockIdx.x) * blockDim.x + threadIdx.x;

  output[id] = m1[id] * m2[id];
}

__device__ float* dMartixByMatrixElementwise(const float *m1, const float *m2, float *output, const int width, const int height){
  /*  Device-side wrapper: multiplies two arrays of width * height floats
      element by element by launching kMartixByMatrixElementwise as child
      grid(s) from device code, then waits for them to finish.
   Inputs:
   m1, m2: input arrays, width * height elements each
   output: array receiving the products, width * height elements
   width, height: dimensions; only their product (the element count) is used
   Returns:
   output on success (including the empty case where nothing is launched),
   NULL if a launch or the synchronisation reported an error.
   Notes:
   - Every thread that calls this function performs its own launches.
   - The work is issued in blocks of 256 threads rather than <<<width, height>>>,
     so a height above the per-block thread limit no longer makes the launch fail.
   - NOTE(review): device-side cudaDeviceSynchronize() is deprecated in recent
     CUDA toolkits and removed in CUDA 12; confirm the toolkit this targets.
  */

  if (width <= 0 || height <= 0) {
    // Nothing to compute; a zero-sized launch would be rejected anyway.
    return output;
  }

  const unsigned long long total =
      static_cast<unsigned long long>(width) * static_cast<unsigned long long>(height);
  const unsigned int threadsPerBlock = 256;
  // 2^22 blocks * 256 threads = 2^30 elements per launch, which keeps the
  // kernel's flat index well inside 32-bit range; larger inputs are chunked.
  const unsigned long long maxBlocksPerLaunch = 1ULL << 22;

  bool launched = true;
  unsigned long long done = 0;

  // Full blocks only: the kernel has no bounds check, so each launch must
  // cover exactly the elements handed to it (pointers are offset by `done`).
  while (launched && total - done >= threadsPerBlock) {
    unsigned long long blocks = (total - done) / threadsPerBlock;
    if (blocks > maxBlocksPerLaunch) {
      blocks = maxBlocksPerLaunch;
    }
    kMartixByMatrixElementwise <<< static_cast<unsigned int>(blocks), threadsPerBlock >>> ( m1 + done, m2 + done, output + done );
    launched = (cudaGetLastError() == cudaSuccess);
    done += blocks * threadsPerBlock;
  }

  // Tail: one partial block sized to the exact remainder (1..255 threads).
  if (launched && done < total) {
    kMartixByMatrixElementwise <<< 1, static_cast<unsigned int>(total - done) >>> ( m1 + done, m2 + done, output + done );
    launched = (cudaGetLastError() == cudaSuccess);
  }

  // Always wait, even after a failed launch, so that no earlier child grid
  // is still writing into output when control returns to the caller.
  const bool synced = (cudaDeviceSynchronize() == cudaSuccess);

  if (!launched || !synced) {
    return NULL;
  }
  return output;
}
	__global__ void kMartixByMatrixElementwise(const float m1, const float m2, float *output) {
	/* Computes the product of two arrays (elementwise multiplication):
	each thread stores m1[id] * m2[id] into output[id] for its own flat index.
	Inputs:
	m1: described as an array, but declared here as a scalar `const float`
	m2: described as an array, but declared here as a scalar `const float`
	output: array, the results of the multiplication are stored here

	NOTE(review): m1[id] and m2[id] subscript a plain float, which is not
	valid C++. This block looks like a copy of the pointer-taking kernel of
	the same name earlier in the file with the `*` on m1/m2 lost -- confirm,
	then remove or repair this copy.
	*/

	// Flat 1D global thread index; there is no bounds check before the store.
	const int id = blockIdx.x * blockDim.x + threadIdx.x;

	output[id] = m1[id] * m2[id];
	}

	__device__ float* dMartixByMatrixElementwise(const float m1, const float m2, float *output, const int width, const int height){
	// Device-side wrapper: launches kMartixByMatrixElementwise with `width`
	// blocks of `height` threads each, waits with cudaDeviceSynchronize(),
	// and returns `output`. Neither the launch nor the synchronisation result
	// is checked.
	// NOTE(review): m1 and m2 are scalar floats here, so this call can only
	// bind to the scalar-parameter kernel overload. This looks like a copy of
	// the pointer-taking wrapper earlier in the file with the `*` lost --
	// confirm, then remove or repair this copy.

	kMartixByMatrixElementwise <<< width, height >>> ( m1, m2, output );
	cudaDeviceSynchronize();
	return output;
	}