/*
 * Computes the (elementwise) difference between two arrays:
 *     output[i] = m1[i] - m2[i]
 *
 * m1:     array, the values subtracted from
 * m2:     array, the values being subtracted
 * output: array, the results of the computation are stored here
 *
 * Each thread handles exactly one element, selected by its flat 1D global
 * index (blockIdx.x * blockDim.x + threadIdx.x). The signature carries no
 * element count, so there is no bounds guard: the caller must launch with
 * gridDim.x * blockDim.x no larger than the length of the arrays, otherwise
 * the surplus threads access memory past the end.
 */
__global__ void kMartixSubstractMatrix(const float *m1, const float *m2, float *output) {
    // Widen to size_t before multiplying: the built-in index variables are
    // 32-bit unsigned, so the product could wrap (or turn negative once
    // stored in an int) on very large launches.
    const size_t id = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    output[id] = m1[id] - m2[id];
}
/*
 * Device-side launcher for kMartixSubstractMatrix: starts the elementwise
 * subtraction output[i] = m1[i] - m2[i] from within device code and hands
 * back the output pointer.
 *
 * m1:     array, the values subtracted from
 * m2:     array, the values being subtracted
 * output: array, the results of the computation are stored here
 * width:  used as the number of blocks in the launch
 * height: used as the number of threads per block in the launch
 *
 * Returns: output (the same pointer that was passed in).
 *
 * Notes:
 *  - The launch creates width * height threads, one per element, so all
 *    three arrays need at least width * height elements.
 *  - height is the block size and is therefore limited by the device's
 *    maximum threads per block; a larger value makes the launch fail.
 *  - No synchronization is performed here, so the contents of output should
 *    not be assumed complete at the moment this function returns.
 *  - Every device thread that calls this function issues its own launch.
 *  - Launching a kernel from device code relies on CUDA dynamic parallelism
 *    (NOTE(review): confirm the build enables relocatable device code).
 */
__device__ float* dMartixSubstractMatrix(const float *m1, const float *m2, float *output, const int width, const int height) {
    // A zero or negative grid/block dimension is an invalid launch
    // configuration; an empty matrix has nothing to compute, so skip the
    // launch instead of triggering a launch error.
    if (width <= 0 || height <= 0) {
        return output;
    }

    kMartixSubstractMatrix<<<width, height>>>(m1, m2, output);
    return output;
}
