Skip to content

Instantly share code, notes, and snippets.

@abidrahmank
Created October 17, 2013 08:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abidrahmank/7021023 to your computer and use it in GitHub Desktop.
Save abidrahmank/7021023 to your computer and use it in GitHub Desktop.
#include <stdio.h>
typedef unsigned int uint;
/*
 * For every adjacent pair of elements in a row of d_in, stores the square of
 * the pair's sum in d_out.
 *
 * Expected launch layout: 1-D grid, one block per row (blockIdx.x selects the
 * row), one thread per pair (threadIdx.x selects the pair). Launching more
 * than rows/2 threads per block is tolerated: surplus threads exit early.
 *
 * d_out : receives (d_in[loc] + d_in[loc+1])^2 at index loc/2, where
 *         loc = blockIdx.x*rows + 2*threadIdx.x. When `rows` is even this is
 *         rows/2 densely packed results per row.
 * d_in  : input, read at loc and loc+1; must hold gridDim.x*rows elements.
 * rows  : number of elements in one row (the stride between the starting
 *         offsets of consecutive blocks).
 */
__global__ void square(uint *d_out, uint *d_in, int rows){
    // Offset, inside this block's row, of the first element of the pair.
    int col = 2*threadIdx.x;

    // A pair must lie entirely inside the row. Without this guard a launch
    // with more than rows/2 threads per block spills into the next row and,
    // in the last block, reads past the end of d_in.
    if (col + 1 >= rows) return;

    int loc = blockIdx.x*rows + col;

    // Unsigned arithmetic: the data is unsigned, and unsigned overflow wraps
    // instead of being undefined behaviour as it is for signed int.
    uint f = d_in[loc] + d_in[loc+1];
    d_out[loc/2] = f*f;
}
/*
 * Reports a failed CUDA runtime call on stderr.
 * Returns true when `status` is cudaSuccess, false otherwise; `what` names
 * the step for the error message.
 */
static bool cudaOk(cudaError_t status, const char *what){
    if (status == cudaSuccess) return true;
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

/*
 * Fills an ELEMS x ELEMS matrix with the values 0 .. ELEMS*ELEMS-1, runs the
 * `square` kernel over it (one block per row, one thread per adjacent pair),
 * and prints every result as "<flat index> <value>".
 *
 * Returns 0 on success, 1 if any CUDA call fails.
 */
int main(){
    const int ELEMS = 64;           // elements per row, and number of rows
    const int E = ELEMS / 2;        // adjacent pairs (= results) per row
    const size_t IN_BYTES  = (size_t)ELEMS * ELEMS * sizeof(uint);
    const size_t OUT_BYTES = (size_t)ELEMS * E * sizeof(uint);

    uint h_in[ELEMS][ELEMS];
    uint h_out[ELEMS][E];
    for (int i = 0; i < ELEMS; i++){
        for (int j = 0; j < ELEMS; j++){
            h_in[i][j] = uint(i*ELEMS + j);
        }
    }

    uint* d_in = NULL;
    uint* d_out = NULL;

    // Short-circuit chain: the first failing step stops the remaining ones.
    bool ok = cudaOk(cudaMalloc((void**) &d_in, IN_BYTES), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**) &d_out, OUT_BYTES), "cudaMalloc(d_out)")
           && cudaOk(cudaMemcpy(d_in, h_in, IN_BYTES, cudaMemcpyHostToDevice),
                     "host-to-device copy");

    if (ok){
        // One block per row and one thread per pair. Launching ELEMS threads
        // per block would make half of them index past their own row.
        square<<<ELEMS, E>>>(d_out, d_in, ELEMS);

        // cudaGetLastError reports launch-configuration errors; the blocking
        // copy that follows waits for the kernel and reports execution errors.
        ok = cudaOk(cudaGetLastError(), "square kernel launch")
          && cudaOk(cudaMemcpy(h_out, d_out, OUT_BYTES, cudaMemcpyDeviceToHost),
                    "device-to-host copy");
    }

    // Release device memory on every path; cudaFree(NULL) is a no-op.
    cudaFree(d_in);
    cudaFree(d_out);

    if (!ok) return 1;

    /* ----------------- Print the result -------------------------- */
    printf("\n");
    for (int i = 0; i < ELEMS; i++){
        for (int j = 0; j < E; j++){
            // %u: the results are unsigned.
            printf("%d %u \n", i*E + j, h_out[i][j]);
        }
    }

    // Keep the console window open until a key is pressed.
    getchar();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment