// Source: GitHub Gist by @nghiahsgs (gist id 68ba08a8dc2ba404bd87e1d38b411588),
// created May 18, 2020 08:44.
// Build: /usr/local/cuda-10.2/bin/nvcc test_quantum_parallel_16_16_16_block.cu -arch=sm_75
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Fill the first `len` entries of `x` with pseudo-random values.
 *
 * Each entry receives the next value returned by rand(), converted to double.
 * The generator is not seeded here; seeding is left to the caller.
 */
void random_array(int len, double *x){
    double *out = x;
    int remaining = len;
    while (remaining-- > 0) {
        *out++ = rand();
    }
}
/*
 * Read up to nb_rows * nb_cols whitespace-separated doubles from `file` into
 * `mat`, in row-major order: element (i, j) is stored at mat[i*nb_cols + j].
 *
 * Reading stops at the first token that cannot be parsed as a double or at
 * end of file. Elements that were not reached keep the value they had before
 * the call. A NULL `file` or `mat` is treated as "nothing to read".
 */
void load_matrix_from_file(FILE * file, int nb_rows,int nb_cols, double* mat){
    if (file == NULL || mat == NULL) {
        return;
    }
    for (int i = 0; i < nb_rows; i++) {
        for (int j = 0; j < nb_cols; j++) {
            // Widen before multiplying so large matrices cannot overflow int.
            const size_t index = (size_t)i * (size_t)nb_cols + (size_t)j;
            // fscanf returns EOF (a negative, hence non-zero, value) at end of
            // input, so test for exactly one converted item instead of "!= 0".
            if (fscanf(file, "%lf", &mat[index]) != 1) {
                // Nothing further can be parsed: leave both loops at once.
                return;
            }
        }
    }
}
/*
 * Read up to `n` whitespace-separated doubles from `file` into mat[0..n-1].
 *
 * Reading stops at the first token that cannot be parsed as a double or at
 * end of file. Entries that were not reached keep the value they had before
 * the call. A NULL `file` or `mat` is treated as "nothing to read".
 */
void load_matrix_from_file_2(FILE * file, int n, double* mat){
    if (file == NULL || mat == NULL) {
        return;
    }
    for (int j = 0; j < n; j++) {
        // fscanf returns EOF (negative, hence non-zero) at end of input, so
        // success is "exactly one item converted", not "result is non-zero".
        if (fscanf(file, "%lf", &mat[j]) != 1) {
            return;
        }
    }
}
/*
 * Print every element of a row-major nb_rows x nb_cols matrix to stdout,
 * one per line, as "<flat index> <value>".
 */
void print_matrix(int nb_rows, int nb_cols, double * mat){
    // `flat` walks the storage linearly; it equals row*nb_cols + col at each step.
    int flat = 0;
    for (int row = 0; row < nb_rows; ++row) {
        for (int col = 0; col < nb_cols; ++col, ++flat) {
            printf("%d %lf\n", flat, mat[flat]);
        }
    }
}
/*
 * Print the first `len` entries of `x` to stdout, one per line, as
 * "element <i> : <value>". (The name is Vietnamese for "print matrix".)
 */
void in_ma_tran(int len, double *x){
    int pos = 0;
    while (pos < len) {
        printf("element %d : %f\n", pos, x[pos]);
        ++pos;
    }
}
/*
 * Sum the first `len` entries of `x`. (The name is Vietnamese for "matrix sum".)
 *
 * The sum is accumulated in double precision but the function returns int,
 * so the result is converted to int on return and any fractional part is lost.
 */
int tong_ma_tran(int len, double *x){
    double acc = 0;
    const double *cursor = x;
    for (int left = len; left > 0; --left) {
        acc += *cursor++;
    }
    return acc;
}
/*
 * Adds to *result_2 the sum, over every (r, s, q) covered by the launch with
 * r, s, q < N, of
 *
 *   C[r*N+s] * B[s*N+q] * (cos(E[q]-E[r]) - cos(E[s]-E[q]))
 *                       / (10 + E[s] - E[q]/2 - E[r]/2)
 *
 * Launch layout: a 3-D grid of 3-D blocks. The global x, y and z thread
 * indices are used as r, s and q respectively; threads with any index >= N
 * contribute zero. Any block shape is accepted.
 *
 * Parameters:
 *   N        - edge length; C and B are indexed as N x N, E as length N.
 *   C, B, E  - input arrays (read only by this kernel).
 *   result   - unused by this kernel; kept so the signature stays the same.
 *   result_2 - single double accumulator. The kernel adds to whatever value
 *              it already holds, so the caller must initialise it.
 *
 * Each block first reduces its threads' terms in shared memory and then
 * issues a single atomicAdd, instead of one atomicAdd per thread on the same
 * address. Uses atomicAdd on double (requires SM60+).
 */
__global__ void map_3d_array(int N, double *C, double *B, double *E, double *result, double *result_2){
    // One slot per thread; 1024 is the maximum number of threads in a block.
    __shared__ double partial[1024];

    const int r = blockIdx.x*blockDim.x + threadIdx.x;
    const int s = blockIdx.y*blockDim.y + threadIdx.y;
    const int q = blockIdx.z*blockDim.z + threadIdx.z;

    // Out-of-range threads still take part in the reduction, contributing 0,
    // so that every thread of the block reaches each __syncthreads() below.
    double term = 0.0;
    if (r < N && s < N && q < N) {
        term = C[r*N+s]*B[s*N+q]*(cos(E[q]-E[r])-cos(E[s]-E[q]))/(10+E[s]-E[q]/2-E[r]/2);
    }

    const unsigned int tid = threadIdx.x + blockDim.x*(threadIdx.y + blockDim.y*threadIdx.z);
    const unsigned int nthreads = blockDim.x*blockDim.y*blockDim.z;

    partial[tid] = term;
    __syncthreads();

    // Pairwise reduction that works for any block size (not only powers of
    // two): each pass folds the upper part of the active range onto the lower
    // part. The loop bounds depend only on nthreads, so the barrier is uniform.
    for (unsigned int active = nthreads; active > 1; ) {
        const unsigned int half = (active + 1) / 2;
        if (tid < active - half) {
            partial[tid] += partial[tid + half];
        }
        __syncthreads();
        active = half;
    }

    if (tid == 0) {
        atomicAdd(result_2, partial[0]);
    }
}
/* Print a readable message and exit when a CUDA runtime call fails. */
static void check_cuda(cudaError_t status, const char *what){
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/* Open `path` for reading, or report the failure and exit. */
static FILE *open_input_or_exit(const char *path){
    FILE *file = fopen(path, "r");
    if (file == NULL) {
        perror(path);
        exit(EXIT_FAILURE);
    }
    return file;
}

/*
 * Load C (N x N), B (N x N) and E (N) from text files, run map_3d_array over
 * the N x N x N index space on the GPU, and print the accumulated sum together
 * with the processor time (clock()) spent on transfers, kernel and cleanup.
 */
int main(){
    const int N = 1000;
    const size_t matrix_bytes = (size_t)N * (size_t)N * sizeof(double);
    const size_t vector_bytes = (size_t)N * sizeof(double);

    // Host buffers for C, B and E. The kernel produces a single scalar, so
    // no N*N*N result buffer is needed on either side.
    double *h_C = (double*)malloc(matrix_bytes);
    double *h_B = (double*)malloc(matrix_bytes);
    // Zero-initialised so entries missing from the input file are defined.
    double *h_E = (double*)calloc((size_t)N, sizeof(double));
    double h_result_2 = 0.0;
    if (h_C == NULL || h_B == NULL || h_E == NULL) {
        fprintf(stderr, "host memory allocation failed\n");
        return EXIT_FAILURE;
    }

    // Device buffers for C, B, E and the scalar accumulator.
    double *d_C = NULL, *d_B = NULL, *d_E = NULL, *d_result_2 = NULL;
    check_cuda(cudaMalloc(&d_C, matrix_bytes), "cudaMalloc(d_C)");
    check_cuda(cudaMalloc(&d_B, matrix_bytes), "cudaMalloc(d_B)");
    check_cuda(cudaMalloc(&d_E, vector_bytes), "cudaMalloc(d_E)");
    check_cuda(cudaMalloc(&d_result_2, sizeof(double)), "cudaMalloc(d_result_2)");

    // Initialise C and B with random values first; the file contents read
    // below then overwrite them.
    random_array(N*N, h_C);
    random_array(N*N, h_B);

    FILE *file = open_input_or_exit("C_1000_1000.txt");
    load_matrix_from_file(file, N, N, h_C);
    fclose(file);

    FILE *file2 = open_input_or_exit("B_1000_1000.txt");
    load_matrix_from_file(file2, N, N, h_B);
    fclose(file2);

    FILE *file3 = open_input_or_exit("E_1000.txt");
    load_matrix_from_file_2(file3, N, h_E);
    fclose(file3);

    printf("E[100]: %lf \n",h_E[100]);

    //=========START=======
    clock_t begin = clock();

    // Copy the inputs from host to device.
    check_cuda(cudaMemcpy(d_C, h_C, matrix_bytes, cudaMemcpyHostToDevice), "copy C to device");
    check_cuda(cudaMemcpy(d_B, h_B, matrix_bytes, cudaMemcpyHostToDevice), "copy B to device");
    check_cuda(cudaMemcpy(d_E, h_E, vector_bytes, cudaMemcpyHostToDevice), "copy E to device");

    // The kernel accumulates with atomicAdd, so the accumulator must start at 0.
    check_cuda(cudaMemset(d_result_2, 0, sizeof(double)), "zero d_result_2");

    // 8 x 8 x 8 = 512 threads per block; ceil-div so the grid covers all of
    // [0, N) on each axis without launching a spare layer of blocks.
    const int nb_hyper = 8;
    const int blocks_per_axis = (N + nb_hyper - 1) / nb_hyper;
    dim3 nb_block(blocks_per_axis, blocks_per_axis, blocks_per_axis);
    dim3 nb_thread_per_block(nb_hyper, nb_hyper, nb_hyper);

    // The kernel does not use its `result` argument, so no buffer is passed.
    map_3d_array<<<nb_block, nb_thread_per_block>>>(N, d_C, d_B, d_E, NULL, d_result_2);
    check_cuda(cudaGetLastError(), "map_3d_array launch");

    // Blocking copy: waits for the kernel and reports any execution error.
    check_cuda(cudaMemcpy(&h_result_2, d_result_2, sizeof(double), cudaMemcpyDeviceToHost),
               "copy result to host");

    printf("Tong cac phan tu cua ma tran la (tinh tren gpu) %f\n",h_result_2);

    cudaFree(d_C);
    cudaFree(d_B);
    cudaFree(d_E);
    cudaFree(d_result_2);
    free(h_C);
    free(h_B);
    free(h_E);

    clock_t end = clock();
    double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
    printf("Total time:%f\n",time_spent);
    return 0;
}
// (GitHub page footer: "Sign up for free to join this conversation on GitHub.")