Created
August 1, 2021 06:19
-
-
Save Mohammad-debug/0786645a4ea8ed9e0af70dc3a33aceea to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<iostream> | |
#include<math.h> | |
#include <cuda_runtime_api.h> | |
#include <stdio.h> | |
#include <thrust/host_vector.h> | |
#include <thrust/device_vector.h> | |
#include "cuda_runtime.h" | |
#include <malloc.h> | |
#include "device_launch_parameters.h" | |
using namespace std; | |
#define PI 3.14 | |
#define real double | |
#define size 100 | |
//note size should be equal to nz nx ny | |
//cuda error check | |
#define cudaCheckError(code) \ | |
{ \ | |
if ((code) != cudaSuccess) \ | |
{ \ | |
fprintf(stderr, "Cuda failure %s:%d: '%s' \n", __FILE__, __LINE__, \ | |
cudaGetErrorString(code)); \ | |
} \ | |
} | |
////////allocate 2d | |
void allot(real **&preal, const int dim1, const int dim2) | |
{ | |
// Contiguous allocation of 2D arrays | |
preal = new real *[dim1]; | |
preal[0] = new real[dim1 * dim2]; | |
for (int i = 1; i < dim1; i++) | |
preal[i] = preal[i - 1] + dim2; | |
for (int i = 0; i < dim1; i++) | |
{ | |
for (int j = 0; j < dim2; j++) | |
{ | |
preal[i][j] = 0; | |
} | |
} | |
} | |
/////////allocate 2d | |
////////////////////////////3d random//////////// | |
void random_generator(double ***&a,int x,int y,int z) | |
{ | |
for (int i = 0; i < x; i++) | |
{for(int j=0;j<y;j++) | |
{for(int k=0;k<z;k++) | |
{ | |
a[i][j][k]=rand()%100; | |
//a[i][j][k]=i+j+k; | |
} | |
} | |
} | |
} | |
///////////////3d random ends/////////////////// | |
///////2d random | |
void random_generator(double **&a,int x,int y) | |
{ | |
for(int j=0;j<x;j++) | |
{for(int k=0;k<y;k++) | |
{ | |
a[j][k]=rand()%100; | |
//a[j][k]=j+k; | |
} | |
} | |
} | |
//////2d random | |
//3d allocation | |
void allot(real ***&pointer, int dim1, int dim2, int dim3) { | |
pointer = new real **[dim1]; | |
pointer[0] = new real*[dim1 * dim2]; | |
pointer[0][0] = new real[dim1 * dim2 * dim3]; | |
for (int i = 0; i < dim1; i++) { | |
if (i > 0) { | |
pointer[i] = pointer[i - 1] + dim2; | |
pointer[i][0] = pointer[i - 1][0] + dim2 * dim3; | |
} | |
for (int j = 1; j < dim2; j++) { | |
pointer[i][j] = pointer[i][j - 1] + dim3; | |
} | |
} | |
for (int i = 0; i < dim1; i++) | |
{for(int j=0;j<dim2;j++) | |
{for(int k=0;k<dim3;k++) | |
{ | |
pointer[i][j][k]=0; | |
//a[i][j][k]=i+j+k; | |
} | |
} | |
} | |
//initialise to 0 | |
} | |
void energy_weights2( | |
// Energy Weights (forward and reverse) | |
real **&We, real **&We_adj, | |
// space snap parameters | |
int snap_z1, int snap_z2, int snap_x1, int snap_x2){ | |
// Scale gradients to the Energy Weight | |
// We: input as forward energy weight, and output as combined energy weight | |
real max_We = 0; | |
real max_w1 = 0, max_w2=0; | |
real epsilon_We = 0.005; | |
for (int iz=snap_z1;iz<snap_z2;iz++){ | |
for (int ix=snap_x1;ix<snap_x2;ix++){ | |
if (We[iz][ix] > max_w1){ | |
max_w1 = We[iz][ix]; | |
} | |
if (We_adj[iz][ix] > max_w2){ | |
max_w2 = We_adj[iz][ix]; | |
} | |
We[iz][ix] = sqrt(We[iz][ix]*We_adj[iz][ix]); | |
} | |
} | |
// Finding maximum of the energy weight | |
for (int iz=snap_z1;iz<snap_z2;iz++){ | |
for (int ix=snap_x1;ix<snap_x2;ix++){ | |
// Estimate maximum energy weight in CPU | |
if (We[iz][ix] > max_We){ | |
max_We = We[iz][ix]; | |
} | |
} | |
} | |
// Regularize energy weight to avoid division by zero | |
for (int iz=snap_z1;iz<snap_z2;iz++){ | |
for (int ix=snap_x1;ix<snap_x2;ix++){ | |
We[iz][ix] += epsilon_We * max_We; | |
} | |
} | |
std::cout << "Max. Energy Weight = " << max_We << std::endl; | |
// std::cout << "Max. Energy part = " << max_w1<<", "<< max_w2 << std::endl; | |
} | |
__global__ void cuda_energy_weights2_GPU( | |
// Energy Weights (forward and reverse) | |
real *We, real *We_adj, | |
// space snap parameters | |
int snap_z1, int snap_z2, int snap_x1, int snap_x2, int nx) | |
{ | |
int ix = blockIdx.x * blockDim.x + threadIdx.x; | |
int iz = blockIdx.y * blockDim.y + threadIdx.y; | |
if( iz>=snap_z1 && iz<snap_z2 && ix>=snap_x1 && ix<snap_x2) | |
{ | |
We[iz*nx+ix] = sqrt(We[iz*nx + ix]*We_adj[iz*nx+ix]); | |
} | |
} | |
__global__ void cuda_energy_weights2_GPU2( | |
// Energy Weights (forward and reverse) | |
real *We, | |
// space snap parameters | |
int snap_z1, int snap_z2, int snap_x1, int snap_x2, int nx, real epsilon_We, real max_We ) | |
{ | |
int ix = blockIdx.x * blockDim.x + threadIdx.x; | |
int iz = blockIdx.y * blockDim.y + threadIdx.y; | |
if( iz>=snap_z1 && iz<snap_z2 && ix>=snap_x1 && ix<snap_x2) | |
{ | |
We[iz*nx+ix] += epsilon_We * max_We; | |
} | |
} | |
void energy_weights2_GPU( | |
// Energy Weights (forward and reverse) | |
real *&We, real *&We_adj, | |
// space snap parameters | |
int snap_z1, int snap_z2, int snap_x1, int snap_x2, int nx){ | |
// Scale gradients to the Energy Weight | |
// We: input as forward energy weight, and output as combined energy weight | |
real max_We = 0; | |
real max_w1 = 0, max_w2=0; | |
real epsilon_We = 0.005; | |
int box1 = 32, box2 = 32; | |
dim3 threadsPerBlock(box1, box2); | |
dim3 blocksPerGrid(snap_x2 / box1 + 1, snap_z2 / box2 + 1); | |
cuda_energy_weights2_GPU<<<blocksPerGrid, threadsPerBlock>>> | |
( We,We_adj, snap_z1, snap_z2, snap_x1,snap_x2, nx); | |
cudaCheckError(cudaDeviceSynchronize()); | |
thrust::device_ptr<real> dev_ptr1 = thrust::device_pointer_cast(We); | |
// thrust::device_ptr<real> dev_ptr2 = thrust::device_pointer_cast(We_adj); | |
for (int iz = snap_z1; iz < snap_z2; iz++) | |
{ | |
real ma = thrust::reduce(dev_ptr1 +iz*nx+snap_x1 , dev_ptr1 + iz * nx + snap_x2, 0.0, thrust::maximum<real>()); | |
if(ma>max_We) | |
max_We=ma; | |
} | |
cuda_energy_weights2_GPU2<<<blocksPerGrid, threadsPerBlock>>> | |
( We, snap_z1, snap_z2, snap_x1,snap_x2, nx, epsilon_We, max_We ); | |
cudaCheckError(cudaDeviceSynchronize()); | |
// for (int iz=snap_z1;iz<snap_z2;iz++){ | |
// for (int ix=snap_x1;ix<snap_x2;ix++){ | |
// if (We[iz][ix] > max_w1){ | |
// max_w1 = We[iz][ix]; | |
// } | |
// if (We_adj[iz][ix] > max_w2){ | |
// max_w2 = We_adj[iz][ix]; | |
// } | |
// We[iz][ix] = sqrt(We[iz][ix]*We_adj[iz][ix]); | |
// } | |
// } | |
// // Finding maximum of the energy weight | |
// for (int iz=snap_z1;iz<snap_z2;iz++){ | |
// for (int ix=snap_x1;ix<snap_x2;ix++){ | |
// // Estimate maximum energy weight in CPU | |
// if (We[iz][ix] > max_We){ | |
// max_We = We[iz][ix]; | |
// } | |
// } | |
// } | |
// // Regularize energy weight to avoid division by zero | |
// for (int iz=snap_z1;iz<snap_z2;iz++){ | |
// for (int ix=snap_x1;ix<snap_x2;ix++){ | |
// We[iz][ix] += epsilon_We * max_We; | |
// } | |
// } | |
std::cout << "Max. Energy Weight = " << max_We << std::endl; | |
//std::cout << "Max. Energy part = " << max_w1<<", "<< max_w2 << std::endl; | |
} | |
int main() | |
{ | |
int nz=20,nt=10,nx=15; | |
real dt=rand()%10; | |
int tf=1; | |
int snap_dt=1; | |
int snap_z1=1; | |
int snap_z2=9; | |
int snap_x1=1; | |
int snap_x2=4; | |
int snap_dz=2; | |
int snap_dx=2; | |
int snap_nt = 1 + (nt-1)/snap_dt; | |
int snap_nz = 1 + (snap_z2 - snap_z1)/snap_dz; | |
int snap_nx = 1 + (snap_x2 - snap_x1)/snap_dx; | |
real **We;allot(We,nz,nx);random_generator(We,nz,nx); | |
real **We_adj;allot(We_adj,nz,nx);random_generator(We_adj,nz,nx); | |
//for device | |
real *d_We_adj,*d_We; | |
//for devicee | |
//malloc device// | |
cudaCheckError(cudaMalloc(&d_We_adj, nz * nx * sizeof(real)));//device allocation | |
cudaCheckError(cudaMalloc(&d_We, nz * nx * sizeof(real)));//device allocation | |
//malloc device// | |
//cuda memory copy// | |
cudaCheckError(cudaMemcpy(d_We_adj, We_adj[0], nz * nx * sizeof(real), cudaMemcpyHostToDevice)); | |
cudaCheckError(cudaMemcpy(d_We, We[0], nz * nx * sizeof(real), cudaMemcpyHostToDevice)); | |
//cuda memory copy// | |
energy_weights2( | |
// Energy Weights (forward and reverse) | |
We, We_adj, | |
// space snap parameters | |
snap_z1,snap_z2, snap_x1, snap_x2); | |
energy_weights2_GPU( | |
// Energy Weights (forward and reverse) | |
d_We, d_We_adj, | |
// space snap parameters | |
snap_z1, snap_z2, snap_x1, snap_x2, nx); | |
real **test_We; | |
//real ***t_accu_sxx; allot(t_accu_sxx, snap_nt, snap_nz, snap_nx); | |
allot(test_We,nz,nx); | |
cudaCheckError(cudaMemcpy(test_We[0], d_We, nz * nx * sizeof(real), cudaMemcpyDeviceToHost)); | |
for (int i = 0; i < nz; i++) | |
{for(int j=0;j<nx;j++) | |
if (abs(test_We[i][j] - We[i][j] ) > 0.0000000001) | |
{ | |
cout << "Failed at" << i << " " << j << " " << test_We[i][j] << " " << We[i][j] << "\n"; | |
} | |
} | |
std::cout<<"done"<<std::endl; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment