Created
December 27, 2016 13:58
-
-
Save maiorpa/d2b853f87a0c4d58163ba4de8e06bac8 to your computer and use it in GitHub Desktop.
cuda question
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bool LsSol::Solve_PCG(double& conv_ratio, unsigned int& iteration, | |
LsSol::ILinearSystemPreconditioner_Sol& ls) | |
{ | |
const double conv_ratio_tol = conv_ratio; | |
const unsigned int mx_iter = iteration; | |
if( ls.GetTmpVectorArySize() < 2 ){ ls.ReSizeTmpVecSolver(2); } | |
const int ix = -2; | |
const int ir = -1; | |
const int ip = 0; | |
const int iz = 1; | |
// x = 0.0 | |
ls.SCAL(0.0,ix); | |
double sq_inv_norm_res0; | |
{ | |
const double sq_norm_res0 = ls.DOT(ir,ir); | |
if( sq_norm_res0 < 1.0e-30 ){ | |
conv_ratio = 0.0; | |
iteration = 0; | |
return true; | |
} | |
sq_inv_norm_res0 = 1.0 / sq_norm_res0; | |
} | |
ls.COPY(ir,iz); | |
ls.SolvePrecond(iz); | |
ls.COPY(iz,ip); | |
double inpro_rz = ls.DOT(ir,iz); | |
for(unsigned int iitr=0;iitr<mx_iter;iitr++){ | |
double alpha; | |
{ // calc alpha | |
ls.MATVEC(1.0,ip,0.0,iz); | |
const double val_pAp = ls.DOT(ip,iz); | |
alpha = inpro_rz / val_pAp; | |
} | |
ls.AXPY(-alpha,iz,ir); | |
ls.AXPY(alpha,ip,ix); // Converge Judgement‚М‘O‚Й“ь‚к‚й | |
{ // Converge Judgement | |
double sq_norm_res = ls.DOT(ir,ir); | |
// std::cout << iitr << " " << sqrt(sq_norm_res * sq_inv_norm_res0) << std::endl; | |
if( sq_norm_res * sq_inv_norm_res0 < conv_ratio_tol*conv_ratio_tol ){ | |
conv_ratio = sqrt( sq_norm_res * sq_inv_norm_res0 ); | |
//conv_ratio = sq_norm_res; | |
iteration = iitr; | |
return true; | |
} | |
} | |
double beta; | |
{ // calc beta | |
ls.COPY(ir,iz); | |
ls.SolvePrecond(iz); | |
const double inpro_rz_new = ls.DOT(ir,iz); | |
beta = inpro_rz_new/inpro_rz; | |
inpro_rz = inpro_rz_new; | |
} | |
ls.SCAL(beta,ip); | |
ls.AXPY(1.0,iz,ip); | |
} | |
// Converge Judgement | |
double sq_norm_res = ls.DOT(ir,ir); | |
conv_ratio = sqrt( sq_norm_res * sq_inv_norm_res0 ); | |
//conv_ratio = sq_norm_res; | |
return false; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
std::pair<unsigned int, double> StrategyBsrWithSepareteLUMatrices::Solve(unsigned int max_iterate, double tolerance) | |
{ | |
assert(mInstaller->mCusparseHandle != nullptr); | |
assert(mInstaller->mCublasHandle != nullptr); | |
checkCudaErrors(cudaMemcpy(mInstaller->mDeviceCol, mBsrColInd, mNnzb*sizeof(int), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mInstaller->mDeviceRow, mBsrRowPtr, (mMb+1)*sizeof(int), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mInstaller->mDeviceData, mData, mDataSize*sizeof(double), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mInstaller->mDeviceX, mX, mResSize*sizeof(double), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mInstaller->mDeviceRes, mRes, mResSize*sizeof(double), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mDeviceLCol, mLColInd, mLNnzb*sizeof(int), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mDeviceLRow, mLRowPtr, (mMb+1)*sizeof(int), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mDeviceValsL, mValsL, mLNnzb*mBlockDim*mBlockDim*sizeof(double), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mDeviceUCol, mUColInd, mUNnzb*sizeof(int), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mDeviceURow, mURowPtr, (mMb+1)*sizeof(int), cudaMemcpyHostToDevice)); | |
checkCudaErrors(cudaMemcpy(mDeviceValsU, mValsU, mUNnzb*mBlockDim*mBlockDim*sizeof(double), cudaMemcpyHostToDevice)); | |
double alpha = 1.0; | |
double beta = 0.0; | |
int structural_zero=0; | |
int numerical_zero=0; | |
const double double_one = 1.0; | |
const double double_zero = 0.0; | |
cusparseStatus_t status; | |
// x = 0 | |
cublasDscal(mInstaller->mCublasHandle, mResSize, &beta, mInstaller->mDeviceX, 1); | |
double sq_inv_norm_res0; | |
double inpro_rz; | |
double val_pAp; | |
double sq_norm_res; | |
double inpro_rz_new; | |
double sq_norm_res0; | |
{ | |
cublasDdot(mInstaller->mCublasHandle, mResSize, mInstaller->mDeviceRes, 1, mInstaller->mDeviceRes, 1, &sq_norm_res0); | |
if( sq_norm_res0 < 1.0e-30 ){ | |
cudaMemcpy(mX, mInstaller->mDeviceX, mResSize*sizeof(double), cudaMemcpyDeviceToHost); | |
return std::make_pair(0, 0.0); | |
} | |
sq_inv_norm_res0 = 1.0 / sq_norm_res0; | |
} | |
{ | |
// Solve precondition system | |
// Forward Solve, we can re-use infoA since the sparsity pattern of A matches that of L | |
status = cusparseDbsrsv2_solve(mInstaller->mCusparseHandle, CUSPARSE_DIRECTION_ROW, CUSPARSE_OPERATION_NON_TRANSPOSE, mMb, mLNnzb, &double_one, mDescrL, | |
mDeviceValsL, mDeviceLRow, mDeviceLCol, mBlockDim, | |
mInfo_L, mInstaller->mDeviceRes, mDeviceY, /*CUSPARSE_SOLVE_POLICY_NO_LEVEL*/CUSPARSE_SOLVE_POLICY_USE_LEVEL, mBuffer); | |
checkCudaErrors(status); | |
// Back Substitution | |
status = cusparseDbsrsv2_solve(mInstaller->mCusparseHandle, CUSPARSE_DIRECTION_ROW, CUSPARSE_OPERATION_NON_TRANSPOSE, mMb, mUNnzb, &double_one, mDescrU, | |
mDeviceValsU, mDeviceURow, mDeviceUCol, mBlockDim, | |
mInfo_U, mDeviceY, mDeviceZ, /*CUSPARSE_SOLVE_POLICY_NO_LEVEL*/CUSPARSE_SOLVE_POLICY_USE_LEVEL, mBuffer); | |
checkCudaErrors(status); | |
// End Solve precondition system | |
cublasDdot(mInstaller->mCublasHandle, mResSize, mInstaller->mDeviceRes, 1, mDeviceZ, 1, &inpro_rz); | |
cublasDcopy(mInstaller->mCublasHandle, mResSize, mDeviceZ, 1, mInstaller->mDeviceP, 1); | |
unsigned int iitr; | |
for(iitr=0;iitr<max_iterate;iitr++){ | |
{ // calc alpha | |
cusparseDbsrmv(mInstaller->mCusparseHandle, CUSPARSE_DIRECTION_ROW, CUSPARSE_OPERATION_NON_TRANSPOSE, mMb, mNb, mNnzb, &double_one, mInstaller->mMatDescr | |
, mInstaller->mDeviceData, mInstaller->mDeviceRow, mInstaller->mDeviceCol, mBlockDim | |
, mInstaller->mDeviceP, &double_zero, mInstaller->mDeviceAp); | |
cublasDdot(mInstaller->mCublasHandle, mResSize, mInstaller->mDeviceP, 1, mInstaller->mDeviceAp, 1, &val_pAp); | |
alpha = inpro_rz / val_pAp; | |
} | |
{ | |
// Converge Judgement | |
cublasDaxpy(mInstaller->mCublasHandle, mResSize, &alpha, mInstaller->mDeviceP, 1, mInstaller->mDeviceX, 1); | |
const double nalpha = -alpha; | |
cublasDaxpy(mInstaller->mCublasHandle, mResSize, &nalpha, mInstaller->mDeviceAp, 1, mInstaller->mDeviceRes, 1); | |
} | |
{ // check converge | |
cublasDdot(mInstaller->mCublasHandle, mResSize, mInstaller->mDeviceRes, 1, mInstaller->mDeviceRes, 1, &sq_norm_res); | |
if( sq_norm_res * sq_inv_norm_res0 < tolerance*tolerance /*|| sq_norm_res < 1e-7*/){ | |
tolerance = sqrt( sq_norm_res * sq_inv_norm_res0 ); | |
cudaMemcpy(mX, mInstaller->mDeviceX, mResSize*sizeof(double), cudaMemcpyDeviceToHost); | |
return std::make_pair(iitr,tolerance); | |
} | |
} | |
{ // calc beta | |
{ | |
// Solve precondition system | |
// Forward Solve, we can re-use infoA since the sparsity pattern of A matches that of L | |
status = cusparseDbsrsv2_solve(mInstaller->mCusparseHandle, CUSPARSE_DIRECTION_ROW, CUSPARSE_OPERATION_NON_TRANSPOSE, mMb, mLNnzb, &double_one, mDescrL, | |
mDeviceValsL, mDeviceLRow, mDeviceLCol, mBlockDim, | |
mInfo_L, mInstaller->mDeviceRes, mDeviceY,/*CUSPARSE_SOLVE_POLICY_NO_LEVEL*/CUSPARSE_SOLVE_POLICY_USE_LEVEL, mBuffer); | |
checkCudaErrors(status); | |
// Back Substitution | |
status = cusparseDbsrsv2_solve(mInstaller->mCusparseHandle, CUSPARSE_DIRECTION_ROW, CUSPARSE_OPERATION_NON_TRANSPOSE, mMb, mUNnzb, &double_one, mDescrU, | |
mDeviceValsU, mDeviceURow, mDeviceUCol, mBlockDim, | |
mInfo_U, mDeviceY, mDeviceZ, /*CUSPARSE_SOLVE_POLICY_NO_LEVEL*/CUSPARSE_SOLVE_POLICY_USE_LEVEL, mBuffer); | |
checkCudaErrors(status); | |
// End Solve precondition system | |
} | |
cublasDdot(mInstaller->mCublasHandle, mResSize, mInstaller->mDeviceRes, 1, mDeviceZ, 1, &inpro_rz_new); | |
beta = inpro_rz_new/inpro_rz; | |
inpro_rz = inpro_rz_new; | |
} | |
cublasDscal(mInstaller->mCublasHandle, mResSize, &beta, mInstaller->mDeviceP, 1); | |
cublasDaxpy(mInstaller->mCublasHandle, mResSize, &double_one, mDeviceZ, 1, mInstaller->mDeviceP, 1); | |
} | |
cudaMemcpy(mX, mInstaller->mDeviceX, mResSize*sizeof(double), cudaMemcpyDeviceToHost); | |
return std::make_pair(iitr, sqrt( sq_norm_res * sq_inv_norm_res0 )); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment