Last active
November 14, 2018 13:09
-
-
Save ddemidov/41c57413b209f986ef6cb3ab83d4950b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <vector> | |
#include <tuple> | |
#include <amgcl/adapter/crs_tuple.hpp> | |
#include <amgcl/backend/builtin.hpp> | |
#include <amgcl/amg.hpp> | |
#include <amgcl/coarsening/smoothed_aggregation.hpp> | |
#include <amgcl/relaxation/spai0.hpp> | |
#include <amgcl/solver/cg.hpp> | |
#include <amgcl/profiler.hpp> | |
// Generates matrix for poisson problem in a unit cube. | |
template <typename ValueType, typename IndexType, typename RhsType> | |
int sample_problem( | |
ptrdiff_t n, | |
std::vector<ValueType> &val, | |
std::vector<IndexType> &col, | |
std::vector<IndexType> &ptr, | |
std::vector<RhsType> &rhs, | |
double anisotropy = 1.0 | |
) | |
{ | |
ptrdiff_t n3 = n * n * n; | |
ptr.clear(); | |
col.clear(); | |
val.clear(); | |
rhs.clear(); | |
ptr.reserve(n3 + 1); | |
col.reserve(n3 * 7); | |
val.reserve(n3 * 7); | |
rhs.reserve(n3); | |
ValueType one = amgcl::math::identity<ValueType>(); | |
double hx = 1; | |
double hy = hx * anisotropy; | |
double hz = hy * anisotropy; | |
ptr.push_back(0); | |
for(ptrdiff_t k = 0, idx = 0; k < n; ++k) { | |
for(ptrdiff_t j = 0; j < n; ++j) { | |
for (ptrdiff_t i = 0; i < n; ++i, ++idx) { | |
if (k > 0) { | |
col.push_back(idx - n * n); | |
val.push_back(-1.0/(hz * hz) * one); | |
} | |
if (j > 0) { | |
col.push_back(idx - n); | |
val.push_back(-1.0/(hy * hy) * one); | |
} | |
if (i > 0) { | |
col.push_back(idx - 1); | |
val.push_back(-1.0/(hx * hx) * one); | |
} | |
col.push_back(idx); | |
val.push_back((2 / (hx * hx) + 2 / (hy * hy) + 2 / (hz * hz)) * one); | |
if (i + 1 < n) { | |
col.push_back(idx + 1); | |
val.push_back(-1.0/(hx * hx) * one); | |
} | |
if (j + 1 < n) { | |
col.push_back(idx + n); | |
val.push_back(-1.0/(hy * hy) * one); | |
} | |
if (k + 1 < n) { | |
col.push_back(idx + n * n); | |
val.push_back(-1.0/(hz * hz) * one); | |
} | |
rhs.push_back( amgcl::math::constant<RhsType>(1.0) ); | |
ptr.push_back( static_cast<IndexType>(col.size()) ); | |
} | |
} | |
} | |
return n3; | |
} | |
int main() { | |
// Preconditioner is in single precision: | |
typedef | |
amgcl::amg< | |
amgcl::backend::builtin<float>, | |
amgcl::coarsening::smoothed_aggregation, | |
amgcl::relaxation::spai0 | |
> | |
Precond; | |
// Solver is in double precision: | |
typedef | |
amgcl::solver::cg< | |
amgcl::backend::builtin<double> | |
> | |
Solver; | |
std::vector<int> ptr, col; | |
std::vector<double> val_d; | |
std::vector<double> rhs; | |
int n = sample_problem(128, val_d, col, ptr, rhs); | |
std::vector<double> x(n, 0.0); | |
std::vector<float> val_f(val_d.begin(), val_d.end()); | |
auto A_f = std::tie(n, ptr, col, val_f); | |
auto A_d = std::tie(n, ptr, col, val_d); | |
amgcl::profiler<> prof; | |
prof.tic("setup"); | |
Solver S(n); | |
Precond P(A_f); | |
prof.toc("setup"); | |
std::cout << P << std::endl; | |
int iters; | |
double error; | |
prof.tic("solve"); | |
std::tie(iters, error) = S(A_d, P, rhs, x); | |
prof.toc("solve"); | |
std::cout << "Iterations: " << iters << std::endl | |
<< "Error: " << error << std::endl | |
<< prof << std::endl; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cmake_minimum_required(VERSION 3.1) | |
project(hello) | |
find_package(amgcl) | |
add_executable(amgcl_mixed_precision amgcl_mixed_precision.cpp) | |
target_link_libraries(amgcl_mixed_precision amgcl::amgcl) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This actually works!
With preconditioner in single precision:
With preconditioner in double precision (just replaced
float
withdouble
in lines 91 and 111):Note the increase in reported memory usage for double precision, and the constant convergence rate!
Also, mixed precision variant is 15% faster for setup and 20% faster for solve.
Not sure how well will that work for more complex problems though.