Skip to content

Instantly share code, notes, and snippets.

@ddemidov
Last active November 14, 2018 13:09
Show Gist options
  • Save ddemidov/41c57413b209f986ef6cb3ab83d4950b to your computer and use it in GitHub Desktop.
Save ddemidov/41c57413b209f986ef6cb3ab83d4950b to your computer and use it in GitHub Desktop.
#include <vector>
#include <tuple>
#include <amgcl/adapter/crs_tuple.hpp>
#include <amgcl/backend/builtin.hpp>
#include <amgcl/amg.hpp>
#include <amgcl/coarsening/smoothed_aggregation.hpp>
#include <amgcl/relaxation/spai0.hpp>
#include <amgcl/solver/cg.hpp>
#include <amgcl/profiler.hpp>
// Generates matrix for poisson problem in a unit cube.
template <typename ValueType, typename IndexType, typename RhsType>
int sample_problem(
ptrdiff_t n,
std::vector<ValueType> &val,
std::vector<IndexType> &col,
std::vector<IndexType> &ptr,
std::vector<RhsType> &rhs,
double anisotropy = 1.0
)
{
ptrdiff_t n3 = n * n * n;
ptr.clear();
col.clear();
val.clear();
rhs.clear();
ptr.reserve(n3 + 1);
col.reserve(n3 * 7);
val.reserve(n3 * 7);
rhs.reserve(n3);
ValueType one = amgcl::math::identity<ValueType>();
double hx = 1;
double hy = hx * anisotropy;
double hz = hy * anisotropy;
ptr.push_back(0);
for(ptrdiff_t k = 0, idx = 0; k < n; ++k) {
for(ptrdiff_t j = 0; j < n; ++j) {
for (ptrdiff_t i = 0; i < n; ++i, ++idx) {
if (k > 0) {
col.push_back(idx - n * n);
val.push_back(-1.0/(hz * hz) * one);
}
if (j > 0) {
col.push_back(idx - n);
val.push_back(-1.0/(hy * hy) * one);
}
if (i > 0) {
col.push_back(idx - 1);
val.push_back(-1.0/(hx * hx) * one);
}
col.push_back(idx);
val.push_back((2 / (hx * hx) + 2 / (hy * hy) + 2 / (hz * hz)) * one);
if (i + 1 < n) {
col.push_back(idx + 1);
val.push_back(-1.0/(hx * hx) * one);
}
if (j + 1 < n) {
col.push_back(idx + n);
val.push_back(-1.0/(hy * hy) * one);
}
if (k + 1 < n) {
col.push_back(idx + n * n);
val.push_back(-1.0/(hz * hz) * one);
}
rhs.push_back( amgcl::math::constant<RhsType>(1.0) );
ptr.push_back( static_cast<IndexType>(col.size()) );
}
}
}
return n3;
}
int main() {
// Preconditioner is in single precision:
typedef
amgcl::amg<
amgcl::backend::builtin<float>,
amgcl::coarsening::smoothed_aggregation,
amgcl::relaxation::spai0
>
Precond;
// Solver is in double precision:
typedef
amgcl::solver::cg<
amgcl::backend::builtin<double>
>
Solver;
std::vector<int> ptr, col;
std::vector<double> val_d;
std::vector<double> rhs;
int n = sample_problem(128, val_d, col, ptr, rhs);
std::vector<double> x(n, 0.0);
std::vector<float> val_f(val_d.begin(), val_d.end());
auto A_f = std::tie(n, ptr, col, val_f);
auto A_d = std::tie(n, ptr, col, val_d);
amgcl::profiler<> prof;
prof.tic("setup");
Solver S(n);
Precond P(A_f);
prof.toc("setup");
std::cout << P << std::endl;
int iters;
double error;
prof.tic("solve");
std::tie(iters, error) = S(A_d, P, rhs, x);
prof.toc("solve");
std::cout << "Iterations: " << iters << std::endl
<< "Error: " << error << std::endl
<< prof << std::endl;
}
cmake_minimum_required(VERSION 3.1)
project(hello)
find_package(amgcl)
add_executable(amgcl_mixed_precision amgcl_mixed_precision.cpp)
target_link_libraries(amgcl_mixed_precision amgcl::amgcl)
@ddemidov
Copy link
Author

ddemidov commented Aug 26, 2018

This actually works!

With preconditioner in single precision:

Number of levels:    4
Operator complexity: 1.62
Grid complexity:     1.13
Memory footprint:    549.36 M

level     unknowns       nonzeros      memory
---------------------------------------------
    0      2097152       14581760    407.42 M (61.61%)
    1       263552        7918340    125.68 M (33.46%)
    2        16128        1114704     14.95 M ( 4.71%)
    3          789          53055      1.31 M ( 0.22%)

Iterations: 18
Error:      9.24305e-09

[Profile:     2.690 s] (100.00%)
[  setup:     0.702 s] ( 26.09%)
[  solve:     1.988 s] ( 73.91%)

With preconditioner in double precision (just replaced float with double in lines 91 and 111):

Number of levels:    4
Operator complexity: 1.62
Grid complexity:     1.13
Memory footprint:    744.72 M

level     unknowns       nonzeros      memory
---------------------------------------------
    0      2097152       14581760    553.22 M (61.61%)
    1       263552        7918340    168.88 M (33.46%)
    2        16128        1114704     20.01 M ( 4.71%)
    3          789          53055      2.61 M ( 0.22%)

Iterations: 18
Error:      9.24301e-09

[Profile:     3.218 s] (100.00%)
[  setup:     0.821 s] ( 25.50%)
[  solve:     2.398 s] ( 74.50%)

Note the increase in reported memory usage for double precision, and the constant convergence rate!
Also, mixed precision variant is 15% faster for setup and 20% faster for solve.
Not sure how well will that work for more complex problems though.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment