Last active
March 28, 2020 09:48
Benchmark for a simple sparse lower triangular backsolve (see https://stackoverflow.com/questions/60232977/optimizing-the-backward-solve-for-a-sparse-lower-triangular-linear-system)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <benchmark/benchmark.h> | |
#include <cassert> | |
#include <fstream> | |
#include <numeric> | |
#include <sstream> | |
#include <vector> | |
struct TestData { | |
std::vector<int> Lp; | |
std::vector<int> Li; | |
std::vector<double> Lx; | |
}; | |
TestData getTestDataFromMtxFile(const std::string& filepath) { | |
// world's worst implementation of a Matrix Market file reader, sorry | |
// only the lower triangle is read | |
std::ifstream file(filepath); | |
std::string line; | |
int current_col = -1; | |
TestData data; | |
int num_rows; | |
int num_cols; | |
int nnz; | |
bool first_line = true; | |
while (std::getline(file, line)) { | |
if (line[0] == '%') { | |
continue; | |
} | |
if (first_line) { | |
std::stringstream stream(line); | |
stream >> num_rows; | |
stream >> num_cols; | |
stream >> nnz; | |
assert(num_rows == num_cols); | |
data.Lp.reserve(num_cols + 1); | |
data.Li.reserve(nnz); | |
data.Lx.reserve(nnz); | |
first_line = false; | |
continue; | |
} | |
int row; | |
int col; | |
double val; | |
{ | |
std::stringstream stream(line); | |
stream >> row; | |
stream >> col; | |
stream >> val; | |
} | |
row -= 1; | |
col -= 1; | |
while (col > current_col) { | |
data.Lp.push_back(data.Li.size()); | |
current_col += 1; | |
} | |
if (row > current_col) { | |
data.Li.push_back(row); | |
data.Lx.push_back(val); | |
} | |
} | |
while (current_col < num_cols) { | |
data.Lp.push_back(data.Li.size()); | |
current_col += 1; | |
} | |
assert(data.Lp.size() == num_cols + 1); | |
assert(data.Li.size() == nnz); | |
assert(data.Lx.size() == nnz); | |
return data; | |
} | |
struct BacksolveBaseline { | |
void operator()(const int* __restrict__ Lp, | |
const int* __restrict__ Li, | |
const double* __restrict__ Lx, | |
const int n, | |
double* __restrict__ x) { | |
for (int i=n-1; i>=0; --i) { | |
for (int j=Lp[i]; j<Lp[i+1]; ++j) { | |
x[i] -= Lx[j] * x[Li[j]]; | |
} | |
} | |
} | |
}; | |
struct BacksolveOptimized { | |
void operator()(const int* __restrict__ Lp, | |
const int* __restrict__ Li, | |
const double* __restrict__ Lx, | |
const int n, | |
double* __restrict__ x) { | |
for (int i=n-1; i>=0; --i) { | |
const int col_begin = Lp[i]; | |
const int col_end = Lp[i+1]; | |
const bool is_col_nnz_odd = (col_end - col_begin) & 1; | |
double xi_temp = x[i]; | |
int j = col_end - 1; | |
if (is_col_nnz_odd) { | |
xi_temp -= Lx[j] * x[Li[j]]; | |
--j; | |
} | |
for (; j >= col_begin; j -= 2) { | |
xi_temp -= Lx[j - 0] * x[Li[j - 0]] + | |
Lx[j - 1] * x[Li[j - 1]]; | |
} | |
x[i] = xi_temp; | |
} | |
} | |
}; | |
void fillRhs(std::vector<double>& x) { | |
// just simulate some kind of semi complex calculation for the right hand side | |
std::iota(x.begin(), x.end(), -x.size() / 2.0); | |
} | |
void benchFillRhs(benchmark::State& state) { | |
const auto test_data = getTestDataFromMtxFile("../reorientation_6.mtx"); | |
const auto dimension = test_data.Lp.size() - 1; | |
std::vector<double> x(dimension); | |
for (auto _ : state) { | |
fillRhs(x); | |
benchmark::DoNotOptimize(x.data()); | |
benchmark::ClobberMemory(); | |
} | |
} | |
template <class BacksolveFunction> | |
void benchBacksolve(benchmark::State& state) { | |
const auto test_data = getTestDataFromMtxFile("../reorientation_6.mtx"); | |
const auto dimension = test_data.Lp.size() - 1; | |
std::vector<double> x(dimension); | |
BacksolveFunction backsolveFunction; | |
for (auto _ : state) { | |
fillRhs(x); | |
backsolveFunction(test_data.Lp.data(), test_data.Li.data(), | |
test_data.Lx.data(), dimension, x.data()); | |
benchmark::DoNotOptimize(x.data()); | |
benchmark::ClobberMemory(); | |
} | |
} | |
void benchBacksolveBaseline(benchmark::State& state) { | |
benchBacksolve<BacksolveBaseline>(state); | |
} | |
void benchBacksolveOptimized(benchmark::State& state) { | |
benchBacksolve<BacksolveOptimized>(state); | |
} | |
BENCHMARK(benchFillRhs); | |
BENCHMARK(benchBacksolveBaseline); | |
BENCHMARK(benchBacksolveOptimized); | |
BENCHMARK_MAIN(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment