Skip to content

Instantly share code, notes, and snippets.

@SteveBronder
Created March 7, 2021 07:33
Show Gist options
  • Save SteveBronder/87b202008faea2ab3c12948683a88a31 to your computer and use it in GitHub Desktop.
Save SteveBronder/87b202008faea2ab3c12948683a88a31 to your computer and use it in GitHub Desktop.
#include <benchmark/benchmark.h>
#include <stan/math.hpp>
#include <utility>
// One-shot guard: the expensive warm-up in toss_me is performed only on the
// first invocation of toss_me in this process.
static bool needs_done = true;

/**
 * Warm-up "benchmark" whose purpose is just to fill up the var stack
 * allocator before the benchmarks registered after it are run.
 *
 * On the first invocation it builds a max_alloc_r x max_alloc_c matrix of
 * vars and a length max_alloc_c vector of vars, reduces their product to a
 * single var, runs the reverse pass (followed by zeroing all adjoints) inside
 * the benchmark loop, and finally calls stan::math::recover_memory().
 *
 * On any later invocation the setup is skipped, but the benchmark state loop
 * is still run to completion: the benchmark runner may call a benchmark
 * function more than once while choosing an iteration count, and it expects
 * every call to iterate over the state.
 *
 * @tparam max_alloc_r number of rows of the throw-away matrix
 * @tparam max_alloc_c number of columns of the throw-away matrix (and length
 *   of the throw-away vector)
 * @param state benchmark state object that drives the iteration loop
 */
template <int max_alloc_r, int max_alloc_c>
static void toss_me(benchmark::State& state) {
  using stan::math::var;
  if (!needs_done) {
    // Warm-up already happened; only satisfy the runner's loop contract.
    for (auto _ : state) {
    }
    return;
  }
  needs_done = false;
  Eigen::Matrix<var, -1, -1> x(Eigen::MatrixXd::Random(max_alloc_r, max_alloc_c));
  Eigen::Matrix<var, -1, 1> y(Eigen::VectorXd::Random(max_alloc_c));
  var lp = stan::math::sum(stan::math::multiply(x, y));
  // Keep the compiler from discarding the expression graph built above.
  benchmark::DoNotOptimize(lp.vi_);
  for (auto _ : state) {
    lp.grad();
    // Reset adjoints so that repeated reverse passes do not accumulate.
    stan::math::set_zero_all_adjoints();
  }
  stan::math::recover_memory();
}
// Dimensions of the dense matrix operand used by this file's benchmarks:
// 65536 rows by 8192 columns, i.e. 2^29 doubles in total.
constexpr int extra_alloc_r = 65536;
constexpr int extra_alloc_c = 8192;
// Dedicated stack allocator that supplies the storage requested in make_x().
// It is defined before x_val so that, within this translation unit, it is
// constructed before make_x() is called to initialize x_val.
// NOTE(review): the argument passed here is an element count
// (rows * columns); if the stack_alloc constructor expects a size in bytes,
// the initial block is smaller than the array make_x() requests -- confirm
// against the stack_alloc documentation.
static stan::math::stack_alloc arena_mem(extra_alloc_r * extra_alloc_c);
inline auto make_x() {
stan::arena_t<Eigen::MatrixXd>::Base x_val(arena_mem.alloc_array<double>(extra_alloc_r * extra_alloc_c), extra_alloc_r, extra_alloc_c);
x_val = Eigen::MatrixXd::Random(extra_alloc_r, extra_alloc_c).eval();
return x_val;
}
// File-level matrix operand read by multiply_matrix_vector_var_double; it is
// filled once, when this variable is initialized, by make_x().
auto x_val = make_x();
/**
 * Benchmark sum(multiply(x_val, y)) plus its reverse pass, where x_val is the
 * file-level matrix of doubles and y is a freshly drawn var_value vector.
 *
 * Timing is done by hand: only the forward evaluation and the grad() call sit
 * between the two clock reads. Construction of y happens before the first
 * read and recover_memory() after the second, so neither is included in the
 * time handed to SetIterationTime.
 *
 * @param state benchmark state object that drives the iteration loop and
 *   receives the manually measured time of each iteration
 */
static void multiply_matrix_vector_var_double(benchmark::State& state) {
  using stan::math::var;
  using bench_clock = std::chrono::high_resolution_clock;
  using fp_seconds = std::chrono::duration<double>;
  for (auto _ : state) {
    // New random operand for every iteration, built outside the timed region.
    stan::math::var_value<Eigen::VectorXd> y(
        Eigen::VectorXd::Random(extra_alloc_c));
    const auto tic = bench_clock::now();
    var lp = sum(multiply(x_val, y));
    lp.grad();
    benchmark::ClobberMemory();
    const auto toc = bench_clock::now();
    const auto timed = std::chrono::duration_cast<fp_seconds>(toc - tic);
    state.SetIterationTime(timed.count());
    // Untimed cleanup before the next iteration.
    stan::math::recover_memory();
    benchmark::ClobberMemory();
  }
}
// Preallocate a bunch of memory, as if we were in the middle of iterations.
// toss_me is registered first so that (presumably, given that benchmarks run
// in registration order -- confirm) its warm-up happens before the benchmark
// of interest.
BENCHMARK_TEMPLATE(toss_me, extra_alloc_r, extra_alloc_c);
// The benchmark body measures its own timed region and reports it through
// SetIterationTime, hence manual time.
BENCHMARK(multiply_matrix_vector_var_double)->UseManualTime();
// Supplies the program entry point.
BENCHMARK_MAIN();
#include <benchmark/benchmark.h>
#include <stan/math.hpp>
#include <utility>
// One-shot guard: the expensive warm-up in toss_me is performed only on the
// first invocation of toss_me in this process.
static bool needs_done = true;

/**
 * Warm-up "benchmark" whose purpose is just to fill up the var stack
 * allocator before the benchmarks registered after it are run.
 *
 * The first invocation builds a max_alloc_r x max_alloc_c matrix of vars and
 * a length max_alloc_c vector of vars, reduces their product to a single var,
 * runs the reverse pass (followed by zeroing all adjoints) inside the
 * benchmark loop, and then calls stan::math::recover_memory().
 *
 * Later invocations skip that setup but still run the benchmark state loop
 * to completion: the benchmark runner may call a benchmark function more
 * than once while choosing an iteration count, and it expects every call to
 * iterate over the state.
 *
 * @tparam max_alloc_r number of rows of the throw-away matrix
 * @tparam max_alloc_c number of columns of the throw-away matrix (and length
 *   of the throw-away vector)
 * @param state benchmark state object that drives the iteration loop
 */
template <int max_alloc_r, int max_alloc_c>
static void toss_me(benchmark::State& state) {
  using stan::math::var;
  if (!needs_done) {
    // Warm-up already happened; only satisfy the runner's loop contract.
    for (auto _ : state) {
    }
    return;
  }
  needs_done = false;
  Eigen::Matrix<var, -1, -1> x(Eigen::MatrixXd::Random(max_alloc_r, max_alloc_c));
  Eigen::Matrix<var, -1, 1> y(Eigen::VectorXd::Random(max_alloc_c));
  var lp = stan::math::sum(stan::math::multiply(x, y));
  // Keep the compiler from discarding the expression graph built above.
  benchmark::DoNotOptimize(lp.vi_);
  for (auto _ : state) {
    lp.grad();
    // Reset adjoints so that repeated reverse passes do not accumulate.
    stan::math::set_zero_all_adjoints();
  }
  stan::math::recover_memory();
}
// Dimensions of the dense matrix operand used by this file's benchmarks:
// 65536 rows by 8192 columns, i.e. 2^29 doubles in total.
constexpr int extra_alloc_r = 65536;
constexpr int extra_alloc_c = 8192;
inline auto make_x() {
return Eigen::MatrixXd::Random(extra_alloc_r, extra_alloc_c).eval();
}
// File-level matrix operand read by multiply_matrix_vector_var_double; it is
// filled once, when this variable is initialized, by make_x().
auto x_val = make_x();
/**
 * Benchmark sum(multiply(x_val, y)) plus its reverse pass, where x_val is the
 * file-level matrix of doubles and y is a freshly drawn var_value vector.
 *
 * Timing is done by hand: only the forward evaluation and the grad() call sit
 * between the two clock reads. Construction of y happens before the first
 * read and recover_memory() after the second, so neither is included in the
 * time handed to SetIterationTime.
 *
 * @param state benchmark state object that drives the iteration loop and
 *   receives the manually measured time of each iteration
 */
static void multiply_matrix_vector_var_double(benchmark::State& state) {
  using stan::math::var;
  using bench_clock = std::chrono::high_resolution_clock;
  using fp_seconds = std::chrono::duration<double>;
  for (auto _ : state) {
    // New random operand for every iteration, built outside the timed region.
    stan::math::var_value<Eigen::VectorXd> y(
        Eigen::VectorXd::Random(extra_alloc_c));
    const auto tic = bench_clock::now();
    var lp = sum(multiply(x_val, y));
    lp.grad();
    benchmark::ClobberMemory();
    const auto toc = bench_clock::now();
    const auto timed = std::chrono::duration_cast<fp_seconds>(toc - tic);
    state.SetIterationTime(timed.count());
    // Untimed cleanup before the next iteration.
    stan::math::recover_memory();
    benchmark::ClobberMemory();
  }
}
// Preallocate a bunch of memory, as if we were in the middle of iterations.
// toss_me is registered first so that (presumably, given that benchmarks run
// in registration order -- confirm) its warm-up happens before the benchmark
// of interest.
BENCHMARK_TEMPLATE(toss_me, extra_alloc_r, extra_alloc_c);
// The benchmark body measures its own timed region and reports it through
// SetIterationTime, hence manual time.
BENCHMARK(multiply_matrix_vector_var_double)->UseManualTime();
// Supplies the program entry point.
BENCHMARK_MAIN();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment