Last active
September 4, 2018 14:17
-
-
Save bbbales2/a1689764f0fda6df561e858026f4e8d9 to your computer and use it in GitHub Desktop.
adj_jac_apply addition benchmarks
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stan/math/rev/core.hpp> | |
#include <test/unit/math/rev/mat/util.hpp> | |
#include <gtest/gtest.h> | |
#include <algorithm> | |
#include <sstream> | |
#include <tuple> | |
#include <vector> | |
#include <chrono> | |
namespace stan { | |
namespace math { | |
/* | |
* This is the simplest adj_jac functor in town | |
*/ | |
struct AddFunctor { | |
template <std::size_t size> | |
Eigen::VectorXd operator()(const std::array<bool, size>& needs_adj, const Eigen::VectorXd& x1, const Eigen::VectorXd& x2) { | |
check_size_match("AddFunctor::operator()", "x1", x1.size(), "x2", x2.size()); | |
return x1 + x2; | |
} | |
template <std::size_t size> | |
auto multiply_adjoint_jacobian(const std::array<bool, size>& needs_adj, | |
const Eigen::VectorXd& adj) { | |
return std::make_tuple(adj, adj); | |
} | |
}; | |
/* | |
* This is the simplest adj_jac functor in town, inefficiently written | |
*/ | |
struct AddFunctorInefficient { | |
template <std::size_t size> | |
Eigen::VectorXd operator()(const std::array<bool, size>& needs_adj, const Eigen::VectorXd& x1, const Eigen::VectorXd& x2) { | |
check_size_match("AddFunctorInefficient::operator()", "x1", x1.size(), "x2", x2.size()); | |
return x1 + x2; | |
} | |
template <std::size_t size> | |
auto multiply_adjoint_jacobian(const std::array<bool, size>& needs_adj, | |
const Eigen::VectorXd& adj) { | |
Eigen::MatrixXd jac(adj.size(), adj.size()); | |
for (int i = 0; i < adj.size(); ++i) { | |
for (int j = 0; j < adj.size(); ++j) { | |
jac(i, j) = 0.0; | |
} | |
jac(i, i) = 1.0; | |
} | |
return std::make_tuple((adj.transpose() * jac).eval(), (adj.transpose() * jac).eval()); | |
} | |
}; | |
// Reference implementation: let plain reverse-mode autodiff handle x1 + x2.
auto AddFunctorAutodiffed = [](auto& x1, auto& x2) {
  check_size_match("AddFunctorAutodiffed::operator()", "x1", x1.size(),
                   "x2", x2.size());
  auto result = (x1 + x2).eval();
  return result;
};
} | |
} | |
template<typename F> | |
double run_benchmark(F f, int N, int R) { | |
stan::math::recover_memory(); | |
EXPECT_EQ(0, stan::math::ChainableStack::instance().var_stack_.size()); | |
EXPECT_EQ(0, stan::math::ChainableStack::instance().var_nochain_stack_.size()); | |
Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1> x1(N); | |
Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1> x2(N); | |
for (int i = 0; i < N; ++i) { | |
x1(i) = i / static_cast<double>(N); | |
x2(i) = i / static_cast<double>(N); | |
} | |
stan::math::start_nested(); | |
/** | |
* By nesting y, we'll only be calling chain on the output variables. | |
* | |
* If you put the forward calculation of y inside the loop | |
* then you have to make sure you nest/unnest each iteration so | |
* you aren't dirtying up the autodiff stack. | |
* | |
* The results are about the same, and this focuses the benchmark | |
* on the reverse mode. | |
*/ | |
stan::math::var y = stan::math::sum(f(x1, x2)); | |
auto start = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < R; ++i) { | |
stan::math::grad(y.vi_); | |
stan::math::set_zero_all_adjoints_nested(); | |
} | |
double time = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - start).count() / R; | |
stan::math::recover_memory_nested(); | |
return time; | |
} | |
// Print CSV timing rows for the three addition implementations, 5 reps each.
TEST(AgradRev, benchmarks) {
  std::cout << "method, N, time" << std::endl;
  auto inefficient = [](auto x1, auto x2) {
    return stan::math::adj_jac_apply<stan::math::AddFunctorInefficient>(x1,
                                                                        x2);
  };
  auto efficient = [](auto x1, auto x2) {
    return stan::math::adj_jac_apply<stan::math::AddFunctor>(x1, x2);
  };
  for (int N = 100; N <= 500; N += 100) {
    for (int rep = 0; rep < 5; ++rep) {
      std::cout << "autodiffed, " << N << ", "
                << run_benchmark(stan::math::AddFunctorAutodiffed, N, 500)
                << std::endl;
      std::cout << "inefficient, " << N << ", "
                << run_benchmark(inefficient, N, 500) << std::endl;
      std::cout << "efficient, " << N << ", "
                << run_benchmark(efficient, N, 500) << std::endl;
    }
  }
}
// Check that all three addition implementations produce an identity
// Jacobian with respect to both inputs.
TEST(AgradRev, test_grads_match) {
  using stan::math::var;
  stan::math::recover_memory();
  Eigen::Matrix<var, Eigen::Dynamic, 1> x1(5);
  Eigen::Matrix<var, Eigen::Dynamic, 1> x2(5);
  x1 << 1.0, 2.0, 3.0, 4.0, 5.0;
  x2 << 6.0, 7.0, 8.0, 9.0, 10.0;

  // For y = x1 + x2 the Jacobian w.r.t. each input is the identity:
  // after a reverse sweep from y(j), the adjoint of x1(i)/x2(i) must be
  // 1 when i == j and 0 otherwise. This check was previously copy-pasted
  // three times; it is factored out here so all implementations are
  // verified identically.
  auto expect_identity_jacobian
      = [&](Eigen::Matrix<var, Eigen::Dynamic, 1>& y) {
          for (int j = 0; j < y.size(); ++j) {
            stan::math::set_zero_all_adjoints();
            y(j).grad();
            for (int i = 0; i < x1.size(); ++i) {
              double expected = (i == j) ? 1.0 : 0.0;
              EXPECT_FLOAT_EQ(expected, x1(i).adj());
              EXPECT_FLOAT_EQ(expected, x2(i).adj());
            }
          }
        };

  Eigen::Matrix<var, Eigen::Dynamic, 1> y1
      = stan::math::adj_jac_apply<stan::math::AddFunctor>(x1, x2);
  expect_identity_jacobian(y1);

  Eigen::Matrix<var, Eigen::Dynamic, 1> y2
      = stan::math::adj_jac_apply<stan::math::AddFunctorInefficient>(x1, x2);
  expect_identity_jacobian(y2);

  Eigen::Matrix<var, Eigen::Dynamic, 1> y3
      = stan::math::AddFunctorAutodiffed(x1, x2);
  expect_identity_jacobian(y3);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stan/math/rev/core.hpp> | |
#include <test/unit/math/rev/mat/util.hpp> | |
#include <gtest/gtest.h> | |
#include <algorithm> | |
#include <sstream> | |
#include <tuple> | |
#include <vector> | |
#include <chrono> | |
namespace stan { | |
namespace math { | |
struct SimplexConstrainAutodiffed { | |
template <typename T> | |
Eigen::Matrix<T, Eigen::Dynamic, 1> operator()( | |
const Eigen::Matrix<T, Eigen::Dynamic, 1>& y) { | |
// cut & paste simplex_constrain(Eigen::Matrix, T) w/o Jacobian | |
using Eigen::Dynamic; | |
using Eigen::Matrix; | |
using std::log; | |
typedef typename index_type<Matrix<T, Dynamic, 1> >::type size_type; | |
int Km1 = y.size(); | |
Matrix<T, Dynamic, 1> x(Km1 + 1); | |
T stick_len(1.0); | |
for (size_type k = 0; k < Km1; ++k) { | |
T z_k(stan::math::inv_logit(y(k) - log(Km1 - k))); | |
x(k) = stick_len * z_k; | |
stick_len -= x(k); | |
} | |
x(Km1) = stick_len; | |
return x; | |
} | |
}; | |
} // namespace math | |
} // namespace stan | |
template <typename F> | |
double run_benchmark(F f, int N, int R) { | |
stan::math::recover_memory(); | |
EXPECT_EQ(0, stan::math::ChainableStack::instance().var_stack_.size()); | |
EXPECT_EQ(0, | |
stan::math::ChainableStack::instance().var_nochain_stack_.size()); | |
Eigen::Matrix<stan::math::var, Eigen::Dynamic, 1> x(N); | |
for (int i = 0; i < N; ++i) { | |
x(i) = i / static_cast<double>(N); | |
} | |
stan::math::start_nested(); | |
/** | |
* By nesting y, we'll only be calling chain on the output variables. | |
* | |
* If you put the forward calculation of y inside the loop | |
* then you have to make sure you nest/unnest each iteration so | |
* you aren't dirtying up the autodiff stack. | |
* | |
* The results are about the same, and this focuses the benchmark | |
* on the reverse mode. | |
*/ | |
stan::math::var y = stan::math::sum(f(x)); | |
auto start = std::chrono::high_resolution_clock::now(); | |
for (int i = 0; i < R; ++i) { | |
stan::math::grad(y.vi_); | |
stan::math::set_zero_all_adjoints_nested(); | |
} | |
double time = std::chrono::duration<double>( | |
std::chrono::high_resolution_clock::now() - start) | |
.count() | |
/ R; | |
stan::math::recover_memory_nested(); | |
return time; | |
} | |
// Print CSV timing rows comparing autodiffed vs adj_jac_apply
// simplex_constrain, 5 reps per size.
TEST(AgradRev, benchmarks) {
  std::cout << "method, N, time" << std::endl;
  auto efficient = [](auto x) {
    return stan::math::adj_jac_apply<stan::math::simplex_constrain_op>(x);
  };
  for (int rep = 0; rep < 5; ++rep) {
    for (int N = 10; N <= 200; N += 10) {
      std::cout << "autodiffed, " << N << ", "
                << run_benchmark(stan::math::SimplexConstrainAutodiffed{}, N,
                                 500)
                << std::endl;
      std::cout << "efficient, " << N << ", "
                << run_benchmark(efficient, N, 500) << std::endl;
    }
  }
}
// Verify the adj_jac_apply simplex_constrain_op matches the plain
// autodiff baseline, both in values and in gradients.
TEST(AgradRev, test_grads_match) {
  using stan::math::var;
  stan::math::recover_memory();

  Eigen::Matrix<var, Eigen::Dynamic, 1> x1(5);
  x1 << 1.0, 2.0, 3.0, 4.0, 5.0;

  Eigen::Matrix<var, Eigen::Dynamic, 1> y1
      = stan::math::adj_jac_apply<stan::math::simplex_constrain_op>(x1);
  Eigen::Matrix<var, Eigen::Dynamic, 1> y2
      = stan::math::SimplexConstrainAutodiffed{}(x1);

  // Run one reverse sweep from `out` and collect d out / d x1(j).
  auto gradient_of = [&](var& out) {
    stan::math::set_zero_all_adjoints();
    out.grad();
    Eigen::VectorXd g(x1.size());
    for (int j = 0; j < x1.size(); ++j) {
      g(j) = x1(j).adj();
    }
    return g;
  };

  // Forward values must agree element-wise.
  for (int i = 0; i < y1.size(); ++i) {
    EXPECT_FLOAT_EQ(y1(i).val(), y2(i).val());
  }

  // Gradients of every output element must agree element-wise.
  for (int i = 0; i < y1.size(); ++i) {
    Eigen::VectorXd g1 = gradient_of(y1(i));
    Eigen::VectorXd g2 = gradient_of(y2(i));
    for (int j = 0; j < x1.size(); ++j) {
      EXPECT_FLOAT_EQ(g1(j), g2(j));
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment