Skip to content

Instantly share code, notes, and snippets.

@t4c1
Created May 23, 2019 14:13
Show Gist options
  • Save t4c1/bb442ab936def8b0bf24f86fcb43363c to your computer and use it in GitHub Desktop.
Save t4c1/bb442ab936def8b0bf24f86fcb43363c to your computer and use it in GitHub Desktop.
benchamer for stan math ordinal regression
#define STAN_OPENCL
#define STAN_OPENCL_CACHE
#define OPENCL_PLATFORM_ID 0
#define OPENCL_DEVICE_ID 0
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define __CL_ENABLE_EXCEPTIONS
#include <stan/math/prim/scal/meta/partials_return_type.hpp>
#include <stan/math.hpp>
#include <Eigen/Dense>
#include <vector>
#include <cstdio>
#include <chrono>
#include <cmath>
using namespace Eigen;
using namespace stan;
using namespace stan::math;
using namespace std;
inline double log1p_exp1(double a) {
if (a > 0.0)
return a + std::log1p(exp(-a));
return std::log1p(exp(a));
}
inline double log1p_exp2(double a) {
return a * (a > 0.0) + std::log1p(exp(-std::abs(a)));
}
void test_log1p_exp() {
int N = 200000000;
// vector<double> v1, v2;
// for (int i = 0; i < N; i++) {
// double j = Matrix<double, Dynamic, 1>::Random(1)[0]-0.5;
// if (log1p_exp1(j)!=log1p_exp2(j)){
// cout << j << " " << log1p_exp1(j) << " " << log1p_exp2(j) << endl;
// }
// v1.push_back(j);
// v2.push_back(0);
// }
for(int j=0;j<5;j++) {
double t = 10;
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < N; i++) {
t = stan::math::log1p_exp(t) * (Matrix<double, Dynamic, 1>::Random(1)[0]-0.5);
}
int time1 = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
t = 10;
start = std::chrono::steady_clock::now();
for (int i = 0; i < N; i++) {
t = log1p_exp2(t) * (Matrix<double, Dynamic, 1>::Random(1)[0]-0.5);
}
int time2 = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
cout << time1 << " " << time2 << endl;
}
}
template <bool propto, typename T_y, typename T_x, typename T_beta, typename T_cuts>
typename stan::return_type<T_x, T_beta, T_cuts>::type
ordered_logistic_glm_simple_lpmf(
const T_y& y, const Matrix<T_x, Dynamic, Dynamic>& x,
const T_beta& beta, const T_cuts& cuts) {
typedef typename stan::return_type<T_x, T_beta>::type T_x_beta;
using stan::math::as_column_vector_or_scalar;
auto& beta_col = as_column_vector_or_scalar(beta);
Eigen::Matrix<T_x_beta, Dynamic, 1> location = x.template cast<T_x_beta>() * beta_col.template cast<T_x_beta>();
return ordered_logistic_lpmf<propto>(y, location, cuts);
}
template<typename T>
void perf_test(const char* name, T func, const size_t N_instances = 30000, const size_t N_attributes = 1000, const size_t N_classes = 20, int repeats = 10){
Matrix<int, Dynamic, 1> y(N_instances);
for(int i=0;i<N_instances;i++){
y[i] = Matrix<unsigned int, Dynamic, 1>::Random(1)[0] % N_classes + 1;
}
MatrixXd x = MatrixXd::Random(N_instances, N_attributes);
volatile VectorXd beta_ = VectorXd::Random(N_attributes);
volatile VectorXd cuts_ = (VectorXd::Random(N_classes-1).array()+1.1)/2/N_classes;
for(int i=1;i<N_classes-1;i++){
const_cast<Matrix<double, Dynamic, 1>*>(&cuts_)->coeffRef(i)+=const_cast<Matrix<double, Dynamic, 1>*>(&cuts_)->coeff(i-1);
}
check_bounded("", "Vector of dependent variables", y, 1, N_classes);
check_finite("", "Matrix of independent variables", x);
#ifdef STAN_OPENCL
#endif
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < repeats; i++) {
Matrix<var, Dynamic, 1> cuts = *const_cast<Matrix<double, Dynamic, 1>*>(&cuts_);
Matrix<var, Dynamic, 1> beta = *const_cast<VectorXd*>(&beta_);
vector<var> grad_args(beta.data(), beta.data() + beta.size());
for(int j=0;j<N_classes-1;j++) {
grad_args.push_back(cuts[j]);
}
var target = func(y, x, beta, cuts);
volatile double no_opt = target.val();
vector<double> grad;
target.grad(grad_args, grad);
for (double val : grad) {
no_opt = val;
}
recover_memory();
}
int time = std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count();
printf("%s, %d, %d, %d, %d, %d\n", name, N_instances, N_attributes, N_classes, repeats, time);
recover_memory();
ChainableStack::instance().memalloc_.free_all();
}
int main(){
// perf_test("new", ordered_logistic_glm_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, 500000, 300, 100);
// perf_test("new", ordered_logistic_glm_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, 500000, 300, 3);
// perf_test("new", ordered_logistic_glm_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, 1000000, 3, 1000);
// perf_test("old", ordered_logistic_glm_simple_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, 50000, 300, 100);
// perf_test("old", ordered_logistic_glm_simple_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, 50000, 300, 3);
// perf_test("old", ordered_logistic_glm_simple_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, 1000000, 3, 1000);
// perf_test("old", ordered_logistic_glm_simple_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, 1000000, 3, 3);
for(int i=0;i<5;i++) {
for(int instances : {5000, 10000, 20000, 30000, 40000, 50000}){
perf_test("new", ordered_logistic_glm_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, instances, 300, 100);
perf_test("old", ordered_logistic_glm_simple_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, instances, 300, 100);
}
for(int instances : {5000, 10000, 20000, 30000, 40000, 50000}){
perf_test("new", ordered_logistic_glm_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, instances, 300, 3);
perf_test("old", ordered_logistic_glm_simple_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, instances, 300, 3);
}
for(int instances : {30000, 100000, 200000, 400000, 600000, 800000, 1000000}){
perf_test("new", ordered_logistic_glm_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, instances, 3, 1000);
perf_test("old", ordered_logistic_glm_simple_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, instances, 3, 1000);
}
for(int instances : {30000, 100000, 200000, 400000, 600000, 800000, 1000000}){
perf_test("new", ordered_logistic_glm_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, instances, 3, 3);
perf_test("old", ordered_logistic_glm_simple_lpmf<false, Matrix<int,Dynamic,1>, double, Matrix<var, Dynamic, 1>, Matrix<var, Dynamic, 1>>, instances, 3, 3);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment