boennecd/laplaceSurvTmbIssue.cpp

## laplaceSurvTmbIssue.cpp
/// tmb_includes.h
#ifndef TMB_INCLUDES_H
#define TMB_INCLUDES_H

/* TMB */
#include <TMB.hpp>

namespace CppAD {
inline double Value(double const x){
  return x;
}
}

template<class Type>
Type vec_dot(Type const *, Type const *, size_t const);

template<class Type>
Type vec_dot(vector<Type> const &x, vector<Type> const &y){
#ifdef DO_CHECKS
  if(x.size() != y.size())
    throw std::invalid_argument("vec_dot<Type>: dimension do not match");
#endif
  return vec_dot(x.data(), y.data(), x.size());
}

template<class Type>
Type vec_dot
  (Eigen::Map<const Eigen::Matrix<Type,Eigen::Dynamic,1> > &x,
   vector<Type> const &y){
#ifdef DO_CHECKS
  if(x.size() != y.size())
    throw std::invalid_argument("vec_dot<Type>: dimension do not match");
#endif
  if(x.innerStride() == 1L)
    return vec_dot(x.data(), y.data(), x.size());

  Type out(0.);
  for(int i = 0; i < x.size(); ++i)
    out += x[i] * y[i];
  return out;
}

template<class Type>
Type quad_form(Type const*, Type const*, Type const*, size_t const);

template<class Type>
Type quad_form
  (vector<Type> const &x, matrix<Type> const &A, vector<Type> const &y){
#ifdef DO_CHECKS
  if(x.size() != A.rows() or y.size() != A.cols())
    throw std::invalid_argument("quad_form<Type>: dimension do not match");
  if(A.rows() != A.cols())
    throw std::invalid_argument("quad_form<Type>: invalid A");
#endif
  return quad_form(x.data(), A.data(), y.data(), A.cols());
}

template<class Type>
Type quad_form_sym(Type const*, Type const*, size_t const);

template<class Type>
Type quad_form_sym(vector<Type> const &x, matrix<Type> const &A){
#ifdef DO_CHECKS
  if(x.size() != A.rows() or x.size() != A.cols())
    throw std::invalid_argument(
        "quad_form_sym<Type>: dimension do not match");
#endif
  return quad_form_sym(x.data(), A.data(), A.cols());
}

template<class Type>
Type mat_mult_trace(matrix<Type> const &X, matrix<Type> const &Y){
#ifdef DO_CHECKS
  if(X.rows() != Y.cols() or X.cols() != Y.rows())
    throw std::invalid_argument(
        "mat_mult_trace<Type>: dimension do not match");
#endif
  return vec_dot(X.data(), Y.data(), X.rows() * X.cols());
}

#endif // ifndef TMB_INCLUDES_H

/// src/common.h
/* Common arguments used in approximations.
 *
 * Args:
 *   tobs: observed times.
 *   event: zero/one variables for whether the event is observed.
 *   X: fixed effect design matrix.
 *   XD: derivative of X wrt time.
 *   Z: random effect design matrix.
 *   grp: integer vector with group identifier.
 *   eps: small tolerance for positivity constraint on hazards.
 *   kappa: one-sided L2 penalty for positivity constraint.
 *   b: fixed effect coefficients.
 *   theta: covariance matrix parameters. See get_vcov_from_trian.
 */
#ifndef COMMON_ARGS
#define COMMON_ARGS(TYPE, ACCUMLATOR)                          \
  ACCUMLATOR<TYPE> &result,                                    \
  vector<TYPE> const &tobs, vector<TYPE> const &event,         \
  matrix<TYPE> const &X, matrix<TYPE> const &XD,               \
 matrix<TYPE> const &Z, vector<int> const &grp,                \
 TYPE const &eps, TYPE const &kappa, vector<TYPE> const &b,    \
  vector<TYPE> const &theta, std::string const &link,          \
  vector<int> const &grp_size
#endif

#ifndef COMMON_CALL
#define COMMON_CALL                                              \
  result, tobs, event, X, XD, Z, grp, eps, kappa, b, theta,      \
  link, grp_size
#endif

#ifndef SETUP_DATA
#define SETUP_DATA                                             \
  /* common data objects and parameters */                     \
  DATA_STRING(app_type);                                       \
  DATA_VECTOR(tobs);                                           \
                                                               \
  DATA_VECTOR(event);                                          \
  DATA_MATRIX(X);                                              \
  DATA_MATRIX(XD);                                             \
  DATA_MATRIX(Z);                                              \
  DATA_IVECTOR(grp);                                           \
  DATA_STRING(link);                                           \
  DATA_IVECTOR(grp_size);                                      \
                                                               \
  /* They are marked as parameter such that the user can       \
   * change them later */                                      \
  PARAMETER(eps);                                              \
  PARAMETER(kappa);                                            \
                                                               \
  PARAMETER_VECTOR(b);                                         \
  PARAMETER_VECTOR(theta);
#endif

#ifndef SETUP_DATA_CHECK
#define SETUP_DATA_CHECK                                       \
  /* checks */                                                 \
  {                                                            \
    unsigned const n = tobs.size();                            \
    auto check_rows = [n](matrix<Type> const &x,               \
                          char const *msg){                    \
      if(x.rows() != n)                                        \
        error(msg);                                            \
    };                                                         \
    check_rows(X , "invalid 'X'");                             \
    check_rows(XD, "invalid 'XD'");                            \
    check_rows(Z , "invalid 'Z'");                             \
    if(n != event.size())                                      \
      error("invalid 'event'");                                \
    if(n != grp.size())                                        \
      error("invalid 'grp'");                                  \
                                                               \
    if(b.size()  != X.cols())                                  \
      error("invalid 'b'");                                    \
    if(XD.cols() != X.cols())                                  \
      error("invalid 'XD' (# columns)");                       \
                                                               \
    if(Z.cols() != get_rng_dim(theta))                \
      error("invalid 'Z' (# columns: %d %d)", Z.cols(),        \
            get_rng_dim(theta));                      \
                                                               \
    std::size_t grp_size_sum = 0;                              \
    for(int i = 0; i < grp_size.size(); ++i)                   \
      grp_size_sum += grp_size[i];                             \
    if(n != grp_size_sum)                                      \
      error("invalid 'grp_size'");                             \
                                                               \
    int g_max = 0;                                             \
    for(int i = 0; i < grp.size(); ++i)                        \
      g_max = std::max(grp[i], g_max);                         \
    if(g_max + 1L != grp_size.size())                          \
      error(                                                   \
        "invalid 'grp_size' (does not match with number of groups)"); \
                                                               \
    for(int i = 1L; i < grp.size(); ++i){                      \
      if(grp[i - 1L] > grp[i])                                 \
        error("'grp' is not sorted");                          \
      if(grp[i - 1L] - grp[i] < -1L)                           \
        error("too big gap in 'grp'");                         \
    }                                                          \
  }
#endif

/// fast-commutation.h

#ifndef FAST_COMMUTATION_H
#define FAST_COMMUTATION_H
#include <memory>

std::unique_ptr<size_t[]> get_commutation_unequal_vec
  (unsigned const, unsigned const, bool const);

size_t const * get_commutation_unequal_vec_cached(unsigned const);

#endif // ifndef FAST_COMMUTATION_H

/// fast-commutation.cpp
std::unique_ptr<size_t[]> get_commutation_unequal_vec
  (unsigned const n, unsigned const m, bool const transpose){
  unsigned const nm = n * m,
            nnm_p1 = n * nm + 1L,
             nm_pm = nm + m;
  std::unique_ptr<size_t[]> out(new size_t[nm]);
  size_t * const o_begin = out.get();
  size_t idx = 0L;
  for(unsigned i = 0; i < n; ++i, idx += nm_pm){
    size_t idx1 = idx;
    for(unsigned j = 0; j < m; ++j, idx1 += nnm_p1)
      if(transpose)
        *(o_begin + idx1 / nm) = (idx1 % nm);
      else
        *(o_begin + idx1 % nm) = (idx1 / nm);
  }

  return out;
}

size_t const * get_commutation_unequal_vec_cached(unsigned const n){
  constexpr std::size_t n_cache = 1000L;
  if(n > n_cache or n == 0L)
    throw std::invalid_argument(
        "get_commutation_unequal_vec_cached: invalid n (too large or zero)");

  static std::array<std::unique_ptr<size_t[]>, n_cache> cached_values;

  unsigned const idx = n - 1L;
  bool has_value = cached_values[idx].get();

  if(has_value)
    return cached_values[idx].get();

#ifdef _OPENMP
#pragma omp critical (coomuCached)
{
#endif
  has_value = cached_values[idx].get();
  if(!has_value){
    std::unique_ptr<size_t[]> new_val =
      get_commutation_unequal_vec(n, n, false);
    cached_values[idx].swap(new_val);
  }
#ifdef _OPENMP
}
#endif
  return cached_values[idx].get();
}

/// utils.h
#ifndef SURVTMB_UTILS_H
#define SURVTMB_UTILS_H

#ifdef _OPENMP
#include "omp.h"
#endif
#include <memory>

inline unsigned get_rng_dim(unsigned const n_vcov_params){
  double const n(n_vcov_params);
  return std::lround(std::sqrt(.25 + 2 * n) - .5);
}

template<class Type>
unsigned get_rng_dim(vector<Type> x){
  return get_rng_dim(x.size());
}

/* Returns the covariance matrix from a log Cholesky decomposition
*/
template<class Type>
class get_vcov_from_trian_atomic : public CppAD::atomic_base<Type> {
public:
  get_vcov_from_trian_atomic(char const *name):
  CppAD::atomic_base<Type>(name) {
    this->option(CppAD::atomic_base<Type>::bool_sparsity_enum);
  }

  template<class T>
  static void comp(T const *x, T * y, size_t const n_arg){
    // TODO: can be done smarter...
    typename
    Eigen::Map<Eigen::Matrix<T,Eigen::Dynamic,Eigen::Dynamic> >
      out(y, n_arg, n_arg);
    out.setZero();

    for(size_t c = 0; c < n_arg; ++c){
      for(size_t r = c; r < n_arg; ++r)
        out(r, c) = *x++;
    }

    // out * out.t()
    for(size_t c = n_arg; c-- > 0;){
      for(size_t r = c + 1L; r-- > 0;){
        Type new_term(0);
        for(size_t k = 0; k <= r; ++k)
          new_term += out(c, k) * out(r, k);
        out(c, r) = new_term;
        out(r, c) = new_term;
      }
    }
  }

  virtual bool forward(std::size_t p, std::size_t q,
                       const CppAD::vector<bool> &vx,
                       CppAD::vector<bool> &vy,
                       const CppAD::vector<Type> &tx,
                       CppAD::vector<Type> &ty){
    if(q > 0)
      return false;

    size_t const n = std::lround(
      .5 * (std::sqrt(8. * static_cast<double>(tx.size()) + 1.) - 1.));
    comp(&tx[0], &ty[0], n);

    /* set variable flags */
    if (vx.size() > 0) {
      bool anyvx = false;
      for (std::size_t i = 0; i < vx.size() and !anyvx; i++)
        anyvx |= vx[i];
      for (std::size_t i = 0; i < vy.size(); i++)
        vy[i] = anyvx;
    }

    return true;
  }

  virtual bool reverse(std::size_t q, const CppAD::vector<Type> &tx,
                       const CppAD::vector<Type> &ty,
                       CppAD::vector<Type> &px,
                       const CppAD::vector<Type> &py){
    if(q > 0)
      return false;

    size_t const n = std::lround(
      .5 * (std::sqrt(8. * static_cast<double>(tx.size()) + 1.) - 1.)),
                nn = n * n;

    size_t const * const com_vec = get_commutation_unequal_vec_cached(n);
    size_t bri(0L),
           brl(0L);

    for(size_t i = 0; i < px.size(); ++i)
      px[i] = Type(0.);

    for(size_t i = 0L; i < nn;
        bri = bri + 1L >= n ? 0L : bri + 1L,
        brl = bri == 0L ? brl + 1L : brl,
        ++i){
      size_t const idx_l = *(com_vec + i);
      Type const lfs = py[idx_l] + py[i];

      size_t idx_k(bri);
      for(size_t k = 0; k <= bri; idx_k += n - k, ++k){
        if(brl >= k){
          Type const rfs = tx[
            brl - k /* row index */ + idx_k - bri /* col index */];
          px[idx_k - k] += lfs * rfs;
        }
      }
    }

    return true;
  }

  virtual bool rev_sparse_jac(size_t q, const CppAD::vector<bool>& rt,
                              CppAD::vector<bool>& st) {
    bool anyrt = false;
    for (std::size_t i = 0; i < rt.size() and !anyrt; i++)
      anyrt |= rt[i];
    for (std::size_t i = 0; i < st.size(); i++)
      st[i] = anyrt;
    return true;
  }

  static get_vcov_from_trian_atomic<Type>& get_cached();
};

template<class Type>
matrix<AD<Type> >
get_vcov_from_trian(AD<Type> const *vals, unsigned const dim){
  size_t const n_ele = (dim * (dim + 1L)) / 2L;
  matrix<AD<Type> > out(dim, dim);
  CppAD::vector<AD<Type> > x_vals(n_ele);

  for(size_t i = 0; i < n_ele; ++i)
    x_vals[i] = *(vals + i);
  size_t cc = 0L;
  for(size_t i = dim + 1L; i-- > 1; cc += i)
    x_vals[cc] = exp(x_vals[cc]);

  static get_vcov_from_trian_atomic<Type> &functor =
    get_vcov_from_trian_atomic<Type>::get_cached();

  typename Eigen::Map<Eigen::Matrix<AD<Type> ,Eigen::Dynamic,1> >
    tx(x_vals.data(), n_ele),
    ty(out.data(), dim * dim);

  functor(tx, ty);

  return out;
}

inline matrix<double>
get_vcov_from_trian(double const *vals, unsigned const dim){
  matrix<double> out(dim, dim);
  size_t const n_ele = (dim * (dim + 1L)) / 2L;
  vector<double> x_vals(n_ele);
  for(size_t i = 0; i < n_ele; ++i)
    x_vals[i] = *(vals + i);
  size_t cc = 0L;
  for(size_t i = dim + 1L; i-- > 1; cc += i)
    x_vals[cc] = exp(x_vals[cc]);

  double *p_out = out.data();
  get_vcov_from_trian_atomic<double>::comp(x_vals.data(), p_out, dim);

  return out;
}

template<class Type>
matrix<Type>
get_vcov_from_trian(vector<Type> const &theta){
  unsigned const dim = get_rng_dim(theta);
  return get_vcov_from_trian(&theta[0], dim);
}

/* unlike objective_function::parallel_region this does not increase the
 * counter
 */
template<class Type>
bool is_my_region(objective_function<Type> const &o){
  if(o.current_parallel_region < 0 || o.selected_parallel_region < 0)
    /* Serial mode */
    return true;
  if(o.selected_parallel_region == o.current_parallel_region &&
     (!o.parallel_ignore_statements))
    return true;
  return false;
}

#endif // ifndef SURVTMB_UTILS_H

/// utils.cpp
#include <array>
#include <memory.h>
#include <string.h>

using ADd   = CppAD::AD<double>;
using ADdd  = CppAD::AD<CppAD::AD<double> >;
using ADddd = CppAD::AD<CppAD::AD<CppAD::AD<double> > >;

static get_vcov_from_trian_atomic<double> get_vcov_from_trian_d
  ("get_vcov_from_trian_atomic<double>");
static get_vcov_from_trian_atomic<ADd   > get_vcov_from_trian_ADd
  ("get_vcov_from_trian_atomic<ADd>");
static get_vcov_from_trian_atomic<ADdd  > get_vcov_from_trian_ADdd
  ("get_vcov_from_trian_atomic<ADdd>");
static get_vcov_from_trian_atomic<ADddd > get_vcov_from_trian_ADddd
  ("get_vcov_from_trian_atomic<ADddd>");

template<>
get_vcov_from_trian_atomic<double>&
get_vcov_from_trian_atomic<double>::get_cached(){
  return get_vcov_from_trian_d;
}


template<>
get_vcov_from_trian_atomic<ADd>&
get_vcov_from_trian_atomic<ADd>::get_cached(){
  return get_vcov_from_trian_ADd;
}

template<>
get_vcov_from_trian_atomic<ADdd>&
get_vcov_from_trian_atomic<ADdd>::get_cached(){
  return get_vcov_from_trian_ADdd;
}

template<>
get_vcov_from_trian_atomic<ADddd>&
get_vcov_from_trian_atomic<ADddd>::get_cached(){
  return get_vcov_from_trian_ADddd;
}

/// src/laplace.h
/* Computes the log-likelihood for given random effects.
 *
 * Args:
 *   COMMON_ARGS: see the other header file.
 *   u: [random effect dim] x [# groups] matrix with random effects.
 */
template<class Type>
void laplace(COMMON_ARGS(Type, parallel_accumulator), matrix<Type> const &u);

/// dmvnorm_log.h

#ifndef DMVNORM_LOG_H
#define DMVNORM_LOG_H

template<class Type>
Type mult_var_dens(vector<Type> const &theta, matrix<Type> const &rngs){
  if(theta.size() < 2L){ /* univariate case */
    Type const sd = exp(theta[0]),
      half(.5);
    Type out(0);
    Type const * const end = rngs.data() + rngs.size();
    for(Type const *x = rngs.data(); x != end; ++x){
      Type const scaled = *x / sd;
      out += - half * scaled * scaled;
    }

    out += - Type(rngs.size()) * log(Type(sqrt(2 * M_PI)) * sd);

    return out;
  }

  matrix<Type> const vcov = get_vcov_from_trian(theta);
  density::MVNORM_t<Type> norm_dist(vcov);
  Type out(0);
  for(unsigned g = 0; g < rngs.cols(); ++g)
    out -= norm_dist(rngs.col(g));

  return out;
}

#endif // ifndef DMVNORM_LOG_H

/// pnorm-log.h
#ifndef PNORM_LOG_H
#define PNORM_LOG_H

namespace atomic {
/* Computes log CDF of standard normal distribution */
TMB_ATOMIC_VECTOR_FUNCTION(
  // ATOMIC_NAME
  pnorm_log1
  ,
  // OUTPUT_DIM
  1
  ,
  // ATOMIC_DOUBLE
  ty[0] = Rmath::Rf_pnorm5(tx[0], 0, 1, 1, 1);
  ,
  // ATOMIC_REVERSE
  px[0] = py[0];
  if(CppAD::Value(tx[0]) > -10){
    Type const cdf = exp(ty[0]);
    px[0] *= dnorm1(tx[0]) / cdf;
  } else {
      Type const log_pdf = dnorm(tx[0], Type(0.), Type(1.), 1L);
    px[0] *= exp(log_pdf - ty[0]);
  })
} // namespace atomic

/* Computes the log CDF of normal distribution.
 *
 * Args:
 *   Similar to `stats::pnorm`.
 */
template<class Type>
Type pnorm_log(Type q, Type  mean = 0., Type sd = 1.){
  CppAD::vector<Type> tx(1);
  tx[0] = (q - mean) / sd;
  return atomic::pnorm_log1(tx)[0];
}
VECTORIZE3_ttt(pnorm_log)
VECTORIZE1_t  (pnorm_log)

#endif // ifndef PNORM_LOG_H

/// laplace.cpp

template<class Type>
Type laplace_PH_terms
  (Type const &eta, Type const &etaD, Type const &event,
   Type const &eps, Type const &eps_log, Type const &kappa){
  Type const H = exp(eta),
             h = etaD * H,
        if_low = event * eps_log - H - h * h * kappa,
        if_ok  = event * log(h)  - H;
  return CppAD::CondExpGe(h, eps, if_ok, if_low);
}

template<class Type>
Type laplace_PO_terms
  (Type const &eta, Type const &etaD, Type const &event,
   Type const &eps, Type const &eps_log, Type const &kappa){
  Type const too_large(30.),
                   one(1.);

  Type const H = CppAD::CondExpGe(
    eta, too_large, eta, log(one + exp(eta))),
             h = etaD * exp(eta - H),
        if_low = event * eps_log - H - h * h * kappa,
        if_ok  = event * log(h)  - H;
  return CppAD::CondExpGe(h, eps, if_ok, if_low);
}

template<class Type>
Type laplace_probit_terms
  (Type const &eta, Type const &etaD, Type const &event,
   Type const &eps, Type const &eps_log, Type const &kappa){
  Type const tiny(std::numeric_limits<double>::epsilon()),
             zero(0.),
              one(1.);

  Type const H = -pnorm_log(-eta),
             h = etaD * dnorm(-eta, zero, one) /
               (pnorm(-eta) + tiny),
        if_low = event * eps_log - H - h * h * kappa,
        if_ok  = event * log(h)  - H;
  return CppAD::CondExpGe(h, eps, if_ok, if_low);
}

template<class Type>
void laplace(COMMON_ARGS(Type, parallel_accumulator),
             matrix<Type> const &u){
  /* checks */
  unsigned const rng_dim = get_rng_dim(theta);
  {
    int const *max_grp = std::max_element(grp.data(), grp.data() + grp.size());
    if(!max_grp or *max_grp != u.cols() - 1L)
      error("Invalid 'grp' or 'u'");
    if(rng_dim != u.rows())
      error("Invalid 'u'");
  }

  /* log-likelihood terms from conditional distribution of the observed
   * outcomes */
  Type  const eps_log = log(eps);
  auto const b_vec = b.matrix().col(0);

  /* compute terms from conditional density */
  typedef Type (*loop_func)(
      Type const&, Type const&, Type const&,
      Type const&, Type const&, Type const&);

  auto cond_dens_loop = [&](loop_func func){
    unsigned i(0L);
    for(unsigned g = 0; g < grp_size.size(); ++g){
      unsigned const n_members = grp_size[g];
      /* do we need to anything on this thread? */
      if(!is_my_region(*result.obj)){
        i += n_members;
        result.obj->parallel_region();
        continue;
      }

      /* compute linear predictor etc. */
      auto const u_g = u.col(g);
      unsigned const end_i(i + n_members);
      vector<Type> const eta = ([&](){
        vector<Type> out(n_members);
        unsigned k(0L);
        for(unsigned j = i; j < end_i; ++j, ++k){
          out[k]  = (X.row(j) * b_vec)[0];
          out[k] += Z.row(j) * u_g;
        }

        return out;
      })();
      vector<Type> const etaD = ([&](){
        vector<Type> out(n_members);
        unsigned k(0L);
        for(unsigned j = i; j < end_i; ++j, ++k)
          out[k] = (XD.row(j) * b_vec)[0];

        return out;
      })();

      Type next_term(0.);
      for(unsigned k = 0; k < n_members; ++k, ++i)
        next_term += func(
          eta[k], etaD[k], event[i], eps, eps_log, kappa);

      result -= next_term;
    }
  };

  if(link == "PH")
    cond_dens_loop(laplace_PH_terms<Type>);
  else if (link == "PO")
    cond_dens_loop(laplace_PO_terms<Type>);
  else if(link == "probit")
    cond_dens_loop(laplace_probit_terms<Type>);
  else
    error("'%s' not implemented", link.c_str());

  /* log-likelihood terms from random effect density */
  result -= mult_var_dens(theta, u);
}

using ADd   = CppAD::AD<double>;
using ADdd  = CppAD::AD<CppAD::AD<double> >;
using ADddd = CppAD::AD<CppAD::AD<CppAD::AD<double> > >;

template void laplace<double>(
    COMMON_ARGS(double, parallel_accumulator), matrix<double> const &u);
template void laplace<ADd>(
    COMMON_ARGS(ADd, parallel_accumulator), matrix<ADd> const &u);
template void laplace<ADdd>(
    COMMON_ARGS(ADdd, parallel_accumulator), matrix<ADdd> const &u);
template void laplace<ADddd>(
    COMMON_ARGS(ADddd, parallel_accumulator), matrix<ADddd> const &u);

/// survTMB.cpp

#include <cmath>
#include <algorithm>
#include <future>
#include <limits>

#ifdef _OPENMP
#include "omp.h"
#endif

template<class Type>
Type objective_function<Type>::operator() ()
{
  SETUP_DATA;
  SETUP_DATA_CHECK;

  parallel_accumulator<Type> result(this);

  /* perform approximations using method descibed at
   *   https://github.com/kaskr/adcomp/issues/233#issuecomment-306032192
   *
   * each branch may contain further parameters or data objects */
  if(app_type == "Laplace"){
    PARAMETER_MATRIX(u);
    laplace(COMMON_CALL, u);
    return result;

  }

  error("approximation method '%s' is not implemented", app_type.c_str());
  return Type(0);
}
	/// tmb_includes.h
	#ifndef TMB_INCLUDES_H
	#define TMB_INCLUDES_H

	/* TMB */
	#include <TMB.hpp>

	namespace CppAD {
	inline double Value(double const x){
	return x;
	}
	}

	template<class Type>
	Type vec_dot(Type const , Type const , size_t const);

	template<class Type>
	Type vec_dot(vector<Type> const &x, vector<Type> const &y){
	#ifdef DO_CHECKS
	if(x.size() != y.size())
	throw std::invalid_argument("vec_dot<Type>: dimension do not match");
	#endif
	return vec_dot(x.data(), y.data(), x.size());
	}

	template<class Type>
	Type vec_dot
	(Eigen::Map<const Eigen::Matrix<Type,Eigen::Dynamic,1> > &x,
	vector<Type> const &y){
	#ifdef DO_CHECKS
	if(x.size() != y.size())
	throw std::invalid_argument("vec_dot<Type>: dimension do not match");
	#endif
	if(x.innerStride() == 1L)
	return vec_dot(x.data(), y.data(), x.size());

	Type out(0.);
	for(int i = 0; i < x.size(); ++i)
	out += x[i] * y[i];
	return out;
	}

	template<class Type>
	Type quad_form(Type const, Type const, Type const*, size_t const);

	template<class Type>
	Type quad_form
	(vector<Type> const &x, matrix<Type> const &A, vector<Type> const &y){
	#ifdef DO_CHECKS
	if(x.size() != A.rows() or y.size() != A.cols())
	throw std::invalid_argument("quad_form<Type>: dimension do not match");
	if(A.rows() != A.cols())
	throw std::invalid_argument("quad_form<Type>: invalid A");
	#endif
	return quad_form(x.data(), A.data(), y.data(), A.cols());
	}

	template<class Type>
	Type quad_form_sym(Type const, Type const, size_t const);

	template<class Type>
	Type quad_form_sym(vector<Type> const &x, matrix<Type> const &A){
	#ifdef DO_CHECKS
	if(x.size() != A.rows() or x.size() != A.cols())
	throw std::invalid_argument(
	"quad_form_sym<Type>: dimension do not match");
	#endif
	return quad_form_sym(x.data(), A.data(), A.cols());
	}

	template<class Type>
	Type mat_mult_trace(matrix<Type> const &X, matrix<Type> const &Y){
	#ifdef DO_CHECKS
	if(X.rows() != Y.cols() or X.cols() != Y.rows())
	throw std::invalid_argument(
	"mat_mult_trace<Type>: dimension do not match");
	#endif
	return vec_dot(X.data(), Y.data(), X.rows() * X.cols());
	}

	#endif // ifndef TMB_INCLUDES_H

	/// src/common.h
	/* Common arguments used in approximations.
	*
	* Args:
	* tobs: observed times.
	* event: zero/one variables for whether the event is observed.
	* X: fixed effect design matrix.
	* XD: derivative of X wrt time.
	* Z: random effect design matrix.
	* grp: integer vector with group identifier.
	* eps: small tolerance for positivity constraint on hazards.
	* kappa: one-sided L2 penalty for positivity constraint.
	* b: fixed effect coefficients.
	* theta: covariance matrix parameters. See get_vcov_from_trian.
	*/
	#ifndef COMMON_ARGS
	#define COMMON_ARGS(TYPE, ACCUMLATOR) \
	ACCUMLATOR<TYPE> &result, \
	vector<TYPE> const &tobs, vector<TYPE> const &event, \
	matrix<TYPE> const &X, matrix<TYPE> const &XD, \
	matrix<TYPE> const &Z, vector<int> const &grp, \
	TYPE const &eps, TYPE const &kappa, vector<TYPE> const &b, \
	vector<TYPE> const &theta, std::string const &link, \
	vector<int> const &grp_size
	#endif

	#ifndef COMMON_CALL
	#define COMMON_CALL \
	result, tobs, event, X, XD, Z, grp, eps, kappa, b, theta, \
	link, grp_size
	#endif

	#ifndef SETUP_DATA
	#define SETUP_DATA \
	/* common data objects and parameters */ \
	DATA_STRING(app_type); \
	DATA_VECTOR(tobs); \
	\
	DATA_VECTOR(event); \
	DATA_MATRIX(X); \
	DATA_MATRIX(XD); \
	DATA_MATRIX(Z); \
	DATA_IVECTOR(grp); \
	DATA_STRING(link); \
	DATA_IVECTOR(grp_size); \
	\
	/* They are marked as parameter such that the user can \
	* change them later */ \
	PARAMETER(eps); \
	PARAMETER(kappa); \
	\
	PARAMETER_VECTOR(b); \
	PARAMETER_VECTOR(theta);
	#endif

	#ifndef SETUP_DATA_CHECK
	#define SETUP_DATA_CHECK \
	/* checks */ \
	{ \
	unsigned const n = tobs.size(); \
	auto check_rows = [n](matrix<Type> const &x, \
	char const *msg){ \
	if(x.rows() != n) \
	error(msg); \
	}; \
	check_rows(X , "invalid 'X'"); \
	check_rows(XD, "invalid 'XD'"); \
	check_rows(Z , "invalid 'Z'"); \
	if(n != event.size()) \
	error("invalid 'event'"); \
	if(n != grp.size()) \
	error("invalid 'grp'"); \
	\
	if(b.size() != X.cols()) \
	error("invalid 'b'"); \
	if(XD.cols() != X.cols()) \
	error("invalid 'XD' (# columns)"); \
	\
	if(Z.cols() != get_rng_dim(theta)) \
	error("invalid 'Z' (# columns: %d %d)", Z.cols(), \
	get_rng_dim(theta)); \
	\
	std::size_t grp_size_sum = 0; \
	for(int i = 0; i < grp_size.size(); ++i) \
	grp_size_sum += grp_size[i]; \
	if(n != grp_size_sum) \
	error("invalid 'grp_size'"); \
	\
	int g_max = 0; \
	for(int i = 0; i < grp.size(); ++i) \
	g_max = std::max(grp[i], g_max); \
	if(g_max + 1L != grp_size.size()) \
	error( \
	"invalid 'grp_size' (does not match with number of groups)"); \
	\
	for(int i = 1L; i < grp.size(); ++i){ \
	if(grp[i - 1L] > grp[i]) \
	error("'grp' is not sorted"); \
	if(grp[i - 1L] - grp[i] < -1L) \
	error("too big gap in 'grp'"); \
	} \
	}
	#endif

	/// fast-commutation.h

	#ifndef FAST_COMMUTATION_H
	#define FAST_COMMUTATION_H
	#include <memory>

	std::unique_ptr<size_t[]> get_commutation_unequal_vec
	(unsigned const, unsigned const, bool const);

	size_t const * get_commutation_unequal_vec_cached(unsigned const);

	#endif // ifndef FAST_COMMUTATION_H

	/// fast-commutation.cpp
	std::unique_ptr<size_t[]> get_commutation_unequal_vec
	(unsigned const n, unsigned const m, bool const transpose){
	unsigned const nm = n * m,
	nnm_p1 = n * nm + 1L,
	nm_pm = nm + m;
	std::unique_ptr<size_t[]> out(new size_t[nm]);
	size_t * const o_begin = out.get();
	size_t idx = 0L;
	for(unsigned i = 0; i < n; ++i, idx += nm_pm){
	size_t idx1 = idx;
	for(unsigned j = 0; j < m; ++j, idx1 += nnm_p1)
	if(transpose)
	*(o_begin + idx1 / nm) = (idx1 % nm);
	else
	*(o_begin + idx1 % nm) = (idx1 / nm);
	}

	return out;
	}

	size_t const * get_commutation_unequal_vec_cached(unsigned const n){
	constexpr std::size_t n_cache = 1000L;
	if(n > n_cache or n == 0L)
	throw std::invalid_argument(
	"get_commutation_unequal_vec_cached: invalid n (too large or zero)");

	static std::array<std::unique_ptr<size_t[]>, n_cache> cached_values;

	unsigned const idx = n - 1L;
	bool has_value = cached_values[idx].get();

	if(has_value)
	return cached_values[idx].get();

	#ifdef _OPENMP
	#pragma omp critical (coomuCached)
	{
	#endif
	has_value = cached_values[idx].get();
	if(!has_value){
	std::unique_ptr<size_t[]> new_val =
	get_commutation_unequal_vec(n, n, false);
	cached_values[idx].swap(new_val);
	}
	#ifdef _OPENMP
	}
	#endif
	return cached_values[idx].get();
	}

	/// utils.h
	#ifndef SURVTMB_UTILS_H
	#define SURVTMB_UTILS_H

	#ifdef _OPENMP
	#include "omp.h"
	#endif
	#include <memory>

	inline unsigned get_rng_dim(unsigned const n_vcov_params){
	double const n(n_vcov_params);
	return std::lround(std::sqrt(.25 + 2 * n) - .5);
	}

	template<class Type>
	unsigned get_rng_dim(vector<Type> x){
	return get_rng_dim(x.size());
	}

	/* Returns the covariance matrix from a log Cholesky decomposition
	*/
	template<class Type>
	class get_vcov_from_trian_atomic : public CppAD::atomic_base<Type> {
	public:
	get_vcov_from_trian_atomic(char const *name):
	CppAD::atomic_base<Type>(name) {
	this->option(CppAD::atomic_base<Type>::bool_sparsity_enum);
	}

	template<class T>
	static void comp(T const x, T y, size_t const n_arg){
	// TODO: can be done smarter...
	typename
	Eigen::Map<Eigen::Matrix<T,Eigen::Dynamic,Eigen::Dynamic> >
	out(y, n_arg, n_arg);
	out.setZero();

	for(size_t c = 0; c < n_arg; ++c){
	for(size_t r = c; r < n_arg; ++r)
	out(r, c) = *x++;
	}

	// out * out.t()
	for(size_t c = n_arg; c-- > 0;){
	for(size_t r = c + 1L; r-- > 0;){
	Type new_term(0);
	for(size_t k = 0; k <= r; ++k)
	new_term += out(c, k) * out(r, k);
	out(c, r) = new_term;
	out(r, c) = new_term;
	}
	}
	}

	virtual bool forward(std::size_t p, std::size_t q,
	const CppAD::vector<bool> &vx,
	CppAD::vector<bool> &vy,
	const CppAD::vector<Type> &tx,
	CppAD::vector<Type> &ty){
	if(q > 0)
	return false;

	size_t const n = std::lround(
	.5 * (std::sqrt(8. * static_cast<double>(tx.size()) + 1.) - 1.));
	comp(&tx[0], &ty[0], n);

	/* set variable flags */
	if (vx.size() > 0) {
	bool anyvx = false;
	for (std::size_t i = 0; i < vx.size() and !anyvx; i++)
	anyvx \|= vx[i];
	for (std::size_t i = 0; i < vy.size(); i++)
	vy[i] = anyvx;
	}

	return true;
	}

	virtual bool reverse(std::size_t q, const CppAD::vector<Type> &tx,
	const CppAD::vector<Type> &ty,
	CppAD::vector<Type> &px,
	const CppAD::vector<Type> &py){
	if(q > 0)
	return false;

	size_t const n = std::lround(
	.5 * (std::sqrt(8. * static_cast<double>(tx.size()) + 1.) - 1.)),
	nn = n * n;

	size_t const * const com_vec = get_commutation_unequal_vec_cached(n);
	size_t bri(0L),
	brl(0L);

	for(size_t i = 0; i < px.size(); ++i)
	px[i] = Type(0.);

	for(size_t i = 0L; i < nn;
	bri = bri + 1L >= n ? 0L : bri + 1L,
	brl = bri == 0L ? brl + 1L : brl,
	++i){
	size_t const idx_l = *(com_vec + i);
	Type const lfs = py[idx_l] + py[i];

	size_t idx_k(bri);
	for(size_t k = 0; k <= bri; idx_k += n - k, ++k){
	if(brl >= k){
	Type const rfs = tx[
	brl - k /* row index / + idx_k - bri / col index */];
	px[idx_k - k] += lfs * rfs;
	}
	}
	}

	return true;
	}

	virtual bool rev_sparse_jac(size_t q, const CppAD::vector<bool>& rt,
	CppAD::vector<bool>& st) {
	bool anyrt = false;
	for (std::size_t i = 0; i < rt.size() and !anyrt; i++)
	anyrt \|= rt[i];
	for (std::size_t i = 0; i < st.size(); i++)
	st[i] = anyrt;
	return true;
	}

	static get_vcov_from_trian_atomic<Type>& get_cached();
	};

	template<class Type>
	matrix<AD<Type> >
	get_vcov_from_trian(AD<Type> const *vals, unsigned const dim){
	size_t const n_ele = (dim * (dim + 1L)) / 2L;
	matrix<AD<Type> > out(dim, dim);
	CppAD::vector<AD<Type> > x_vals(n_ele);

	for(size_t i = 0; i < n_ele; ++i)
	x_vals[i] = *(vals + i);
	size_t cc = 0L;
	for(size_t i = dim + 1L; i-- > 1; cc += i)
	x_vals[cc] = exp(x_vals[cc]);

	static get_vcov_from_trian_atomic<Type> &functor =
	get_vcov_from_trian_atomic<Type>::get_cached();

	typename Eigen::Map<Eigen::Matrix<AD<Type> ,Eigen::Dynamic,1> >
	tx(x_vals.data(), n_ele),
	ty(out.data(), dim * dim);

	functor(tx, ty);

	return out;
	}

	inline matrix<double>
	get_vcov_from_trian(double const *vals, unsigned const dim){
	matrix<double> out(dim, dim);
	size_t const n_ele = (dim * (dim + 1L)) / 2L;
	vector<double> x_vals(n_ele);
	for(size_t i = 0; i < n_ele; ++i)
	x_vals[i] = *(vals + i);
	size_t cc = 0L;
	for(size_t i = dim + 1L; i-- > 1; cc += i)
	x_vals[cc] = exp(x_vals[cc]);

	double *p_out = out.data();
	get_vcov_from_trian_atomic<double>::comp(x_vals.data(), p_out, dim);

	return out;
	}

	template<class Type>
	matrix<Type>
	get_vcov_from_trian(vector<Type> const &theta){
	unsigned const dim = get_rng_dim(theta);
	return get_vcov_from_trian(&theta[0], dim);
	}

	/* unlike objective_function::parallel_region this does not increase the
	* counter
	*/
	template<class Type>
	bool is_my_region(objective_function<Type> const &o){
	if(o.current_parallel_region < 0 \|\| o.selected_parallel_region < 0)
	/* Serial mode */
	return true;
	if(o.selected_parallel_region == o.current_parallel_region &&
	(!o.parallel_ignore_statements))
	return true;
	return false;
	}

	#endif // ifndef SURVTMB_UTILS_H

	/// utils.cpp
	#include <array>
	#include <memory.h>
	#include <string.h>

	using ADd = CppAD::AD<double>;
	using ADdd = CppAD::AD<CppAD::AD<double> >;
	using ADddd = CppAD::AD<CppAD::AD<CppAD::AD<double> > >;

	static get_vcov_from_trian_atomic<double> get_vcov_from_trian_d
	("get_vcov_from_trian_atomic<double>");
	static get_vcov_from_trian_atomic<ADd > get_vcov_from_trian_ADd
	("get_vcov_from_trian_atomic<ADd>");
	static get_vcov_from_trian_atomic<ADdd > get_vcov_from_trian_ADdd
	("get_vcov_from_trian_atomic<ADdd>");
	static get_vcov_from_trian_atomic<ADddd > get_vcov_from_trian_ADddd
	("get_vcov_from_trian_atomic<ADddd>");

	template<>
	get_vcov_from_trian_atomic<double>&
	get_vcov_from_trian_atomic<double>::get_cached(){
	return get_vcov_from_trian_d;
	}


	template<>
	get_vcov_from_trian_atomic<ADd>&
	get_vcov_from_trian_atomic<ADd>::get_cached(){
	return get_vcov_from_trian_ADd;
	}

	template<>
	get_vcov_from_trian_atomic<ADdd>&
	get_vcov_from_trian_atomic<ADdd>::get_cached(){
	return get_vcov_from_trian_ADdd;
	}

	template<>
	get_vcov_from_trian_atomic<ADddd>&
	get_vcov_from_trian_atomic<ADddd>::get_cached(){
	return get_vcov_from_trian_ADddd;
	}

	/// src/laplace.h
	/* Computes the log-likelihood for given random effects.
	*
	* Args:
	* COMMON_ARGS: see the other header file.
	* u: [random effect dim] x [# groups] matrix with random effects.
	*/
	template<class Type>
	void laplace(COMMON_ARGS(Type, parallel_accumulator), matrix<Type> const &u);

	/// dmvnorm_log.h

	#ifndef DMVNORM_LOG_H
	#define DMVNORM_LOG_H

	template<class Type>
	Type mult_var_dens(vector<Type> const &theta, matrix<Type> const &rngs){
	if(theta.size() < 2L){ /* univariate case */
	Type const sd = exp(theta[0]),
	half(.5);
	Type out(0);
	Type const * const end = rngs.data() + rngs.size();
	for(Type const *x = rngs.data(); x != end; ++x){
	Type const scaled = *x / sd;
	out += - half * scaled * scaled;
	}

	out += - Type(rngs.size()) * log(Type(sqrt(2 * M_PI)) * sd);

	return out;
	}

	matrix<Type> const vcov = get_vcov_from_trian(theta);
	density::MVNORM_t<Type> norm_dist(vcov);
	Type out(0);
	for(unsigned g = 0; g < rngs.cols(); ++g)
	out -= norm_dist(rngs.col(g));

	return out;
	}

	#endif // ifndef DMVNORM_LOG_H

	/// pnorm-log.h
	#ifndef PNORM_LOG_H
	#define PNORM_LOG_H

	namespace atomic {
	/* Computes log CDF of standard normal distribution */
	TMB_ATOMIC_VECTOR_FUNCTION(
	// ATOMIC_NAME
	pnorm_log1
	,
	// OUTPUT_DIM
	1
	,
	// ATOMIC_DOUBLE
	ty[0] = Rmath::Rf_pnorm5(tx[0], 0, 1, 1, 1);
	,
	// ATOMIC_REVERSE
	px[0] = py[0];
	if(CppAD::Value(tx[0]) > -10){
	Type const cdf = exp(ty[0]);
	px[0] *= dnorm1(tx[0]) / cdf;
	} else {
	Type const log_pdf = dnorm(tx[0], Type(0.), Type(1.), 1L);
	px[0] *= exp(log_pdf - ty[0]);
	})
	} // namespace atomic

	/* Computes the log CDF of normal distribution.
	*
	* Args:
	* Similar to `stats::pnorm`.
	*/
	template<class Type>
	Type pnorm_log(Type q, Type mean = 0., Type sd = 1.){
	CppAD::vector<Type> tx(1);
	tx[0] = (q - mean) / sd;
	return atomic::pnorm_log1(tx)[0];
	}
	VECTORIZE3_ttt(pnorm_log)
	VECTORIZE1_t (pnorm_log)

	#endif // ifndef PNORM_LOG_H

	/// laplace.cpp

	template<class Type>
	Type laplace_PH_terms
	(Type const &eta, Type const &etaD, Type const &event,
	Type const &eps, Type const &eps_log, Type const &kappa){
	Type const H = exp(eta),
	h = etaD * H,
	if_low = event * eps_log - H - h * h * kappa,
	if_ok = event * log(h) - H;
	return CppAD::CondExpGe(h, eps, if_ok, if_low);
	}

	template<class Type>
	Type laplace_PO_terms
	(Type const &eta, Type const &etaD, Type const &event,
	Type const &eps, Type const &eps_log, Type const &kappa){
	Type const too_large(30.),
	one(1.);

	Type const H = CppAD::CondExpGe(
	eta, too_large, eta, log(one + exp(eta))),
	h = etaD * exp(eta - H),
	if_low = event * eps_log - H - h * h * kappa,
	if_ok = event * log(h) - H;
	return CppAD::CondExpGe(h, eps, if_ok, if_low);
	}

	template<class Type>
	Type laplace_probit_terms
	(Type const &eta, Type const &etaD, Type const &event,
	Type const &eps, Type const &eps_log, Type const &kappa){
	Type const tiny(std::numeric_limits<double>::epsilon()),
	zero(0.),
	one(1.);

	Type const H = -pnorm_log(-eta),
	h = etaD * dnorm(-eta, zero, one) /
	(pnorm(-eta) + tiny),
	if_low = event * eps_log - H - h * h * kappa,
	if_ok = event * log(h) - H;
	return CppAD::CondExpGe(h, eps, if_ok, if_low);
	}

	template<class Type>
	void laplace(COMMON_ARGS(Type, parallel_accumulator),
	matrix<Type> const &u){
	/* checks */
	unsigned const rng_dim = get_rng_dim(theta);
	{
	int const *max_grp = std::max_element(grp.data(), grp.data() + grp.size());
	if(!max_grp or *max_grp != u.cols() - 1L)
	error("Invalid 'grp' or 'u'");
	if(rng_dim != u.rows())
	error("Invalid 'u'");
	}

	/* log-likelihood terms from conditional distribution of the observed
	* outcomes */
	Type const eps_log = log(eps);
	auto const b_vec = b.matrix().col(0);

	/* compute terms from conditional density */
	typedef Type (*loop_func)(
	Type const&, Type const&, Type const&,
	Type const&, Type const&, Type const&);

	auto cond_dens_loop = [&](loop_func func){
	unsigned i(0L);
	for(unsigned g = 0; g < grp_size.size(); ++g){
	unsigned const n_members = grp_size[g];
	/* do we need to anything on this thread? */
	if(!is_my_region(*result.obj)){
	i += n_members;
	result.obj->parallel_region();
	continue;
	}

	/* compute linear predictor etc. */
	auto const u_g = u.col(g);
	unsigned const end_i(i + n_members);
	vector<Type> const eta = ([&](){
	vector<Type> out(n_members);
	unsigned k(0L);
	for(unsigned j = i; j < end_i; ++j, ++k){
	out[k] = (X.row(j) * b_vec)[0];
	out[k] += Z.row(j) * u_g;
	}

	return out;
	})();
	vector<Type> const etaD = ([&](){
	vector<Type> out(n_members);
	unsigned k(0L);
	for(unsigned j = i; j < end_i; ++j, ++k)
	out[k] = (XD.row(j) * b_vec)[0];

	return out;
	})();

	Type next_term(0.);
	for(unsigned k = 0; k < n_members; ++k, ++i)
	next_term += func(
	eta[k], etaD[k], event[i], eps, eps_log, kappa);

	result -= next_term;
	}
	};

	if(link == "PH")
	cond_dens_loop(laplace_PH_terms<Type>);
	else if (link == "PO")
	cond_dens_loop(laplace_PO_terms<Type>);
	else if(link == "probit")
	cond_dens_loop(laplace_probit_terms<Type>);
	else
	error("'%s' not implemented", link.c_str());

	/* log-likelihood terms from random effect density */
	result -= mult_var_dens(theta, u);
	}

	using ADd = CppAD::AD<double>;
	using ADdd = CppAD::AD<CppAD::AD<double> >;
	using ADddd = CppAD::AD<CppAD::AD<CppAD::AD<double> > >;

	template void laplace<double>(
	COMMON_ARGS(double, parallel_accumulator), matrix<double> const &u);
	template void laplace<ADd>(
	COMMON_ARGS(ADd, parallel_accumulator), matrix<ADd> const &u);
	template void laplace<ADdd>(
	COMMON_ARGS(ADdd, parallel_accumulator), matrix<ADdd> const &u);
	template void laplace<ADddd>(
	COMMON_ARGS(ADddd, parallel_accumulator), matrix<ADddd> const &u);

	/// survTMB.cpp

	#include <cmath>
	#include <algorithm>
	#include <future>
	#include <limits>

	#ifdef _OPENMP
	#include "omp.h"
	#endif

	template<class Type>
	Type objective_function<Type>::operator() ()
	{
	SETUP_DATA;
	SETUP_DATA_CHECK;

	parallel_accumulator<Type> result(this);

	/* perform approximations using method descibed at
	* https://github.com/kaskr/adcomp/issues/233#issuecomment-306032192
	*
	* each branch may contain further parameters or data objects */
	if(app_type == "Laplace"){
	PARAMETER_MATRIX(u);
	laplace(COMMON_CALL, u);
	return result;

	}

	error("approximation method '%s' is not implemented", app_type.c_str());
	return Type(0);
	}