Skip to content

Instantly share code, notes, and snippets.

@jwpeterson
Created December 19, 2020 05:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jwpeterson/3c2b633e86ef83fe5e2e7a8ab5c9ec0f to your computer and use it in GitHub Desktop.
Save jwpeterson/3c2b633e86ef83fe5e2e7a8ab5c9ec0f to your computer and use it in GitHub Desktop.
Comparison of matrix-matrix-matrix product for RealTensorValue, DenseMatrix, and Eigen::Matrix3d
// Basic include file needed for the mesh functionality.
#include "libmesh/libmesh.h"
#include "libmesh/mesh.h"
#include "libmesh/dof_map.h"
#include "libmesh/elem.h"
#include "libmesh/fe_interface.h"
#include "libmesh/fe_map.h"
#include "libmesh/fe_base.h"
#include "libmesh/quadrature_gauss.h"
#include "libmesh/enum_to_string.h"
#include "libmesh/tensor_value.h"
#include "libmesh/dense_matrix.h"
// C++ include files that we need
#include <iostream>
#include <array>
#include <memory>
// Bring in everything from the libMesh namespace
using namespace libMesh;
// The main program.
int main (int argc, char ** argv)
{
// Initialize libMesh.
LibMeshInit init (argc, argv);
auto r = [](){ return static_cast<Real>(std::rand()) / static_cast<Real>(RAND_MAX); };
// Big vectors of random matrices
const unsigned int N=10000000;
std::vector<RealTensorValue> As;
std::vector<RealTensorValue> Bs;
std::vector<RealTensorValue> Cs;
As.reserve(N);
Bs.reserve(N);
Cs.reserve(N);
for (unsigned int i=0; i<N; ++i)
{
As.emplace_back(r(), r(), r(), r(), r(), r(), r(), r(), r());
Bs.emplace_back(r(), r(), r(), r(), r(), r(), r(), r(), r());
Cs.emplace_back(r(), r(), r(), r(), r(), r(), r(), r(), r());
}
// Same data structures but for DenseMatrix
std::vector<DenseMatrix<Real>> Dense_As;
std::vector<DenseMatrix<Real>> Dense_Bs;
std::vector<DenseMatrix<Real>> Dense_Cs;
Dense_As.reserve(N);
Dense_Bs.reserve(N);
Dense_Cs.reserve(N);
for (unsigned int i=0; i<N; ++i)
{
DenseMatrix<Real> A(3,3);
DenseMatrix<Real> B(3,3);
DenseMatrix<Real> C(3,3);
A.get_values() = {r(), r(), r(), r(), r(), r(), r(), r(), r()};
B.get_values() = {r(), r(), r(), r(), r(), r(), r(), r(), r()};
C.get_values() = {r(), r(), r(), r(), r(), r(), r(), r(), r()};
Dense_As.emplace_back(A);
Dense_Bs.emplace_back(B);
Dense_Cs.emplace_back(C);
}
// Same data structures but for Eigen::Matrix3d
std::vector<Eigen::Matrix3d> Eigen_As(N);
std::vector<Eigen::Matrix3d> Eigen_Bs(N);
std::vector<Eigen::Matrix3d> Eigen_Cs(N);
for (unsigned int i=0; i<N; ++i)
{
Eigen_As[i] << r(), r(), r(), r(), r(), r(), r(), r(), r();
Eigen_Bs[i] << r(), r(), r(), r(), r(), r(), r(), r(), r();
Eigen_Cs[i] << r(), r(), r(), r(), r(), r(), r(), r(), r();
}
PerfLog perf_log ("TypeTensor, DenseMatrix, Eigen::Matrix3d performance");
const unsigned int n_loops = 10;
for (unsigned int loop=0; loop<n_loops; ++loop)
{
// Output something for each loop so it doesn't seem like the
// code is just hanging.
libMesh::out << "Starting loop " << loop << std::endl;
std::vector<RealTensorValue> Ds1(N);
// std::vector<RealTensorValue> Ds2(N);
// Note: I have tried timing the operator approach first and
// vice-versa, but this did not seem to make any difference in
// the results.
// Time how long the "operator" approach to computing A * B * C takes
perf_log.push("TypeTensor A*B*C");
for (unsigned int i=0; i<N; ++i)
Ds1[i] = As[i] * Bs[i] * Cs[i];
perf_log.pop("TypeTensor A*B*C");
// Time how long the mat_mult3 approach takes
// Note: the mat_mult3() approach has been abandoned.
// perf_log.push("TypeTensor::mat_mult3()");
// for (unsigned int i=0; i<N; ++i)
// Ds2[i] = RealTensorValue::mat_mult3(As[i], Bs[i], Cs[i]);
// perf_log.pop("TypeTensor::mat_mult3()");
// Keep the compiler honest by accessing entries of Ds1 and Ds2
// Real norm_sum = 0.;
// for (unsigned int i=0; i<N; ++i)
// norm_sum += (Ds1[i] - Ds2[i]).norm();
// libMesh::out << "norm_sum = " << norm_sum << std::endl;
// Time how long the DenseMatrix approach takes. Allocate space
// for all the 3x3 DenseMatrices first (untimed) using the std::vector
// constructor that makes N copies of its arg.
std::vector<DenseMatrix<Real>> Ds3(N, DenseMatrix<Real>(3,3));
perf_log.push("DenseMatrix A*B*C");
for (unsigned int i=0; i<N; ++i)
{
// We compute the triple product as
// D = A; D *= B; D *= C;
// although there are other things we could try, such as
// left_multiply(), etc. there are no operators defined on
// DenseMatrix.
Ds3[i] = Dense_As[i];
Ds3[i].right_multiply(Dense_Bs[i]);
Ds3[i].right_multiply(Dense_Cs[i]);
}
perf_log.pop("DenseMatrix A*B*C");
// Time how long the Eigen::Matrix3d approach takes
std::vector<Eigen::Matrix3d> Ds4(N);
perf_log.push("Eigen::Matrix3d A*B*C");
for (unsigned int i=0; i<N; ++i)
Ds4[i] = Eigen_As[i] * Eigen_Bs[i] * Eigen_Cs[i];
perf_log.pop("Eigen::Matrix3d A*B*C");
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment