riga/testDeepJetPerformance.cc

## testDeepJetPerformance.cc
/*
 * Performance test of the DeepJet model with both TensorFlow and ONNXRuntime.
 * Place this file into a cmssw test directory and add the following to the BuildFile.xml:
 *
 * <bin name="testTFDeepJetPerformance" file="testRunner.cpp,testDeepJetPerformance.cc">
 *     <use name="cppunit" />
 *     <use name="FWCore/Utilities" />
 *     <use name="PhysicsTools/TensorFlow" />
 *     <use name="PhysicsTools/ONNXRuntime" />
 * </bin>
 *
 * Author: Marcel Rieger
 */

#include <sys/time.h>

#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include <cppunit/extensions/HelperMacros.h>

#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"

// Returns a pseudo-random float in [0, 1) with a granularity of 1e-5, drawn from rand().
float randFloat() {
  const int ticks = rand() % 100000;
  return static_cast<float>(ticks) / 100000;
}

// Computes the arithmetic mean and the sample standard deviation of a set of values.
//
// values: samples to summarise
// mean:   receives the arithmetic mean; 0 when values is empty
// stdDev: receives the sample standard deviation (n - 1 normalisation); 0 when values holds
//         fewer than two samples, for which the estimator is undefined
void meanAndStd(const std::vector<double>& values, double& mean, double& stdDev) {
  mean = 0.;
  stdDev = 0.;
  if (values.empty()) {
    return;
  }

  for (const double v : values) {
    mean += v;
  }
  mean /= static_cast<double>(values.size());

  // with a single sample the n - 1 divisor would be zero, keep the deviation at 0
  if (values.size() < 2) {
    return;
  }

  double sumSquaredDev = 0.;
  for (const double v : values) {
    const double dev = v - mean;
    sumSquaredDev += dev * dev;
  }
  stdDev = std::sqrt(sumSquaredDev / static_cast<double>(values.size() - 1));
}

// CppUnit fixture for the DeepJet runtime comparison. The suite declared through the
// CPPUNIT_TEST_SUITE macros below consists of a single test, checkAll.
class testDeepJetPerformance : public CppUnit::TestFixture {
  CPPUNIT_TEST_SUITE(testDeepJetPerformance);
  CPPUNIT_TEST(checkAll);
  CPPUNIT_TEST_SUITE_END();

public:
  // Times the TensorFlow and the ONNXRuntime model for several batch sizes and prints the results.
  void checkAll();
};

// Registers the suite with CppUnit (presumably picked up by the testRunner.cpp listed in the
// BuildFile snippet of the file header).
CPPUNIT_TEST_SUITE_REGISTRATION(testDeepJetPerformance);

void testDeepJetPerformance::checkAll() {
  // test configuration
  std::vector<int> batchSizes = {1, 2, 4, 8, 16, 32, 64, 128, 256};
  int runs = 500;
  std::string threadPoolNameTF = "tensorflow";
  int nThreadsTF = 1;  // not used for "no_threads"
  std::string modelTF =
      "/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/data-RecoBTag-Combined/V01-02-01/RecoBTag/Combined/data/"
      "DeepFlavourV03_10X_training/constant_graph.pb";
  std::string modelOX =
      "/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/data-RecoBTag-Combined/V01-02-01/RecoBTag/Combined/data/"
      "DeepFlavourV03_10X_training/model.onnx";
  struct timeval tv;

  // names of models input and outputs
  std::vector<std::string> inputNames = {"input_1", "input_2", "input_3", "input_4", "input_5"};
  std::vector<std::string> outputNames = {"ID_pred/Softmax:0"};

  // create tensorflow objects
  tensorflow::TBBThreadPool::instance(nThreadsTF);
  tensorflow::setLogging();
  CPPUNIT_ASSERT(tensorflow::TBBThreadPool::instance().NumThreads() == nThreadsTF);
  tensorflow::GraphDef* graphDef = tensorflow::loadGraphDef(modelTF);
  CPPUNIT_ASSERT(graphDef != nullptr);
  tensorflow::SessionOptions opts;
  tensorflow::setThreading(opts, 4);
  tensorflow::Session* session = tensorflow::createSession(graphDef, opts);
  CPPUNIT_ASSERT(session != nullptr);

  // create onnx objects
  cms::Ort::ONNXRuntime ox(modelOX);

  // loop over batch sizes
  for (int batchSize : batchSizes) {
    std::cout << "start test for batch size " << batchSize << std::endl;

    // define tensorflow inputs and outputs
    tensorflow::Tensor input1TF(tensorflow::DT_FLOAT, {batchSize, 15});
    tensorflow::Tensor input2TF(tensorflow::DT_FLOAT, {batchSize, 25, 16});
    tensorflow::Tensor input3TF(tensorflow::DT_FLOAT, {batchSize, 25, 6});
    tensorflow::Tensor input4TF(tensorflow::DT_FLOAT, {batchSize, 4, 12});
    tensorflow::Tensor input5TF(tensorflow::DT_FLOAT, {batchSize, 1});
    tensorflow::Tensor input6TF(tensorflow::DT_BOOL, {});
    std::vector<tensorflow::Tensor> outputsTF;

    // store tensors in a named list
    tensorflow::NamedTensorList inputsTF = {
        {inputNames[0] + ":0", input1TF},
        {inputNames[1] + ":0", input2TF},
        {inputNames[2] + ":0", input3TF},
        {inputNames[3] + ":0", input4TF},
        {inputNames[4] + ":0", input5TF},
        {"cpf_input_batchnorm/keras_learning_phase:0", input6TF},  // only present in TF model
    };

    // define onnx inputs
    std::vector<std::vector<float>> inputsOX = {
        std::vector<float>(input1TF.shape().num_elements()),
        std::vector<float>(input2TF.shape().num_elements()),
        std::vector<float>(input3TF.shape().num_elements()),
        std::vector<float>(input4TF.shape().num_elements()),
        std::vector<float>(input5TF.shape().num_elements())
    };

    // store runtimes
    std::vector<double> runtimesTF;
    std::vector<double> runtimesOX;

    // start runs
    for (int r = 0; r < runs + 1; r++) {
      // fill random floats
      float* d = input1TF.flat<float>().data();
      for (int i = 0; i < input1TF.shape().num_elements(); i++, d++) {
        *d = randFloat();
        inputsOX[0][i] = *d;
      }
      d = input2TF.flat<float>().data();
      for (int i = 0; i < input2TF.shape().num_elements(); i++, d++) {
        *d = randFloat();
        inputsOX[1][i] = *d;
      }
      d = input3TF.flat<float>().data();
      for (int i = 0; i < input3TF.shape().num_elements(); i++, d++) {
        *d = randFloat();
        inputsOX[2][i] = *d;
      }
      d = input4TF.flat<float>().data();
      for (int i = 0; i < input4TF.shape().num_elements(); i++, d++) {
        *d = randFloat();
        inputsOX[3][i] = *d;
      }
      d = input5TF.flat<float>().data();
      for (int i = 0; i < input5TF.shape().num_elements(); i++, d++) {
        *d = randFloat();
        inputsOX[4][i] = *d;
      }
      input6TF.scalar<bool>()() = false;  // only present in TF model

      // run tensorflow
      gettimeofday(&tv, NULL);
      double t0 = tv.tv_sec * 1000. + tv.tv_usec / 1000.;
      tensorflow::run(session, inputsTF, outputNames, &outputsTF, nullptr);
      gettimeofday(&tv, NULL);
      double runtimeTF = (tv.tv_sec * 1000. + tv.tv_usec / 1000.) - t0;

      // run onnx
      gettimeofday(&tv, NULL);
      t0 = tv.tv_sec * 1000. + tv.tv_usec / 1000.;
      ox.run(inputNames, inputsOX, outputNames, batchSize);
      gettimeofday(&tv, NULL);
      double runtimeOX = (tv.tv_sec * 1000. + tv.tv_usec / 1000.) - t0;

      // store runtimes for all but the first run
      if (r != 0) {
        runtimesTF.push_back(runtimeTF);
        runtimesOX.push_back(runtimeOX);
      }
    }

    // compute runtime means and stds
    double meanTF = 0.;
    double stdTF = 0.;
    meanAndStd(runtimesTF, meanTF, stdTF);

    double meanOX = 0.;
    double stdOX = 0.;
    meanAndStd(runtimesOX, meanOX, stdOX);

    // log
    std::setprecision(4);
    std::cout << "runtime averaged over " << runs << " runs with batch size " << batchSize << ":" << std::endl;
    std::cout << "    TF: " << meanTF << " +- " << stdTF << " ms" << std::endl;
    std::cout << "    OX: " << meanOX << " +- " << stdOX << " ms" << std::endl;
    std::cout << std::endl;
  }

  // cleanup
  CPPUNIT_ASSERT(tensorflow::closeSession(session));
  delete graphDef;
}
	/*
	* Performance test of the DeepJet model with both TensorFlow and ONNXRuntime.
	* Place this file into a cmssw test directory and add the following to the BuildFile.xml:
	*
	* <bin name="testTFDeepJetPerformance" file="testRunner.cpp,testDeepJetPerformance.cc">
	* <use name="cppunit" />
	* <use name="FWCore/Utilities" />
	* <use name="PhysicsTools/TensorFlow" />
	* <use name="PhysicsTools/ONNXRuntime" />
	* </bin>
	*
	* Author: Marcel Rieger
	*/

	#include <stdexcept>
	#include <sys/time.h>
	#include <iomanip>
	#include <cppunit/extensions/HelperMacros.h>

	#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
	#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"

	// Returns a pseudo-random float in [0, 1) with a granularity of 1e-5, drawn from rand().
	float randFloat() {
	  const int ticks = rand() % 100000;
	  return static_cast<float>(ticks) / 100000;
	}

	// Computes the arithmetic mean and the sample standard deviation of a set of values.
	//
	// values: samples to summarise
	// mean:   receives the arithmetic mean; 0 when values is empty
	// stdDev: receives the sample standard deviation (n - 1 normalisation); 0 when values holds
	//         fewer than two samples, for which the estimator is undefined
	void meanAndStd(const std::vector<double>& values, double& mean, double& stdDev) {
	  mean = 0.;
	  stdDev = 0.;
	  if (values.empty()) {
	    return;
	  }

	  for (const double v : values) {
	    mean += v;
	  }
	  mean /= static_cast<double>(values.size());

	  // with a single sample the n - 1 divisor would be zero, keep the deviation at 0
	  if (values.size() < 2) {
	    return;
	  }

	  double sumSquaredDev = 0.;
	  for (const double v : values) {
	    const double dev = v - mean;
	    sumSquaredDev += dev * dev;
	  }
	  stdDev = std::sqrt(sumSquaredDev / static_cast<double>(values.size() - 1));
	}

	// CppUnit fixture for the DeepJet runtime comparison. The suite declared through the
	// CPPUNIT_TEST_SUITE macros below consists of a single test, checkAll.
	class testDeepJetPerformance : public CppUnit::TestFixture {
	CPPUNIT_TEST_SUITE(testDeepJetPerformance);
	CPPUNIT_TEST(checkAll);
	CPPUNIT_TEST_SUITE_END();

	public:
	// Times the TensorFlow and the ONNXRuntime model for several batch sizes and prints the results.
	void checkAll();
	};

	// Registers the suite with CppUnit (presumably picked up by the testRunner.cpp listed in the
	// BuildFile snippet of the file header).
	CPPUNIT_TEST_SUITE_REGISTRATION(testDeepJetPerformance);

	void testDeepJetPerformance::checkAll() {
	// test configuration
	std::vector<int> batchSizes = {1, 2, 4, 8, 16, 32, 64, 128, 256};
	int runs = 500;
	std::string threadPoolNameTF = "tensorflow";
	int nThreadsTF = 1; // not used for "no_threads"
	std::string modelTF =
	"/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/data-RecoBTag-Combined/V01-02-01/RecoBTag/Combined/data/"
	"DeepFlavourV03_10X_training/constant_graph.pb";
	std::string modelOX =
	"/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/data-RecoBTag-Combined/V01-02-01/RecoBTag/Combined/data/"
	"DeepFlavourV03_10X_training/model.onnx";
	struct timeval tv;

	// names of models input and outputs
	std::vector<std::string> inputNames = {"input_1", "input_2", "input_3", "input_4", "input_5"};
	std::vector<std::string> outputNames = {"ID_pred/Softmax:0"};

	// create tensorflow objects
	tensorflow::TBBThreadPool::instance(nThreadsTF);
	tensorflow::setLogging();
	CPPUNIT_ASSERT(tensorflow::TBBThreadPool::instance().NumThreads() == nThreadsTF);
	tensorflow::GraphDef* graphDef = tensorflow::loadGraphDef(modelTF);
	CPPUNIT_ASSERT(graphDef != nullptr);
	tensorflow::SessionOptions opts;
	tensorflow::setThreading(opts, 4);
	tensorflow::Session* session = tensorflow::createSession(graphDef, opts);
	CPPUNIT_ASSERT(session != nullptr);

	// create onnx objects
	cms::Ort::ONNXRuntime ox(modelOX);

	// loop over batch sizes
	for (int batchSize : batchSizes) {
	std::cout << "start test for batch size " << batchSize << std::endl;

	// define tensorflow inputs and outputs
	tensorflow::Tensor input1TF(tensorflow::DT_FLOAT, {batchSize, 15});
	tensorflow::Tensor input2TF(tensorflow::DT_FLOAT, {batchSize, 25, 16});
	tensorflow::Tensor input3TF(tensorflow::DT_FLOAT, {batchSize, 25, 6});
	tensorflow::Tensor input4TF(tensorflow::DT_FLOAT, {batchSize, 4, 12});
	tensorflow::Tensor input5TF(tensorflow::DT_FLOAT, {batchSize, 1});
	tensorflow::Tensor input6TF(tensorflow::DT_BOOL, {});
	std::vector<tensorflow::Tensor> outputsTF;

	// store tensors in a named list
	tensorflow::NamedTensorList inputsTF = {
	{inputNames[0] + ":0", input1TF},
	{inputNames[1] + ":0", input2TF},
	{inputNames[2] + ":0", input3TF},
	{inputNames[3] + ":0", input4TF},
	{inputNames[4] + ":0", input5TF},
	{"cpf_input_batchnorm/keras_learning_phase:0", input6TF}, // only present in TF model
	};

	// define onnx inputs
	std::vector<std::vector<float>> inputsOX = {
	std::vector<float>(input1TF.shape().num_elements()),
	std::vector<float>(input2TF.shape().num_elements()),
	std::vector<float>(input3TF.shape().num_elements()),
	std::vector<float>(input4TF.shape().num_elements()),
	std::vector<float>(input5TF.shape().num_elements())
	};

	// store runtimes
	std::vector<double> runtimesTF;
	std::vector<double> runtimesOX;

	// start runs
	for (int r = 0; r < runs + 1; r++) {
	// fill random floats
	float* d = input1TF.flat<float>().data();
	for (int i = 0; i < input1TF.shape().num_elements(); i++, d++) {
	*d = randFloat();
	inputsOX[0][i] = *d;
	}
	d = input2TF.flat<float>().data();
	for (int i = 0; i < input2TF.shape().num_elements(); i++, d++) {
	*d = randFloat();
	inputsOX[1][i] = *d;
	}
	d = input3TF.flat<float>().data();
	for (int i = 0; i < input3TF.shape().num_elements(); i++, d++) {
	*d = randFloat();
	inputsOX[2][i] = *d;
	}
	d = input4TF.flat<float>().data();
	for (int i = 0; i < input4TF.shape().num_elements(); i++, d++) {
	*d = randFloat();
	inputsOX[3][i] = *d;
	}
	d = input5TF.flat<float>().data();
	for (int i = 0; i < input5TF.shape().num_elements(); i++, d++) {
	*d = randFloat();
	inputsOX[4][i] = *d;
	}
	input6TF.scalar<bool>()() = false; // only present in TF model

	// run tensorflow
	gettimeofday(&tv, NULL);
	double t0 = tv.tv_sec * 1000. + tv.tv_usec / 1000.;
	tensorflow::run(session, inputsTF, outputNames, &outputsTF, nullptr);
	gettimeofday(&tv, NULL);
	double runtimeTF = (tv.tv_sec * 1000. + tv.tv_usec / 1000.) - t0;

	// run onnx
	gettimeofday(&tv, NULL);
	t0 = tv.tv_sec * 1000. + tv.tv_usec / 1000.;
	ox.run(inputNames, inputsOX, outputNames, batchSize);
	gettimeofday(&tv, NULL);
	double runtimeOX = (tv.tv_sec * 1000. + tv.tv_usec / 1000.) - t0;

	// store runtimes for all but the first run
	if (r != 0) {
	runtimesTF.push_back(runtimeTF);
	runtimesOX.push_back(runtimeOX);
	}
	}

	// compute runtime means and stds
	double meanTF = 0.;
	double stdTF = 0.;
	meanAndStd(runtimesTF, meanTF, stdTF);

	double meanOX = 0.;
	double stdOX = 0.;
	meanAndStd(runtimesOX, meanOX, stdOX);

	// log
	std::setprecision(4);
	std::cout << "runtime averaged over " << runs << " runs with batch size " << batchSize << ":" << std::endl;
	std::cout << " TF: " << meanTF << " +- " << stdTF << " ms" << std::endl;
	std::cout << " OX: " << meanOX << " +- " << stdOX << " ms" << std::endl;
	std::cout << std::endl;
	}

	// cleanup
	CPPUNIT_ASSERT(tensorflow::closeSession(session));
	delete graphDef;
	}