Skip to content

Instantly share code, notes, and snippets.

@riga
Last active February 4, 2020 16:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save riga/5cb64ff74bc1c051b0be77c29df9daab to your computer and use it in GitHub Desktop.
Save riga/5cb64ff74bc1c051b0be77c29df9daab to your computer and use it in GitHub Desktop.
CMS DeepJet performance test between TF and ONNX
/*
* Performance test of the DeepJet model with both TensorFlow and ONNXRuntime.
* Place this file into a cmssw test directory and add the following to the BuildFile.xml:
*
* <bin name="testTFDeepJetPerformance" file="testRunner.cpp,testDeepJetPerformance.cc">
* <use name="cppunit" />
* <use name="FWCore/Utilities" />
* <use name="PhysicsTools/TensorFlow" />
* <use name="PhysicsTools/ONNXRuntime" />
* </bin>
*
* Author: Marcel Rieger
*/
#include <sys/time.h>

#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include <cppunit/extensions/HelperMacros.h>

#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
// Returns a pseudo-random float in [0, 1) with a resolution of 1e-5, drawn from rand().
float randFloat() {
  const int bucket = rand() % 100000;
  return static_cast<float>(bucket) / 100000;
}
/*
 * Computes the arithmetic mean and the sample standard deviation (n - 1 in the denominator)
 * of the given values.
 *
 * values: input samples, not modified
 * mean:   output, set to the arithmetic mean, or 0 when values is empty
 * std:    output, set to the sample standard deviation, or 0 when fewer than two values are
 *         given (the n - 1 estimator is undefined in that case)
 */
void meanAndStd(const std::vector<double>& values, double& mean, double& std) {
  mean = 0.;
  std = 0.;
  if (values.empty()) {
    return;
  }

  for (const double v : values) {
    mean += v;
  }
  mean /= double(values.size());

  // with a single value the n - 1 denominator would be zero
  if (values.size() < 2) {
    return;
  }

  double sumSquaredDiffs = 0.;
  for (const double v : values) {
    const double diff = v - mean;
    sumSquaredDiffs += diff * diff;
  }

  // "std::" still names the namespace here, qualified lookup ignores the parameter called std
  std = std::sqrt(sumSquaredDiffs / double(values.size() - 1));
}
// CppUnit fixture for the DeepJet performance comparison between TensorFlow and ONNXRuntime.
class testDeepJetPerformance : public CppUnit::TestFixture {
// declare the test suite; checkAll is its only test case
CPPUNIT_TEST_SUITE(testDeepJetPerformance);
CPPUNIT_TEST(checkAll);
CPPUNIT_TEST_SUITE_END();
public:
// runs the benchmark for all configured batch sizes and prints the runtime summary
void checkAll();
};
// register the suite with CppUnit (presumably picked up by the runner in testRunner.cpp - confirm)
CPPUNIT_TEST_SUITE_REGISTRATION(testDeepJetPerformance);
void testDeepJetPerformance::checkAll() {
// test configuration
std::vector<int> batchSizes = {1, 2, 4, 8, 16, 32, 64, 128, 256};
int runs = 500;
std::string threadPoolNameTF = "tensorflow";
int nThreadsTF = 1; // not used for "no_threads"
std::string modelTF =
"/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/data-RecoBTag-Combined/V01-02-01/RecoBTag/Combined/data/"
"DeepFlavourV03_10X_training/constant_graph.pb";
std::string modelOX =
"/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/data-RecoBTag-Combined/V01-02-01/RecoBTag/Combined/data/"
"DeepFlavourV03_10X_training/model.onnx";
struct timeval tv;
// names of models input and outputs
std::vector<std::string> inputNames = {"input_1", "input_2", "input_3", "input_4", "input_5"};
std::vector<std::string> outputNames = {"ID_pred/Softmax:0"};
// create tensorflow objects
tensorflow::TBBThreadPool::instance(nThreadsTF);
tensorflow::setLogging();
CPPUNIT_ASSERT(tensorflow::TBBThreadPool::instance().NumThreads() == nThreadsTF);
tensorflow::GraphDef* graphDef = tensorflow::loadGraphDef(modelTF);
CPPUNIT_ASSERT(graphDef != nullptr);
tensorflow::SessionOptions opts;
tensorflow::setThreading(opts, 4);
tensorflow::Session* session = tensorflow::createSession(graphDef, opts);
CPPUNIT_ASSERT(session != nullptr);
// create onnx objects
cms::Ort::ONNXRuntime ox(modelOX);
// loop over batch sizes
for (int batchSize : batchSizes) {
std::cout << "start test for batch size " << batchSize << std::endl;
// define tensorflow inputs and outputs
tensorflow::Tensor input1TF(tensorflow::DT_FLOAT, {batchSize, 15});
tensorflow::Tensor input2TF(tensorflow::DT_FLOAT, {batchSize, 25, 16});
tensorflow::Tensor input3TF(tensorflow::DT_FLOAT, {batchSize, 25, 6});
tensorflow::Tensor input4TF(tensorflow::DT_FLOAT, {batchSize, 4, 12});
tensorflow::Tensor input5TF(tensorflow::DT_FLOAT, {batchSize, 1});
tensorflow::Tensor input6TF(tensorflow::DT_BOOL, {});
std::vector<tensorflow::Tensor> outputsTF;
// store tensors in a named list
tensorflow::NamedTensorList inputsTF = {
{inputNames[0] + ":0", input1TF},
{inputNames[1] + ":0", input2TF},
{inputNames[2] + ":0", input3TF},
{inputNames[3] + ":0", input4TF},
{inputNames[4] + ":0", input5TF},
{"cpf_input_batchnorm/keras_learning_phase:0", input6TF}, // only present in TF model
};
// define onnx inputs
std::vector<std::vector<float>> inputsOX = {
std::vector<float>(input1TF.shape().num_elements()),
std::vector<float>(input2TF.shape().num_elements()),
std::vector<float>(input3TF.shape().num_elements()),
std::vector<float>(input4TF.shape().num_elements()),
std::vector<float>(input5TF.shape().num_elements())
};
// store runtimes
std::vector<double> runtimesTF;
std::vector<double> runtimesOX;
// start runs
for (int r = 0; r < runs + 1; r++) {
// fill random floats
float* d = input1TF.flat<float>().data();
for (int i = 0; i < input1TF.shape().num_elements(); i++, d++) {
*d = randFloat();
inputsOX[0][i] = *d;
}
d = input2TF.flat<float>().data();
for (int i = 0; i < input2TF.shape().num_elements(); i++, d++) {
*d = randFloat();
inputsOX[1][i] = *d;
}
d = input3TF.flat<float>().data();
for (int i = 0; i < input3TF.shape().num_elements(); i++, d++) {
*d = randFloat();
inputsOX[2][i] = *d;
}
d = input4TF.flat<float>().data();
for (int i = 0; i < input4TF.shape().num_elements(); i++, d++) {
*d = randFloat();
inputsOX[3][i] = *d;
}
d = input5TF.flat<float>().data();
for (int i = 0; i < input5TF.shape().num_elements(); i++, d++) {
*d = randFloat();
inputsOX[4][i] = *d;
}
input6TF.scalar<bool>()() = false; // only present in TF model
// run tensorflow
gettimeofday(&tv, NULL);
double t0 = tv.tv_sec * 1000. + tv.tv_usec / 1000.;
tensorflow::run(session, inputsTF, outputNames, &outputsTF, nullptr);
gettimeofday(&tv, NULL);
double runtimeTF = (tv.tv_sec * 1000. + tv.tv_usec / 1000.) - t0;
// run onnx
gettimeofday(&tv, NULL);
t0 = tv.tv_sec * 1000. + tv.tv_usec / 1000.;
ox.run(inputNames, inputsOX, outputNames, batchSize);
gettimeofday(&tv, NULL);
double runtimeOX = (tv.tv_sec * 1000. + tv.tv_usec / 1000.) - t0;
// store runtimes for all but the first run
if (r != 0) {
runtimesTF.push_back(runtimeTF);
runtimesOX.push_back(runtimeOX);
}
}
// compute runtime means and stds
double meanTF = 0.;
double stdTF = 0.;
meanAndStd(runtimesTF, meanTF, stdTF);
double meanOX = 0.;
double stdOX = 0.;
meanAndStd(runtimesOX, meanOX, stdOX);
// log
std::setprecision(4);
std::cout << "runtime averaged over " << runs << " runs with batch size " << batchSize << ":" << std::endl;
std::cout << " TF: " << meanTF << " +- " << stdTF << " ms" << std::endl;
std::cout << " OX: " << meanOX << " +- " << stdOX << " ms" << std::endl;
std::cout << std::endl;
}
// cleanup
CPPUNIT_ASSERT(tensorflow::closeSession(session));
delete graphDef;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment