Last active
February 4, 2020 16:45
-
-
Save riga/5cb64ff74bc1c051b0be77c29df9daab to your computer and use it in GitHub Desktop.
CMS DeepJet performance test between TF and ONNX
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Performance test of the DeepJet model with both TensorFlow and ONNXRuntime.
 * Place this file into a CMSSW test directory and add the following to the BuildFile.xml:
 *
 *   <bin name="testTFDeepJetPerformance" file="testRunner.cpp,testDeepJetPerformance.cc">
 *     <use name="cppunit" />
 *     <use name="FWCore/Utilities" />
 *     <use name="PhysicsTools/TensorFlow" />
 *     <use name="PhysicsTools/ONNXRuntime" />
 *   </bin>
 *
 * Author: Marcel Rieger
 */
#include <sys/time.h>

#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

#include <cppunit/extensions/HelperMacros.h>

#include "PhysicsTools/TensorFlow/interface/TensorFlow.h"
#include "PhysicsTools/ONNXRuntime/interface/ONNXRuntime.h"
// Returns a pseudo-random float in [0, 1), drawn from rand() and quantized to steps of 1e-5.
float randFloat() {
  const int bucket = rand() % 100000;
  return float(bucket) / 100000;
}
/*
 * Computes the arithmetic mean and the sample standard deviation (n - 1 normalization) of values.
 *
 * values: samples to summarize.
 * mean:   output, set to the average of values, or to 0 when values is empty.
 * std:    output, set to the sample standard deviation, or to 0 when fewer than two values are
 *         given, since the n - 1 normalization is undefined in that case.
 */
void meanAndStd(const std::vector<double>& values, double& mean, double& std) {
  mean = 0.;
  std = 0.;
  if (values.empty()) {
    return;
  }

  const double n = double(values.size());
  double sum = 0.;
  for (const double v : values) {
    sum += v;
  }
  mean = sum / n;

  // a single sample carries no spread information, stop before dividing by (n - 1) == 0
  if (values.size() < 2) {
    return;
  }

  double sumSquaredDiffs = 0.;
  for (const double v : values) {
    const double diff = v - mean;
    sumSquaredDiffs += diff * diff;
  }
  // the output parameter named "std" does not hide the namespace in the qualified std:: lookup
  std = std::sqrt(sumSquaredDiffs / (n - 1.));
}
class testDeepJetPerformance : public CppUnit::TestFixture { | |
CPPUNIT_TEST_SUITE(testDeepJetPerformance); | |
CPPUNIT_TEST(checkAll); | |
CPPUNIT_TEST_SUITE_END(); | |
public: | |
void checkAll(); | |
}; | |
CPPUNIT_TEST_SUITE_REGISTRATION(testDeepJetPerformance); | |
void testDeepJetPerformance::checkAll() { | |
// test configuration | |
std::vector<int> batchSizes = {1, 2, 4, 8, 16, 32, 64, 128, 256}; | |
int runs = 500; | |
std::string threadPoolNameTF = "tensorflow"; | |
int nThreadsTF = 1; // not used for "no_threads" | |
std::string modelTF = | |
"/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/data-RecoBTag-Combined/V01-02-01/RecoBTag/Combined/data/" | |
"DeepFlavourV03_10X_training/constant_graph.pb"; | |
std::string modelOX = | |
"/cvmfs/cms.cern.ch/slc7_amd64_gcc820/cms/data-RecoBTag-Combined/V01-02-01/RecoBTag/Combined/data/" | |
"DeepFlavourV03_10X_training/model.onnx"; | |
struct timeval tv; | |
// names of models input and outputs | |
std::vector<std::string> inputNames = {"input_1", "input_2", "input_3", "input_4", "input_5"}; | |
std::vector<std::string> outputNames = {"ID_pred/Softmax:0"}; | |
// create tensorflow objects | |
tensorflow::TBBThreadPool::instance(nThreadsTF); | |
tensorflow::setLogging(); | |
CPPUNIT_ASSERT(tensorflow::TBBThreadPool::instance().NumThreads() == nThreadsTF); | |
tensorflow::GraphDef* graphDef = tensorflow::loadGraphDef(modelTF); | |
CPPUNIT_ASSERT(graphDef != nullptr); | |
tensorflow::SessionOptions opts; | |
tensorflow::setThreading(opts, 4); | |
tensorflow::Session* session = tensorflow::createSession(graphDef, opts); | |
CPPUNIT_ASSERT(session != nullptr); | |
// create onnx objects | |
cms::Ort::ONNXRuntime ox(modelOX); | |
// loop over batch sizes | |
for (int batchSize : batchSizes) { | |
std::cout << "start test for batch size " << batchSize << std::endl; | |
// define tensorflow inputs and outputs | |
tensorflow::Tensor input1TF(tensorflow::DT_FLOAT, {batchSize, 15}); | |
tensorflow::Tensor input2TF(tensorflow::DT_FLOAT, {batchSize, 25, 16}); | |
tensorflow::Tensor input3TF(tensorflow::DT_FLOAT, {batchSize, 25, 6}); | |
tensorflow::Tensor input4TF(tensorflow::DT_FLOAT, {batchSize, 4, 12}); | |
tensorflow::Tensor input5TF(tensorflow::DT_FLOAT, {batchSize, 1}); | |
tensorflow::Tensor input6TF(tensorflow::DT_BOOL, {}); | |
std::vector<tensorflow::Tensor> outputsTF; | |
// store tensors in a named list | |
tensorflow::NamedTensorList inputsTF = { | |
{inputNames[0] + ":0", input1TF}, | |
{inputNames[1] + ":0", input2TF}, | |
{inputNames[2] + ":0", input3TF}, | |
{inputNames[3] + ":0", input4TF}, | |
{inputNames[4] + ":0", input5TF}, | |
{"cpf_input_batchnorm/keras_learning_phase:0", input6TF}, // only present in TF model | |
}; | |
// define onnx inputs | |
std::vector<std::vector<float>> inputsOX = { | |
std::vector<float>(input1TF.shape().num_elements()), | |
std::vector<float>(input2TF.shape().num_elements()), | |
std::vector<float>(input3TF.shape().num_elements()), | |
std::vector<float>(input4TF.shape().num_elements()), | |
std::vector<float>(input5TF.shape().num_elements()) | |
}; | |
// store runtimes | |
std::vector<double> runtimesTF; | |
std::vector<double> runtimesOX; | |
// start runs | |
for (int r = 0; r < runs + 1; r++) { | |
// fill random floats | |
float* d = input1TF.flat<float>().data(); | |
for (int i = 0; i < input1TF.shape().num_elements(); i++, d++) { | |
*d = randFloat(); | |
inputsOX[0][i] = *d; | |
} | |
d = input2TF.flat<float>().data(); | |
for (int i = 0; i < input2TF.shape().num_elements(); i++, d++) { | |
*d = randFloat(); | |
inputsOX[1][i] = *d; | |
} | |
d = input3TF.flat<float>().data(); | |
for (int i = 0; i < input3TF.shape().num_elements(); i++, d++) { | |
*d = randFloat(); | |
inputsOX[2][i] = *d; | |
} | |
d = input4TF.flat<float>().data(); | |
for (int i = 0; i < input4TF.shape().num_elements(); i++, d++) { | |
*d = randFloat(); | |
inputsOX[3][i] = *d; | |
} | |
d = input5TF.flat<float>().data(); | |
for (int i = 0; i < input5TF.shape().num_elements(); i++, d++) { | |
*d = randFloat(); | |
inputsOX[4][i] = *d; | |
} | |
input6TF.scalar<bool>()() = false; // only present in TF model | |
// run tensorflow | |
gettimeofday(&tv, NULL); | |
double t0 = tv.tv_sec * 1000. + tv.tv_usec / 1000.; | |
tensorflow::run(session, inputsTF, outputNames, &outputsTF, nullptr); | |
gettimeofday(&tv, NULL); | |
double runtimeTF = (tv.tv_sec * 1000. + tv.tv_usec / 1000.) - t0; | |
// run onnx | |
gettimeofday(&tv, NULL); | |
t0 = tv.tv_sec * 1000. + tv.tv_usec / 1000.; | |
ox.run(inputNames, inputsOX, outputNames, batchSize); | |
gettimeofday(&tv, NULL); | |
double runtimeOX = (tv.tv_sec * 1000. + tv.tv_usec / 1000.) - t0; | |
// store runtimes for all but the first run | |
if (r != 0) { | |
runtimesTF.push_back(runtimeTF); | |
runtimesOX.push_back(runtimeOX); | |
} | |
} | |
// compute runtime means and stds | |
double meanTF = 0.; | |
double stdTF = 0.; | |
meanAndStd(runtimesTF, meanTF, stdTF); | |
double meanOX = 0.; | |
double stdOX = 0.; | |
meanAndStd(runtimesOX, meanOX, stdOX); | |
// log | |
std::setprecision(4); | |
std::cout << "runtime averaged over " << runs << " runs with batch size " << batchSize << ":" << std::endl; | |
std::cout << " TF: " << meanTF << " +- " << stdTF << " ms" << std::endl; | |
std::cout << " OX: " << meanOX << " +- " << stdOX << " ms" << std::endl; | |
std::cout << std::endl; | |
} | |
// cleanup | |
CPPUNIT_ASSERT(tensorflow::closeSession(session)); | |
delete graphDef; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment