
@pranavsharma
Last active September 9, 2021 20:06
onnxruntime C++ API inferencing example for CPU
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
//
/*
pranav@XXX:~$ wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz
....
2021-07-28 19:44:06 (7.60 MB/s) - ‘onnxruntime-linux-x64-1.8.1.tgz’ saved [4736207/4736207]
pranav@XXX:~$ tar xvfz onnxruntime-linux-x64-1.8.1.tgz
onnxruntime-linux-x64-1.8.1/
onnxruntime-linux-x64-1.8.1/ThirdPartyNotices.txt
onnxruntime-linux-x64-1.8.1/LICENSE
onnxruntime-linux-x64-1.8.1/Privacy.md
onnxruntime-linux-x64-1.8.1/README.md
onnxruntime-linux-x64-1.8.1/include/
onnxruntime-linux-x64-1.8.1/include/onnxruntime_session_options_config_keys.h
onnxruntime-linux-x64-1.8.1/include/cpu_provider_factory.h
onnxruntime-linux-x64-1.8.1/include/cuda_provider_factory.h
onnxruntime-linux-x64-1.8.1/include/provider_options.h
onnxruntime-linux-x64-1.8.1/include/onnxruntime_cxx_inline.h
onnxruntime-linux-x64-1.8.1/include/onnxruntime_run_options_config_keys.h
onnxruntime-linux-x64-1.8.1/include/onnxruntime_c_api.h
onnxruntime-linux-x64-1.8.1/include/onnxruntime_cxx_api.h
onnxruntime-linux-x64-1.8.1/GIT_COMMIT_ID
onnxruntime-linux-x64-1.8.1/lib/
onnxruntime-linux-x64-1.8.1/lib/libonnxruntime.so
onnxruntime-linux-x64-1.8.1/lib/libonnxruntime.so.1.8.1
onnxruntime-linux-x64-1.8.1/VERSION_NUMBER
pranav@XXX:~$ cd onnxruntime-linux-x64-1.8.1/
pranav@XXX:~/onnxruntime-linux-x64-1.8.1$ g++ -std=c++14 -o t-ortcpu t-ortcpu.cc -I include/ -L lib/ -lonnxruntime -Wl,-rpath,lib/
pranav@XXX:~/onnxruntime-linux-x64-1.8.1$ cp ~/squeezenet.onnx .
pranav@XXX:~/onnxruntime-linux-x64-1.8.1$ ~/t-ortcpu
Using Onnxruntime C++ API
Session Creation elapsed time in milliseconds: 38 ms
Number of inputs = 1
Input 0 : name=data_0
Input 0 : type=1
Input 0 : num_dims=4
Input 0 : dim 0=1
Input 0 : dim 1=3
Input 0 : dim 2=224
Input 0 : dim 3=224
Run elapsed time in milliseconds: 4 ms
Score for class [0] = 0.000045
Score for class [1] = 0.003846
Score for class [2] = 0.000125
Score for class [3] = 0.001180
Score for class [4] = 0.001317
Done!
*/
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstdio>
#include <iostream>
#include <onnxruntime_cxx_api.h>
#include <vector>
int main(int argc, char *argv[]) {
//*************************************************************************
// initialize environment...one environment per process
// environment maintains thread pools and other state info
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
// initialize session options if needed
Ort::SessionOptions session_options;
// If the onnxruntime library is built with CUDA enabled, we can run this
// session on CUDA by uncommenting the two lines below (the #include belongs
// at the top of the file; it declares the function):
// #include "cuda_provider_factory.h"
// OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 1);
// Set the graph optimization level. Available levels are:
// ORT_DISABLE_ALL     -> disable all optimizations
// ORT_ENABLE_BASIC    -> enable basic optimizations (such as redundant node
//                        removal)
// ORT_ENABLE_EXTENDED -> enable extended optimizations (basic optimizations
//                        plus more complex ones like node fusions)
// ORT_ENABLE_ALL      -> enable all possible optimizations
// session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
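// other session options can be set here as well; for example, the intra-op
// thread-pool size (not needed for this sample, shown only as a pointer):
// session_options.SetIntraOpNumThreads(1);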
//*************************************************************************
// create session and load model into memory
// using squeezenet version 1.3
// URL = https://github.com/onnx/models/tree/master/squeezenet
#ifdef _WIN32
const wchar_t *model_path = L"squeezenet.onnx";
#else
const char *model_path = "squeezenet.onnx";
#endif
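// the model is loaded from the current working directory; the transcript at
// the top copies squeezenet.onnx there before running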
printf("Using Onnxruntime C++ API\n");
auto start = std::chrono::steady_clock::now();
Ort::Session session(env, model_path, session_options);
auto end = std::chrono::steady_clock::now();
std::cout << "Session Creation elapsed time in milliseconds: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(end -
start)
.count()
<< " ms\n";
//*************************************************************************
// print model input layer (node names, types, shape etc.)
Ort::AllocatorWithDefaultOptions allocator;
// print number of model input nodes
size_t num_input_nodes = session.GetInputCount();
std::vector<const char *> input_node_names(num_input_nodes);
std::vector<int64_t> input_node_dims; // simplify... this model has only 1
// input node {1, 3, 224, 224}.
printf("Number of inputs = %zu\n", num_input_nodes);
// iterate over all input nodes
for (size_t i = 0; i < num_input_nodes; i++) {
// print input node names
char *input_name = session.GetInputName(i, allocator);
printf("Input %zu : name=%s\n", i, input_name);
input_node_names[i] = input_name;
// print input node types
Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
ONNXTensorElementDataType type = tensor_info.GetElementType();
printf("Input %zu : type=%d\n", i, type);
// print input shapes/dims
input_node_dims = tensor_info.GetShape();
printf("Input %zu : num_dims=%zu\n", i, input_node_dims.size());
for (size_t j = 0; j < input_node_dims.size(); j++)
printf("Input %zu : dim %zu=%jd\n", i, j, (intmax_t)input_node_dims[j]);
}
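// note: the name strings returned by GetInputName are allocated by
// `allocator`; strictly speaking they should be released (e.g. with
// allocator.Free) once no longer needed. this sample keeps them alive
// until after Run() below.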
// Results should be...
// Number of inputs = 1
// Input 0 : name = data_0
// Input 0 : type = 1
// Input 0 : num_dims = 4
// Input 0 : dim 0 = 1
// Input 0 : dim 1 = 3
// Input 0 : dim 2 = 224
// Input 0 : dim 3 = 224
//*************************************************************************
// Score the model using sample data, and inspect values
size_t input_tensor_size =
224 * 224 * 3; // simplify ... using known dim values to calculate size;
// use GetTensorTypeAndShapeInfo().GetElementCount() to get the official size
std::vector<float> input_tensor_values(input_tensor_size);
std::vector<const char *> output_node_names = {"softmaxout_1"};
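// note: the output name is hardcoded here for brevity; it could also be
// queried, symmetrically to the inputs above, via session.GetOutputCount()
// and session.GetOutputName(i, allocator)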
// initialize input data with values in [0.0, 1.0]
for (unsigned int i = 0; i < input_tensor_size; i++)
input_tensor_values[i] = (float)i / (input_tensor_size + 1);
// create input tensor object from data values
auto memory_info =
Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
memory_info, input_tensor_values.data(), input_tensor_size,
input_node_dims.data(), 4);
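// note: CreateTensor wraps the caller's buffer rather than copying it, so
// input_tensor_values must remain alive until Run() has completed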
assert(input_tensor.IsTensor());
// score model & input tensor, get back output tensor
start = std::chrono::steady_clock::now();
auto output_tensors =
session.Run(Ort::RunOptions{nullptr}, input_node_names.data(),
&input_tensor, 1, output_node_names.data(), 1);
end = std::chrono::steady_clock::now();
std::cout << "Run elapsed time in milliseconds: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(end -
start)
.count()
<< " ms\n";
assert(output_tensors.size() == 1 && output_tensors.front().IsTensor());
// Get pointer to output tensor float values
float *floatarr = output_tensors.front().GetTensorMutableData<float>();
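// this pointer refers to memory owned by the output tensor and stays valid
// only while output_tensors is in scope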
assert(std::fabs(floatarr[0] - 0.000045) < 1e-6);
// score the model, and print scores for first 5 classes
for (int i = 0; i < 5; i++)
printf("Score for class [%d] = %f\n", i, floatarr[i]);
// Results should be as below...
// Score for class[0] = 0.000045
// Score for class[1] = 0.003846
// Score for class[2] = 0.000125
// Score for class[3] = 0.001180
// Score for class[4] = 0.001317
printf("Done!\n");
return 0;
}