Last active
September 9, 2021 20:06
-
-
Save pranavsharma/fe589e119e0995690b5e262b6d751397 to your computer and use it in GitHub Desktop.
onnxruntime C++ API inferencing example for CPU
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. | |
// | |
/* | |
pranav@XXX:~$ wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz | |
.... | |
2021-07-28 19:44:06 (7.60 MB/s) - ‘onnxruntime-linux-x64-1.8.1.tgz’ saved [4736207/4736207] | |
pranav@XXX:~$ tar xvfz onnxruntime-linux-x64-1.8.1.tgz | |
onnxruntime-linux-x64-1.8.1/ | |
onnxruntime-linux-x64-1.8.1/ThirdPartyNotices.txt | |
onnxruntime-linux-x64-1.8.1/LICENSE | |
onnxruntime-linux-x64-1.8.1/Privacy.md | |
onnxruntime-linux-x64-1.8.1/README.md | |
onnxruntime-linux-x64-1.8.1/include/ | |
onnxruntime-linux-x64-1.8.1/include/onnxruntime_session_options_config_keys.h | |
onnxruntime-linux-x64-1.8.1/include/cpu_provider_factory.h | |
onnxruntime-linux-x64-1.8.1/include/cuda_provider_factory.h | |
onnxruntime-linux-x64-1.8.1/include/provider_options.h | |
onnxruntime-linux-x64-1.8.1/include/onnxruntime_cxx_inline.h | |
onnxruntime-linux-x64-1.8.1/include/onnxruntime_run_options_config_keys.h | |
onnxruntime-linux-x64-1.8.1/include/onnxruntime_c_api.h | |
onnxruntime-linux-x64-1.8.1/include/onnxruntime_cxx_api.h | |
onnxruntime-linux-x64-1.8.1/GIT_COMMIT_ID | |
onnxruntime-linux-x64-1.8.1/lib/ | |
onnxruntime-linux-x64-1.8.1/lib/libonnxruntime.so | |
onnxruntime-linux-x64-1.8.1/lib/libonnxruntime.so.1.8.1 | |
onnxruntime-linux-x64-1.8.1/VERSION_NUMBER | |
pranav@XXX:~$ cd onnxruntime-linux-x64-1.8.1/ | |
pranav@XXX:~/onnxruntime-linux-x64-1.8.1$ g++ -std=c++14 -o t-ortcpu t-ortcpu.cc -I include/ -L lib/ -lonnxruntime -Wl,-rpath,lib/
pranav@XXX:~/onnxruntime-linux-x64-1.8.1$ cp ~/squeezenet.onnx . | |
pranav@XXX:~/onnxruntime-linux-x64-1.8.1$ ~/t-ortcpu | |
Using Onnxruntime C++ API | |
Session Creation elapsed time in milliseconds: 38 ms | |
Number of inputs = 1 | |
Input 0 : name=data_0 | |
Input 0 : type=1 | |
Input 0 : num_dims=4 | |
Input 0 : dim 0=1 | |
Input 0 : dim 1=3 | |
Input 0 : dim 2=224 | |
Input 0 : dim 3=224 | |
Run elapsed time in milliseconds: 4 ms | |
Score for class [0] = 0.000045 | |
Score for class [1] = 0.003846 | |
Score for class [2] = 0.000125 | |
Score for class [3] = 0.001180 | |
Score for class [4] = 0.001317 | |
Done! | |
*/
#include <assert.h>

#include <chrono>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <exception>
#include <iostream>
#include <vector>

#include <onnxruntime_cxx_api.h>
int main(int argc, char *argv[]) { | |
//************************************************************************* | |
// initialize enviroment...one enviroment per process | |
// enviroment maintains thread pools and other state info | |
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test"); | |
// initialize session options if needed | |
Ort::SessionOptions session_options; | |
// If onnxruntime.dll is built with CUDA enabled, we can uncomment out this line | |
// to use CUDA for this | |
// session (we also need to include cuda_provider_factory.h above which defines | |
// it) | |
// #include "cuda_provider_factory.h" | |
// OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 1); | |
// Sets graph optimization level | |
// Available levels are | |
// ORT_DISABLE_ALL -> To disable all optimizations | |
// ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node | |
// removals) | |
// ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + | |
// more complex optimizations like node fusions) | |
// ORT_ENABLE_ALL -> To Enable All possible opitmizations | |
// session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); | |
//************************************************************************* | |
// create session and load model into memory | |
// using squeezenet version 1.3 | |
// URL = https://github.com/onnx/models/tree/master/squeezenet | |
#ifdef _WIN32 | |
const wchar_t *model_path = L"squeezenet.onnx"; | |
#else | |
const char *model_path = "squeezenet.onnx"; | |
#endif | |
printf("Using Onnxruntime C++ API\n"); | |
auto start = std::chrono::steady_clock::now(); | |
Ort::Session session(env, model_path, session_options); | |
auto end = std::chrono::steady_clock::now(); | |
std::cout << "Session Creation elapsed time in milliseconds: " | |
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - | |
start) | |
.count() | |
<< " ms\n"; | |
//************************************************************************* | |
// print model input layer (node names, types, shape etc.) | |
Ort::AllocatorWithDefaultOptions allocator; | |
// print number of model input nodes | |
size_t num_input_nodes = session.GetInputCount(); | |
std::vector<const char *> input_node_names(num_input_nodes); | |
std::vector<int64_t> input_node_dims; // simplify... this model has only 1 | |
// input node {1, 3, 224, 224}. | |
printf("Number of inputs = %zu\n", num_input_nodes); | |
// iterate over all input nodes | |
for (int i = 0; i < num_input_nodes; i++) { | |
// print input node names | |
char *input_name = session.GetInputName(i, allocator); | |
printf("Input %d : name=%s\n", i, input_name); | |
input_node_names[i] = input_name; | |
// print input node types | |
Ort::TypeInfo type_info = session.GetInputTypeInfo(i); | |
auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); | |
ONNXTensorElementDataType type = tensor_info.GetElementType(); | |
printf("Input %d : type=%d\n", i, type); | |
// print input shapes/dims | |
input_node_dims = tensor_info.GetShape(); | |
printf("Input %d : num_dims=%zu\n", i, input_node_dims.size()); | |
for (int j = 0; j < input_node_dims.size(); j++) | |
printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]); | |
} | |
// Results should be... | |
// Number of inputs = 1 | |
// Input 0 : name = data_0 | |
// Input 0 : type = 1 | |
// Input 0 : num_dims = 4 | |
// Input 0 : dim 0 = 1 | |
// Input 0 : dim 1 = 3 | |
// Input 0 : dim 2 = 224 | |
// Input 0 : dim 3 = 224 | |
//************************************************************************* | |
// Score the model using sample data, and inspect values | |
size_t input_tensor_size = | |
224 * 224 * 3; // simplify ... using known dim values to calculate size | |
// use OrtGetTensorShapeElementCount() to get official size! | |
std::vector<float> input_tensor_values(input_tensor_size); | |
std::vector<const char *> output_node_names = {"softmaxout_1"}; | |
// initialize input data with values in [0.0, 1.0] | |
for (unsigned int i = 0; i < input_tensor_size; i++) | |
input_tensor_values[i] = (float)i / (input_tensor_size + 1); | |
// create input tensor object from data values | |
auto memory_info = | |
Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); | |
Ort::Value input_tensor = Ort::Value::CreateTensor<float>( | |
memory_info, input_tensor_values.data(), input_tensor_size, | |
input_node_dims.data(), 4); | |
assert(input_tensor.IsTensor()); | |
// score model & input tensor, get back output tensor | |
start = std::chrono::steady_clock::now(); | |
auto output_tensors = | |
session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), | |
&input_tensor, 1, output_node_names.data(), 1); | |
end = std::chrono::steady_clock::now(); | |
std::cout << "Run elapsed time in milliseconds: " | |
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - | |
start) | |
.count() | |
<< " ms\n"; | |
assert(output_tensors.size() == 1 && output_tensors.front().IsTensor()); | |
// Get pointer to output tensor float values | |
float *floatarr = output_tensors.front().GetTensorMutableData<float>(); | |
assert(abs(floatarr[0] - 0.000045) < 1e-6); | |
// score the model, and print scores for first 5 classes | |
for (int i = 0; i < 5; i++) | |
printf("Score for class [%d] = %f\n", i, floatarr[i]); | |
// Results should be as below... | |
// Score for class[0] = 0.000045 | |
// Score for class[1] = 0.003846 | |
// Score for class[2] = 0.000125 | |
// Score for class[3] = 0.001180 | |
// Score for class[4] = 0.001317 | |
printf("Done!\n"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment