Last active
July 29, 2021 16:44
-
-
Save pranavsharma/243ce2f6a814c1545ccb9a3fdbe9d00b to your computer and use it in GitHub Desktop.
onnxruntime C++ API inferencing example for GPU
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright(c) Microsoft Corporation.All rights reserved. | |
// Licensed under the MIT License. | |
// Example of using IOBinding while inferencing with GPU | |
#include <assert.h>
#include <cstdio>

#include <chrono>
#include <iostream>
#include <vector>

#include <onnxruntime_cxx_api.h>
int main(int argc, char *argv[]) { | |
Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test"); | |
Ort::SessionOptions session_options; | |
session_options.AppendExecutionProvider_CUDA(OrtCUDAProviderOptions{}); | |
#ifdef _WIN32 | |
const wchar_t *model_path = L"squeezenet.onnx"; | |
#else | |
const char *model_path = "squeezenet.onnx"; | |
#endif | |
auto start = std::chrono::steady_clock::now(); | |
Ort::Session session(env, model_path, session_options); | |
auto end = std::chrono::steady_clock::now(); | |
std::cout << "Session Creation elapsed time in milliseconds: " | |
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - | |
start) | |
.count() | |
<< " ms\n"; | |
Ort::IoBinding io_binding{session}; | |
//************************************************************************* | |
// print model input layer (node names, types, shape etc.) | |
Ort::AllocatorWithDefaultOptions allocator; | |
// print number of model input nodes | |
size_t num_input_nodes = session.GetInputCount(); | |
std::vector<const char *> input_node_names(num_input_nodes); | |
std::vector<int64_t> input_node_dims; // simplify... this model has only 1 | |
// input node {1, 3, 224, 224}. | |
// Otherwise need vector<vector<>> | |
// printf("Number of inputs = %zu\n", num_input_nodes); | |
// iterate over all input nodes | |
for (int i = 0; i < num_input_nodes; i++) { | |
// print input node names | |
char *input_name = session.GetInputName(i, allocator); | |
// printf("Input %d : name=%s\n", i, input_name); | |
input_node_names[i] = input_name; | |
// print input node types | |
Ort::TypeInfo type_info = session.GetInputTypeInfo(i); | |
auto tensor_info = type_info.GetTensorTypeAndShapeInfo(); | |
ONNXTensorElementDataType type = tensor_info.GetElementType(); | |
// printf("Input %d : type=%d\n", i, type); | |
// print input shapes/dims | |
input_node_dims = tensor_info.GetShape(); | |
// printf("Input %d : num_dims=%zu\n", i, input_node_dims.size()); | |
// for (int j = 0; j < input_node_dims.size(); j++) | |
// printf("Input %d : dim %d=%jd\n", i, j, input_node_dims[j]); | |
} | |
size_t input_tensor_size = | |
224 * 224 * 3; // simplify ... using known dim values to calculate size | |
// use OrtGetTensorShapeElementCount() to get official size! | |
std::vector<float> input_tensor_values(input_tensor_size); | |
std::vector<const char *> output_node_names = {"softmaxout_1"}; | |
// initialize input data with values in [0.0, 1.0] | |
for (unsigned int i = 0; i < input_tensor_size; i++) | |
input_tensor_values[i] = (float)i / (input_tensor_size + 1); | |
// create input tensor object from data values | |
auto memory_info = | |
Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); | |
Ort::Value input_tensor = Ort::Value::CreateTensor<float>( | |
memory_info, input_tensor_values.data(), input_tensor_size, | |
input_node_dims.data(), 4); | |
assert(input_tensor.IsTensor()); | |
start = std::chrono::steady_clock::now(); | |
io_binding.BindInput(input_node_names[0], input_tensor); | |
end = std::chrono::steady_clock::now(); | |
std::cout << "BindInput elapsed time in microseconds: " | |
<< std::chrono::duration_cast<std::chrono::microseconds>(end - | |
start) | |
.count() | |
<< " us\n"; | |
Ort::MemoryInfo output_mem_info{"Cuda", OrtDeviceAllocator, 0, | |
OrtMemTypeDefault}; | |
io_binding.BindOutput(output_node_names[0], output_mem_info); | |
// score model & input tensor, get back output tensor | |
Ort::RunOptions run_options; | |
start = std::chrono::steady_clock::now(); | |
session.Run(run_options, io_binding); | |
end = std::chrono::steady_clock::now(); | |
std::cout << "Warm up ... run elapsed time in milliseconds: " | |
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - | |
start) | |
.count() | |
<< " ms\n"; | |
start = std::chrono::steady_clock::now(); | |
session.Run(run_options, io_binding); | |
end = std::chrono::steady_clock::now(); | |
std::cout << "After warm up ... Run elapsed time in milliseconds: " | |
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - | |
start) | |
.count() | |
<< " ms\n"; | |
printf("Done!\n"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment