@pranavsharma
onnxruntime C++ API inferencing example for GPU
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// Example of using IOBinding while inferencing with GPU

#include <assert.h>
#include <chrono>
#include <iostream>
#include <onnxruntime_cxx_api.h>
#include <vector>

int main(int argc, char *argv[]) {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
  Ort::SessionOptions session_options;
  session_options.AppendExecutionProvider_CUDA(OrtCUDAProviderOptions{});

#ifdef _WIN32
  const wchar_t *model_path = L"squeezenet.onnx";
#else
  const char *model_path = "squeezenet.onnx";
#endif

  auto start = std::chrono::steady_clock::now();
  Ort::Session session(env, model_path, session_options);
  auto end = std::chrono::steady_clock::now();
  std::cout << "Session Creation elapsed time in milliseconds: "
            << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
            << " ms\n";

  Ort::IoBinding io_binding{session};

  //*************************************************************************
  // print model input layer (node names, types, shape, etc.)
  Ort::AllocatorWithDefaultOptions allocator;

  // print number of model input nodes
  size_t num_input_nodes = session.GetInputCount();
  std::vector<const char *> input_node_names(num_input_nodes);
  std::vector<int64_t> input_node_dims; // simplify... this model has only 1
                                        // input node {1, 3, 224, 224}.
                                        // Otherwise need vector<vector<>>
  // printf("Number of inputs = %zu\n", num_input_nodes);

  // iterate over all input nodes
  for (size_t i = 0; i < num_input_nodes; i++) {
    // print input node names
    char *input_name = session.GetInputName(i, allocator);
    // printf("Input %zu : name=%s\n", i, input_name);
    input_node_names[i] = input_name;

    // print input node types
    Ort::TypeInfo type_info = session.GetInputTypeInfo(i);
    auto tensor_info = type_info.GetTensorTypeAndShapeInfo();
    ONNXTensorElementDataType type = tensor_info.GetElementType();
    // printf("Input %zu : type=%d\n", i, type);

    // print input shapes/dims
    input_node_dims = tensor_info.GetShape();
    // printf("Input %zu : num_dims=%zu\n", i, input_node_dims.size());
    // for (size_t j = 0; j < input_node_dims.size(); j++)
    //   printf("Input %zu : dim %zu=%jd\n", i, j, input_node_dims[j]);
  }

  size_t input_tensor_size =
      224 * 224 * 3; // simplify ... using known dim values to calculate size;
                     // use OrtGetTensorShapeElementCount() to get official size!
  std::vector<float> input_tensor_values(input_tensor_size);
  std::vector<const char *> output_node_names = {"softmaxout_1"};

  // initialize input data with values in [0.0, 1.0]
  for (unsigned int i = 0; i < input_tensor_size; i++)
    input_tensor_values[i] = (float)i / (input_tensor_size + 1);

  // create input tensor object from data values
  auto memory_info =
      Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
  Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
      memory_info, input_tensor_values.data(), input_tensor_size,
      input_node_dims.data(), 4);
  assert(input_tensor.IsTensor());

  start = std::chrono::steady_clock::now();
  io_binding.BindInput(input_node_names[0], input_tensor);
  end = std::chrono::steady_clock::now();
  std::cout << "BindInput elapsed time in microseconds: "
            << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count()
            << " us\n";

  // bind the output to CUDA device memory so the result stays on the GPU
  Ort::MemoryInfo output_mem_info{"Cuda", OrtDeviceAllocator, 0, OrtMemTypeDefault};
  io_binding.BindOutput(output_node_names[0], output_mem_info);

  // score model with the bound input tensor; the output stays in the bound GPU buffer
  Ort::RunOptions run_options;
  start = std::chrono::steady_clock::now();
  session.Run(run_options, io_binding);
  end = std::chrono::steady_clock::now();
  std::cout << "Warm up ... Run elapsed time in milliseconds: "
            << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
            << " ms\n";

  start = std::chrono::steady_clock::now();
  session.Run(run_options, io_binding);
  end = std::chrono::steady_clock::now();
  std::cout << "After warm up ... Run elapsed time in milliseconds: "
            << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count()
            << " ms\n";

  printf("Done!\n");
  return 0;
}
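
The program above never reads the result back: the output is bound to "Cuda" memory and left on the device. Below is a minimal sketch (not part of the original gist) of one way to fetch it on the host. It assumes it is inserted just before the final printf and reuses the session, io_binding, output_node_names, and run_options variables from above; the names cpu_mem_info, outputs, out_info, out_count, and out_data are introduced here purely for illustration. The idea is to rebind the output to a CPU MemoryInfo, so that ONNX Runtime allocates a host buffer for the result, and then read it out via Ort::IoBinding::GetOutputValues().

  // Sketch: rebind the output to CPU memory so the result is accessible on the host,
  // then re-run the bound session and read the values back.
  Ort::MemoryInfo cpu_mem_info =
      Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
  io_binding.BindOutput(output_node_names[0], cpu_mem_info);
  session.Run(run_options, io_binding);

  std::vector<Ort::Value> outputs = io_binding.GetOutputValues();
  auto out_info = outputs[0].GetTensorTypeAndShapeInfo();
  size_t out_count = out_info.GetElementCount();              // e.g. 1000 class scores for squeezenet
  const float *out_data = outputs[0].GetTensorData<float>();  // host-accessible because of the CPU binding
  std::cout << "Output has " << out_count << " elements; first value = "
            << out_data[0] << "\n";

Alternatively, when the output shape is known in advance, one can pre-allocate an Ort::Value (on the CPU, or on the device via a CUDA allocator) and pass it to BindOutput directly, which avoids the allocation inside ONNX Runtime and keeps the destination buffer under the application's control.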