Skip to content

Instantly share code, notes, and snippets.

@apivovarov
Last active July 8, 2021 03:56
Show Gist options
  • Save apivovarov/a4983ad5ba1a534a474a915203453b06 to your computer and use it in GitHub Desktop.
Save apivovarov/a4983ad5ba1a534a474a915203453b06 to your computer and use it in GitHub Desktop.
Run Mobilenet TFLite models using the C++ API
/* To compile:
g++ --std=c++11 -O3 \
-I /opt/flatbuffers/include \
-I /opt/tensorflow \
run-tflite.cpp -o run-tflite \
libtensorflow-lite.a -lstdc++ -lpthread -lm -lz -ldl
*/
#include <chrono>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "tensorflow/lite/kernels/register.h"
std::string getShape(TfLiteTensor* t) {
std::string s = "(";
int sz = t->dims->size;
for(int i=0; i<sz; i++){
if (i > 0) {
s += ",";
}
s += std::to_string(t->dims->data[i]);
}
s += ")";
return s;
}
/// Loads a TFLite model, feeds it a cat image read from "cat<H>-3.txt"
/// (one integer pixel value per line), runs inference N times, and prints
/// the argmax of output 0 plus per-run and average latency.
///
/// Usage: run-tflite <model.tflite> <n_threads> <use_nnapi>
/// Returns 0 on success, -1 on bad usage; exits(1) on any setup failure.
int main(int argc, char* argv[]) {
  if (argc != 4) {
    printf("%s <model.tflite> <n_threads> <use_nnapi>\n", argv[0]);
    return -1;
  }
  char* graph_path = argv[1];
  int num_threads = std::stoi(argv[2]);
  int use_nnapi = std::stoi(argv[3]);
  printf("Model: %s\n", graph_path);

  // Load the flatbuffer model (mmap-backed).
  std::unique_ptr<tflite::FlatBufferModel> model(
      tflite::FlatBufferModel::BuildFromFile(graph_path));
  if (!model) {
    printf("Failed to mmap model\n");
    exit(1);
  }
  printf("Model is built\n");

  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  if (!interpreter) {
    printf("Failed to construct interpreter\n");
    exit(1);
  }
  printf("Interpreter is constructed\n");

  interpreter->UseNNAPI(use_nnapi > 0);
  printf("Use NNAPI: %s\n", use_nnapi > 0 ? "True" : "False");
  if (num_threads > 0) {
    interpreter->SetNumThreads(num_threads);
    printf("SetNumThreads: %d\n", num_threads);
  }

  // ---- Input tensor info ----
  int in_id = interpreter->inputs()[0];
  TfLiteTensor* in_tensor = interpreter->tensor(in_id);
  auto in_type = in_tensor->type;
  // BUG FIX: the original stored getShape(...).c_str() into a pointer; the
  // temporary std::string died immediately, leaving a dangling pointer that
  // was then printed (UB). Keep the std::string alive instead.
  std::string in_shape = getShape(in_tensor);
  auto in_name = in_tensor->name;
  printf("Input Tensor id, name, type, shape: %i, %s, %s(%d), %s\n", in_id,
         in_name, TfLiteTypeGetName(in_type), in_type, in_shape.c_str());

  // ---- Output tensor info ----
  int out_sz = interpreter->outputs().size();
  std::cout << "Output Tensor id, name, type, shape: " << std::endl;
  // BUG FIX: initialize; the originals were read uninitialized when a model
  // has no outputs. 0 == kTfLiteNoType, so the argmax section is skipped.
  int t0_type = 0;
  int t0_sz = 0;
  for (int i = 0; i < out_sz; i++) {
    auto t_id = interpreter->outputs()[i];
    TfLiteTensor* t = interpreter->tensor(t_id);
    auto t_type = t->type;
    if (i == 0) {  // remember output 0's type and class count for argmax
      t0_type = t->type;
      t0_sz = t->dims->data[1];
    }
    printf("  %i, %s, %s(%d), %s\n", t_id, t->name, TfLiteTypeGetName(t_type),
           t_type, getShape(t).c_str());
  }

  // Input layout assumed NHWC: dims = (batch, height, width, channels).
  int dim_h = in_tensor->dims->data[1];
  int dim_w = in_tensor->dims->data[2];
  if (interpreter->AllocateTensors() != kTfLiteOk) {
    printf("Failed to allocate tensors\n");
    exit(1);
  }
  printf("AllocateTensors Ok\n");

  int sz = dim_h * dim_w * 3;
  // BUG FIX: the original new[]'d one raw buffer (leaked, the other pointer
  // left uninitialized). std::vector owns the memory and zero-sizes the
  // unused one.
  std::vector<float> img;
  std::vector<unsigned char> img_uint8;
  if (in_type == 3) {  // 3 == kTfLiteUInt8: quantized input
    img_uint8.resize(sz);
  } else {  // float input, normalized to [-1, 1]
    img.resize(sz);
  }

  // ---- Read cat image: one integer pixel value (0..255) per line ----
  std::string img_name = "cat" + std::to_string(dim_h) + "-3.txt";
  printf("img_name: %s\n", img_name.c_str());
  std::string line;
  std::ifstream imgfile(img_name);
  if (!imgfile.is_open()) {
    std::cout << "Unable to open file: " << img_name << std::endl;
    exit(1);
  }
  int i = 0;
  while (getline(imgfile, line)) {
    if (i >= sz) {  // BUG FIX: guard against files longer than the tensor
      break;
    }
    int v = std::stoi(line);
    if (in_type == 3) {
      img_uint8[i] = (unsigned char)v;
    } else {
      float fv = 2.0f / 255.0f * v - 1.0f;  // map [0,255] -> [-1,1]
      img[i] = fv;
    }
    i++;
  }
  imgfile.close();
  printf("Image read ok, size: %d\n", i);

  // ---- Benchmark: j == -1 is an untimed warm-up iteration ----
  const int N = 100;
  long long total_time = 0;
  for (int j = -1; j < N; j++) {
    auto t1 = std::chrono::high_resolution_clock::now();
    // Set input (fetch the pointer each iteration: it may move on realloc).
    if (in_type == 3) {
      unsigned char* in_data =
          interpreter->typed_input_tensor<unsigned char>(0);
      memcpy(in_data, img_uint8.data(), sz * sizeof(unsigned char));
    } else {
      float* in_data = interpreter->typed_input_tensor<float>(0);
      memcpy(in_data, img.data(), sz * sizeof(float));
    }
    // Invoke
    if (interpreter->Invoke() != kTfLiteOk) {
      std::printf("Failed to invoke!\n");
      exit(1);
    }
    // Get output and ArgMax
    int idx = 0;
    float v = 0.0f;
    unsigned char v_uint8 = 0;
    if (t0_type == 1) {  // 1 == kTfLiteFloat32
      float* output = interpreter->typed_output_tensor<float>(0);
      for (int k = 0; k < t0_sz; k++) {
        float vk = output[k];
        if (vk > v) {
          idx = k;
          v = vk;
        }
      }
    } else if (t0_type == 3) {  // 3 == kTfLiteUInt8
      unsigned char* output =
          interpreter->typed_output_tensor<unsigned char>(0);
      for (int k = 0; k < t0_sz; k++) {
        unsigned char vk = output[k];
        if (vk > v_uint8) {
          idx = k;
          v_uint8 = vk;
        }
      }
      v = v_uint8 / 255.0;  // de-quantize score for display
    }
    auto t2 = std::chrono::high_resolution_clock::now();
    long long dur =
        std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
    if (j >= 0) {  // skip the warm-up run in the stats
      total_time += dur;
      printf("Argmax: %d, prop: %f, time: %lld\n", idx, v, dur);
    }
  }
  printf("Avg time: %f\n", total_time * 1.0 / N);
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment