Last active
July 8, 2021 03:56
-
-
Save apivovarov/a4983ad5ba1a534a474a915203453b06 to your computer and use it in GitHub Desktop.
Run Mobilenet TFLite models using C++ API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* To compile:
g++ --std=c++11 -O3 \
  -I /opt/flatbuffers/include \
  -I /opt/tensorflow \
  run-tflite.cpp -o run-tflite \
  libtensorflow-lite.a -lstdc++ -lpthread -lm -lz -ldl
*/
#include <chrono>
#include <cstring>  // memcpy — previously relied on a transitive include
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include "tensorflow/lite/kernels/register.h"
std::string getShape(TfLiteTensor* t) { | |
std::string s = "("; | |
int sz = t->dims->size; | |
for(int i=0; i<sz; i++){ | |
if (i > 0) { | |
s += ","; | |
} | |
s += std::to_string(t->dims->data[i]); | |
} | |
s += ")"; | |
return s; | |
} | |
int main(int argc, char *argv[]){ | |
if (argc != 4) { | |
printf("%s <model.tflite> <n_threads> <use_nnapi>\n", argv[0]); | |
return -1; | |
} | |
char* graph_path = argv[1]; | |
int num_threads = std::stoi(argv[2]); | |
int use_nnapi = std::stoi(argv[3]); | |
printf("Model: %s\n", graph_path); | |
//std::cout << graph_path << std::endl; | |
std::unique_ptr<tflite::FlatBufferModel> model(tflite::FlatBufferModel::BuildFromFile(graph_path)); | |
if(!model){ | |
printf("Failed to mmap model\n"); | |
exit(1); | |
} | |
printf("Model is built\n"); | |
tflite::ops::builtin::BuiltinOpResolver resolver; | |
std::unique_ptr<tflite::Interpreter> interpreter; | |
tflite::InterpreterBuilder(*model, resolver)(&interpreter); | |
if(!interpreter){ | |
printf("Failed to construct interpreter\n"); | |
exit(1); | |
} | |
printf("Interpreter is constructed\n"); | |
interpreter->UseNNAPI(use_nnapi > 0); | |
printf("Use NNAPI: %s\n", use_nnapi > 0 ? "True" : "False"); | |
if(num_threads > 0){ | |
interpreter->SetNumThreads(num_threads); | |
printf("SetNumThreads: %d\n", num_threads); | |
} | |
// Get Input and Output tensors info | |
int in_id = interpreter->inputs()[0]; | |
TfLiteTensor* in_tensor = interpreter->tensor(in_id); | |
auto in_type = in_tensor->type; | |
auto in_shape = getShape(in_tensor).c_str(); | |
auto in_name = in_tensor->name; | |
printf("Input Tensor id, name, type, shape: %i, %s, %s(%d), %s\n", in_id, in_name, TfLiteTypeGetName(in_type), in_type, in_shape); | |
int out_sz = interpreter->outputs().size(); | |
std::cout << "Output Tensor id, name, type, shape: " << std::endl; | |
int t0_type; | |
int t0_sz; | |
for (int i = 0; i < out_sz; i++) { | |
auto t_id = interpreter->outputs()[i]; | |
TfLiteTensor* t = interpreter->tensor(t_id); | |
auto t_type = t->type; | |
if (i == 0) { | |
t0_type = t->type; | |
t0_sz = t->dims->data[1]; | |
} | |
printf(" %i, %s, %s(%d), %s\n", t_id, t->name, TfLiteTypeGetName(t_type), t_type, getShape(t).c_str()); | |
} | |
int dim_h = in_tensor->dims->data[1]; | |
int dim_w = in_tensor->dims->data[2]; | |
if(interpreter->AllocateTensors() != kTfLiteOk){ | |
printf("Failed to allocate tensors\n"); | |
exit(1); | |
} | |
printf("AllocateTensors Ok\n"); | |
int sz = dim_h*dim_w*3; | |
float* img; | |
unsigned char* img_uint8; | |
if (in_type == 3) { | |
img_uint8 = new unsigned char[sz]; | |
} else { | |
img = new float[sz]; | |
} | |
// Read cat image | |
std::string img_name = "cat" + std::to_string(dim_h) + "-3.txt"; | |
printf("img_name: %s\n", img_name.c_str()); | |
std::string line; | |
std::ifstream imgfile(img_name); | |
if (!imgfile.is_open()){ | |
std::cout << "Unable to open file: " << img_name << std::endl; | |
exit(1); | |
} | |
int i = 0; | |
while(getline(imgfile, line)){ | |
int v = std::stoi(line); | |
if (in_type == 3) { | |
img_uint8[i] = (unsigned char) v; | |
} else { | |
float fv = 2.0f / 255.0f * v - 1.0f; | |
//std::cout << fv << '\n'; | |
img[i] = fv; | |
} | |
i++; | |
} | |
imgfile.close(); | |
printf("Image read ok, size: %d\n", i); | |
const int N = 100; | |
int total_time = 0; | |
for(int j=-1; j < N; j++){ | |
float* in_data; | |
unsigned char* in_data_uint8; | |
if (in_type == 3) { | |
in_data_uint8 = interpreter->typed_input_tensor<unsigned char>(0); | |
} else { | |
in_data = interpreter->typed_input_tensor<float>(0); | |
} | |
// Set input | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
if (in_type == 3) { | |
memcpy(in_data_uint8, img_uint8, sz*sizeof(unsigned char)); | |
} else { | |
memcpy(in_data, img, sz*sizeof(float)); | |
} | |
// Invoke | |
if(interpreter->Invoke() != kTfLiteOk){ | |
std::printf("Failed to invoke!\n"); | |
exit(1); | |
} | |
// Get output and ArgMax | |
int idx = 0; | |
float v = 0.0f; | |
unsigned char v_uint8 = 0; | |
if (t0_type == 1) { // float32 | |
float* output = interpreter->typed_output_tensor<float>(0); | |
for (int i = 0; i < t0_sz; i++) { | |
float vi = output[i]; | |
if(vi > v){ | |
idx = i; | |
v = vi; | |
} | |
} | |
} else if (t0_type == 3) { // uint8 | |
unsigned char* output = interpreter->typed_output_tensor<unsigned char>(0); | |
for (int i = 0; i < t0_sz; i++) { | |
unsigned char vi = output[i]; | |
if(vi > v_uint8){ | |
idx = i; | |
v_uint8 = vi; | |
} | |
} | |
v = v_uint8 / 255.0; | |
} | |
auto t2 = std::chrono::high_resolution_clock::now(); | |
auto dur = std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count(); | |
if (j >= 0){ | |
total_time += dur; | |
printf("Argmax: %d, prop: %f, time: %lld\n", idx, v, dur); | |
} | |
} | |
printf("Avg time: %f\n", total_time * 1.0 / N); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment