Skip to content

Instantly share code, notes, and snippets.

@apivovarov
Last active July 8, 2021 03:56
Show Gist options
  • Save apivovarov/a4983ad5ba1a534a474a915203453b06 to your computer and use it in GitHub Desktop.
Save apivovarov/a4983ad5ba1a534a474a915203453b06 to your computer and use it in GitHub Desktop.
Run Mobilenet TFLite models using the C++ API
/* To compile:
g++ --std=c++11 -O3 \
-I /opt/flatbuffers/include \
-I /opt/tensorflow \
run-tflite.cpp -o run-tflite \
libtensorflow-lite.a -lstdc++ -lpthread -lm -lz -ldl
*/
#include <chrono>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "tensorflow/lite/kernels/register.h"
std::string getShape(TfLiteTensor* t) {
std::string s = "(";
int sz = t->dims->size;
for(int i=0; i<sz; i++){
if (i > 0) {
s += ",";
}
s += std::to_string(t->dims->data[i]);
}
s += ")";
return s;
}
/// Loads a TFLite model, feeds it a cat image read from "cat<H>-3.txt"
/// (one integer pixel value per line), runs inference N times, and prints
/// the argmax of output 0 plus per-run and average latency.
///
/// Usage: run-tflite <model.tflite> <n_threads> <use_nnapi>
/// Returns 0 on success, -1 on bad usage; exits(1) on any setup failure.
int main(int argc, char* argv[]) {
  if (argc != 4) {
    printf("%s <model.tflite> <n_threads> <use_nnapi>\n", argv[0]);
    return -1;
  }
  char* graph_path = argv[1];
  int num_threads = std::stoi(argv[2]);
  int use_nnapi = std::stoi(argv[3]);
  printf("Model: %s\n", graph_path);

  // Load the flatbuffer model (mmap-backed).
  std::unique_ptr<tflite::FlatBufferModel> model(
      tflite::FlatBufferModel::BuildFromFile(graph_path));
  if (!model) {
    printf("Failed to mmap model\n");
    exit(1);
  }
  printf("Model is built\n");

  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  if (!interpreter) {
    printf("Failed to construct interpreter\n");
    exit(1);
  }
  printf("Interpreter is constructed\n");

  interpreter->UseNNAPI(use_nnapi > 0);
  printf("Use NNAPI: %s\n", use_nnapi > 0 ? "True" : "False");
  if (num_threads > 0) {
    interpreter->SetNumThreads(num_threads);
    printf("SetNumThreads: %d\n", num_threads);
  }

  // ---- Input tensor info ----
  int in_id = interpreter->inputs()[0];
  TfLiteTensor* in_tensor = interpreter->tensor(in_id);
  auto in_type = in_tensor->type;
  // BUG FIX: the original stored getShape(...).c_str() into a pointer; the
  // temporary std::string died immediately, leaving a dangling pointer that
  // was then printed (UB). Keep the std::string alive instead.
  std::string in_shape = getShape(in_tensor);
  auto in_name = in_tensor->name;
  printf("Input Tensor id, name, type, shape: %i, %s, %s(%d), %s\n", in_id,
         in_name, TfLiteTypeGetName(in_type), in_type, in_shape.c_str());

  // ---- Output tensor info ----
  int out_sz = interpreter->outputs().size();
  std::cout << "Output Tensor id, name, type, shape: " << std::endl;
  // BUG FIX: initialize; the originals were read uninitialized when a model
  // has no outputs. 0 == kTfLiteNoType, so the argmax section is skipped.
  int t0_type = 0;
  int t0_sz = 0;
  for (int i = 0; i < out_sz; i++) {
    auto t_id = interpreter->outputs()[i];
    TfLiteTensor* t = interpreter->tensor(t_id);
    auto t_type = t->type;
    if (i == 0) {  // remember output 0's type and class count for argmax
      t0_type = t->type;
      t0_sz = t->dims->data[1];
    }
    printf("  %i, %s, %s(%d), %s\n", t_id, t->name, TfLiteTypeGetName(t_type),
           t_type, getShape(t).c_str());
  }

  // Input layout assumed NHWC: dims = (batch, height, width, channels).
  int dim_h = in_tensor->dims->data[1];
  int dim_w = in_tensor->dims->data[2];
  if (interpreter->AllocateTensors() != kTfLiteOk) {
    printf("Failed to allocate tensors\n");
    exit(1);
  }
  printf("AllocateTensors Ok\n");

  int sz = dim_h * dim_w * 3;
  // BUG FIX: the original new[]'d one raw buffer (leaked, the other pointer
  // left uninitialized). std::vector owns the memory and zero-sizes the
  // unused one.
  std::vector<float> img;
  std::vector<unsigned char> img_uint8;
  if (in_type == 3) {  // 3 == kTfLiteUInt8: quantized input
    img_uint8.resize(sz);
  } else {  // float input, normalized to [-1, 1]
    img.resize(sz);
  }

  // ---- Read cat image: one integer pixel value (0..255) per line ----
  std::string img_name = "cat" + std::to_string(dim_h) + "-3.txt";
  printf("img_name: %s\n", img_name.c_str());
  std::string line;
  std::ifstream imgfile(img_name);
  if (!imgfile.is_open()) {
    std::cout << "Unable to open file: " << img_name << std::endl;
    exit(1);
  }
  int i = 0;
  while (getline(imgfile, line)) {
    if (i >= sz) {  // BUG FIX: guard against files longer than the tensor
      break;
    }
    int v = std::stoi(line);
    if (in_type == 3) {
      img_uint8[i] = (unsigned char)v;
    } else {
      float fv = 2.0f / 255.0f * v - 1.0f;  // map [0,255] -> [-1,1]
      img[i] = fv;
    }
    i++;
  }
  imgfile.close();
  printf("Image read ok, size: %d\n", i);

  // ---- Benchmark: j == -1 is an untimed warm-up iteration ----
  const int N = 100;
  long long total_time = 0;
  for (int j = -1; j < N; j++) {
    auto t1 = std::chrono::high_resolution_clock::now();
    // Set input (fetch the pointer each iteration: it may move on realloc).
    if (in_type == 3) {
      unsigned char* in_data =
          interpreter->typed_input_tensor<unsigned char>(0);
      memcpy(in_data, img_uint8.data(), sz * sizeof(unsigned char));
    } else {
      float* in_data = interpreter->typed_input_tensor<float>(0);
      memcpy(in_data, img.data(), sz * sizeof(float));
    }
    // Invoke
    if (interpreter->Invoke() != kTfLiteOk) {
      std::printf("Failed to invoke!\n");
      exit(1);
    }
    // Get output and ArgMax
    int idx = 0;
    float v = 0.0f;
    unsigned char v_uint8 = 0;
    if (t0_type == 1) {  // 1 == kTfLiteFloat32
      float* output = interpreter->typed_output_tensor<float>(0);
      for (int k = 0; k < t0_sz; k++) {
        float vk = output[k];
        if (vk > v) {
          idx = k;
          v = vk;
        }
      }
    } else if (t0_type == 3) {  // 3 == kTfLiteUInt8
      unsigned char* output =
          interpreter->typed_output_tensor<unsigned char>(0);
      for (int k = 0; k < t0_sz; k++) {
        unsigned char vk = output[k];
        if (vk > v_uint8) {
          idx = k;
          v_uint8 = vk;
        }
      }
      v = v_uint8 / 255.0;  // de-quantize score for display
    }
    auto t2 = std::chrono::high_resolution_clock::now();
    long long dur =
        std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
    if (j >= 0) {  // skip the warm-up run in the stats
      total_time += dur;
      printf("Argmax: %d, prop: %f, time: %lld\n", idx, v, dur);
    }
  }
  printf("Avg time: %f\n", total_time * 1.0 / N);
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment