// ===== main.cpp =====
// OpenCV DNN backend benchmark by @YashasSamaga (gist last active May 8, 2020).
#include <iostream>
#include <algorithm>
#include <cassert>
#include <fstream>
#include <utility>
#include <vector>
#include <chrono>

#include <opencv2/dnn.hpp>
#include <opencv2/highgui.hpp>

#include "benchmark.hpp"

constexpr bool async = false;
constexpr auto default_batch_size = 1;

//#define USE_RANDOM_IMAGES
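// When USE_RANDOM_IMAGES is defined, each input is filled with random noise
// instead of the sample images loaded in main(); handy when data/images/ is
// not available.

// A mask entry suppresses a matching {backend, target} pair during
// benchmarking; -1 in either field acts as a wildcard (see bench_network_sync).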
struct mask_type {
    int backend;
    int target;
};

struct config_type {
    std::string name;
    int backend;
    int target;
};
std::vector<config_type> backends = {
    {"OCV CPU", cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_CPU},
    {"OCV OpenCL", cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_OPENCL},
    {"OCV OpenCL FP16", cv::dnn::DNN_BACKEND_OPENCV, cv::dnn::DNN_TARGET_OPENCL_FP16},
    {"IE CPU", cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, cv::dnn::DNN_TARGET_CPU},
    {"CUDA FP32", cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA},
    {"CUDA FP16", cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA_FP16}
};
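// Note: the CUDA entries require an OpenCV build with the CUDA DNN backend
// (CUDA + cuDNN), and the IE entry requires an Inference Engine (OpenVINO)
// build; an unsupported target typically falls back to the default backend
// at runtime, so check the warnings OpenCV prints.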
std::vector<cv::Mat> image_samples;
template <class T>
auto to_milliseconds(const T& duration) {
    return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
}

template <class T>
auto to_microseconds(const T& duration) {
    return std::chrono::duration_cast<std::chrono::microseconds>(duration);
}
template <std::size_t BenchmarkTrials = 10, std::size_t WarmupTrials = 3>
auto run_network(const std::string& model, const std::string& config, int backend, int target, const cv::Mat& blob)
{
    auto net = cv::dnn::readNet(model, config);
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

    for (std::size_t i = 0; i < WarmupTrials; i++)
    {
        net.setInput(blob);
        net.forward();
    }

    return benchmark([&] {
        for (std::size_t i = 0; i < BenchmarkTrials; i++)
        {
            net.setInput(blob);
            net.forward();
        }
    });
}
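// run_network_async times the asynchronous API in two parts: the time to set
// the input and queue the forward pass ("init"), and the time spent blocking
// on the result ("wait"). Note that upstream OpenCV implements
// Net::forwardAsync() only for the Inference Engine backend; calling it with
// DNN_BACKEND_CUDA assumes a build carrying the CUDA async support this gist
// was written to measure.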
template <std::size_t BenchmarkTrials = 10, std::size_t WarmupTrials = 3>
auto run_network_async(const std::string& model, const std::string& config, int backend, int target, const cv::Mat& blob)
{
    auto net = cv::dnn::readNet(model, config);
    net.setPreferableBackend(backend);
    net.setPreferableTarget(target);

    for (std::size_t i = 0; i < WarmupTrials; i++)
    {
        net.setInput(blob);
        net.forward();
    }

    /* benchmark an empty callable just to deduce the clock's duration type */
    auto waste = benchmark([] { });
    using duration_type = decltype(waste);

    duration_type init_time{}, wait_time{};
    cv::AsyncArray asyncOutput;
    cv::Mat output;
    for (std::size_t i = 0; i < BenchmarkTrials; i++)
    {
        init_time += benchmark([&] {
            net.setInput(blob);
            asyncOutput = net.forwardAsync();
        });

        wait_time += benchmark([&] {
            asyncOutput.get(output);
        });
    }
    return std::make_pair(init_time, wait_time);
}
void bench_network_async(const std::string& model, const std::string& config, cv::Size input_size, int count = default_batch_size, std::vector<mask_type> mask = {}) {
#ifndef USE_RANDOM_IMAGES
    assert(count <= static_cast<int>(image_samples.size()));
#endif

    std::vector<cv::Mat> images;
    for (int i = 0; i < count; i++)
    {
#ifdef USE_RANDOM_IMAGES
        cv::Mat image(input_size, CV_8UC3); /* 8-bit BGR, matching the sample images */
        cv::randu(image, cv::Scalar(0, 0, 0), cv::Scalar(255, 255, 255));
        images.push_back(image);
#else
        images.push_back(image_samples[i]);
#endif
    }

    cv::Mat blob = cv::dnn::blobFromImages(images, 1.0f, input_size, 0.0f);

    constexpr int N = 10;
    auto time = run_network_async<N>(model, config, cv::dnn::DNN_BACKEND_CUDA, cv::dnn::DNN_TARGET_CUDA, blob);
    float average_init = to_microseconds(time.first).count() / 1000.0 / N;
    float average_wait = to_microseconds(time.second).count() / 1000.0 / N;
    std::cout << "Average Initialization Time: " << average_init << "ms" << std::endl;
    std::cout << "Average Waiting Time: " << average_wait << "ms" << std::endl;
    std::cout << std::endl;
}
void bench_network_sync(const std::string& model, const std::string& config, cv::Size input_size, int count = default_batch_size, std::vector<mask_type> mask = {}) {
#ifndef USE_RANDOM_IMAGES
    assert(count <= static_cast<int>(image_samples.size()));
#endif

    std::vector<cv::Mat> images;
    for (int i = 0; i < count; i++)
    {
#ifdef USE_RANDOM_IMAGES
        cv::Mat image(input_size, CV_8UC3); /* 8-bit BGR, matching the sample images */
        cv::randu(image, cv::Scalar(0, 0, 0), cv::Scalar(255, 255, 255));
        images.push_back(image);
#else
        images.push_back(image_samples[i]);
#endif
    }

    cv::Mat blob = cv::dnn::blobFromImages(images, 1.0f, input_size, 0.0f);

    for (auto c : backends) {
        auto backend = c.backend;
        auto target = c.target;

        /* skip configurations matched by a mask entry; -1 is a wildcard */
        bool masked = false;
        for (auto m : mask) {
            if (m.backend == backend && m.target == target)
                masked = true;
            if (m.backend == backend && m.target == -1)
                masked = true;
            if (m.backend == -1 && m.target == target)
                masked = true;
        }
        if (masked)
            continue;

        constexpr int N = 10;
        auto time = run_network<N>(model, config, backend, target, blob);
        float average = to_microseconds(time).count() / 1000.0 / N;
        std::cout << c.name << ": " << average << "ms" << std::endl;
    }
    std::cout << std::endl;
}
void bench_network(const std::string& model, const std::string& config, cv::Size input_size, int count = default_batch_size, std::vector<mask_type> mask = {}) {
    if (async)
        bench_network_async(model, config, input_size, count, mask);
    else
        bench_network_sync(model, config, input_size, count, mask);
}
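// Example (hypothetical model paths): benchmark a custom Caffe model while
// skipping both OpenCL targets via wildcard masks:
//
//   bench_network("data/my_model/deploy.prototxt",
//                 "data/my_model/weights.caffemodel",
//                 cv::Size(224, 224), default_batch_size,
//                 {{-1, cv::dnn::DNN_TARGET_OPENCL},
//                  {-1, cv::dnn::DNN_TARGET_OPENCL_FP16}});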
void bench_alexnet()
{
    std::cout << "BVLC AlexNet\n";
    bench_network("data/alexnet/deploy.prototxt", "data/alexnet/bvlc_alexnet.caffemodel", cv::Size(227, 227));
    std::cout << std::endl;
}

void bench_googlenet()
{
    std::cout << "BVLC GoogLeNet\n";
    bench_network("data/googlenet/deploy.prototxt", "data/googlenet/bvlc_googlenet.caffemodel", cv::Size(224, 224));
    std::cout << std::endl;
}

void bench_resnet50()
{
    std::cout << "ResNet 50\n";
    bench_network("data/resnet50/ResNet-50-deploy.prototxt", "data/resnet50/ResNet-50-model.caffemodel", cv::Size(224, 224));
    std::cout << std::endl;
}

void bench_squeezenet()
{
    std::cout << "SqueezeNet v1.1\n";
    bench_network("data/squeezenet/squeezenet_v1.1.prototxt", "data/squeezenet/squeezenet_v1.1.caffemodel", cv::Size(227, 227));
    std::cout << std::endl;
}

void bench_tensorflow_inception_5h()
{
    std::cout << "TensorFlow Inception 5h\n";
    bench_network("data/tensorflow_inception_5h/tensorflow_inception_graph.pb", "", cv::Size(224, 224));
    std::cout << std::endl;
}

void bench_vgg16()
{
    std::cout << "VGG16 SSD\n";
    bench_network("data/vgg16/ssd_vgg16.prototxt", "data/vgg16/VGG_ILSVRC2016_SSD_300x300_iter_440000.caffemodel", cv::Size(300, 300));
    std::cout << std::endl;
}

void bench_enet()
{
    std::cout << "ENet Cityscapes\n";
    bench_network("data/enet/model-cityscapes.net", "", cv::Size(512, 256), 1,
        {
            {cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, -1},
            // {-1, cv::dnn::DNN_TARGET_OPENCL_FP16}
        });
    std::cout << std::endl;
}

void bench_openface_nn4_small2_v1()
{
    std::cout << "OpenFace nn4 small2 v1\n";
    bench_network("data/openface/nn4.small2.v1.t7", "", cv::Size(96, 96));
    std::cout << std::endl;
}

void bench_mobilenet_ssd()
{
    std::cout << "MobileNet SSD\n";
    bench_network("data/mobilenet_ssd/MobileNetSSD_deploy.prototxt", "data/mobilenet_ssd/MobileNetSSD_deploy.caffemodel", cv::Size(300, 300));
    std::cout << std::endl;
}

void bench_mobilenet_ssd_v1_coco()
{
    std::cout << "MobileNet SSD COCO v1\n";
    bench_network("data/mobilenet_ssd_v1_coco_2017_11_17/ssd_mobilenet_v1_coco_2017_11_17.pb", "data/mobilenet_ssd_v1_coco_2017_11_17/ssd_mobilenet_v1_coco_2017_11_17.pbtxt", cv::Size(300, 300));
    std::cout << std::endl;
}

void bench_mobilenet_ssd_v2_coco()
{
    std::cout << "MobileNet SSD COCO v2\n";
    bench_network("data/mobilenet_ssd_v2_coco_2018_03_29/ssd_mobilenet_v2_coco_2018_03_29.pb", "data/mobilenet_ssd_v2_coco_2018_03_29/ssd_mobilenet_v2_coco_2018_03_29.pbtxt", cv::Size(300, 300));
    std::cout << std::endl;
}

void bench_densenet121()
{
    std::cout << "DenseNet 121\n";
    bench_network("data/densenet121/DenseNet_121.prototxt", "data/densenet121/DenseNet_121.caffemodel", cv::Size(224, 224));
    std::cout << std::endl;
}

void bench_openpose_pose_mpi()
{
    std::cout << "OpenPose pose MPI\n";
    bench_network("data/openpose_pose_mpi/openpose_pose_mpi_faster_4_stages.prototxt", "data/openpose_pose_mpi/pose_iter_160000.caffemodel", cv::Size(368, 368));
    std::cout << std::endl;
}

void bench_opencv_face_detector()
{
    std::cout << "OpenCV Face Detector\n";
    bench_network("data/opencv_face_detector/deploy.prototxt", "data/opencv_face_detector/res10_300x300_ssd_iter_140000_fp16.caffemodel", cv::Size(300, 300));
    std::cout << std::endl;
}

void bench_inception_v2_coco()
{
    std::cout << "Inception v2 COCO\n";
    bench_network("data/ssd_inception_v2_coco_2017_11_17/ssd_inception_v2_coco_2017_11_17.pb", "data/ssd_inception_v2_coco_2017_11_17/ssd_inception_v2_coco_2017_11_17.pbtxt", cv::Size(300, 300));
    std::cout << std::endl;
}

void bench_yolo_v3()
{
    std::cout << "YOLO v3\n";
    bench_network("data/yolov3/yolov3.cfg", "data/yolov3/yolov3.weights", cv::Size(416, 416));
    std::cout << std::endl;
}

void bench_EAST_text_detection()
{
    std::cout << "EAST Text Detection\n";
    bench_network("data/east_text_detection/frozen_east_text_detection.pb", "", cv::Size(320, 320));
    std::cout << std::endl;
}

void bench_fast_neural_style_sn()
{
    std::cout << "FastNeuralStyle Starry Night\n";
    bench_network("data/fns_stary_night/fast_neural_style_eccv16_starry_night.t7", "", cv::Size(320, 240));
    std::cout << std::endl;
}

void bench_inception_v2_faster_rcnn()
{
    std::cout << "Inception v2 Faster R-CNN\n";
    bench_network("data/inception_v2_faster_rcnn/faster_rcnn_inception_v2_coco_2018_01_28.pb", "data/inception_v2_faster_rcnn/faster_rcnn_inception_v2_coco_2018_01_28.pbtxt", cv::Size(800, 600), default_batch_size,
        {
            {cv::dnn::DNN_BACKEND_INFERENCE_ENGINE, -1}
        });
    std::cout << std::endl;
}

void bench_yolo_v2()
{
    std::cout << "YOLO v2\n";
    bench_network("data/yolov2/yolov2.cfg", "data/yolov2/yolov2.weights", cv::Size(608, 608));
    std::cout << std::endl;
}
int main(int argc, char *argv[]) {
    constexpr auto total_images = 10;
    auto prefix = std::string("data/images/img_"),
         suffix = std::string(".jpg");

    /* populate sample images */
    for (int i = 0; i < total_images; i++) {
        auto file = prefix + std::to_string(i) + suffix;
        auto image = cv::imread(file);
        image_samples.push_back(image);
    }

    bench_enet();

    if (async)
    {
        bench_alexnet();
        bench_googlenet();
        bench_resnet50();
        bench_squeezenet();
        bench_tensorflow_inception_5h();
        bench_enet();
        bench_openface_nn4_small2_v1();
        bench_densenet121();
        bench_openpose_pose_mpi();
        bench_EAST_text_detection();
        bench_fast_neural_style_sn();
    }
    else
    {
        bench_alexnet();
        bench_googlenet();
        bench_resnet50();
        bench_squeezenet();
        bench_tensorflow_inception_5h();
        bench_vgg16();
        bench_enet();
        bench_openface_nn4_small2_v1();
        bench_mobilenet_ssd();
        //bench_mobilenet_ssd_v1_coco();
        //bench_mobilenet_ssd_v2_coco();
        bench_densenet121();
        bench_openpose_pose_mpi();
        bench_opencv_face_detector();
        bench_inception_v2_coco();
        bench_yolo_v3();
        bench_yolo_v2();
        bench_EAST_text_detection();
        bench_fast_neural_style_sn();
        bench_inception_v2_faster_rcnn();
    }
    return 0;
}
// ===== benchmark.hpp =====
#ifndef BENCHMARK_HPP
#define BENCHMARK_HPP

#include <chrono>
#include <utility>

/* times a single invocation of `function` and returns the elapsed duration */
template <class Function, typename ...Args>
auto benchmark(Function function, Args&& ...args) {
    using std::chrono::steady_clock;
    auto start = steady_clock::now();
    function(std::forward<Args>(args)...);
    auto end = steady_clock::now();
    return end - start;
}
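// Example usage (`heavy_work` is a placeholder for any callable body):
//
//   auto elapsed = benchmark([] { heavy_work(); });
//   std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()
//             << "ms\n";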
/* doNotOptimizeAway from https://stackoverflow.com/a/36781982/1935009 */
/* (unused in this benchmark, but handy to keep a computed value alive so a
   timed loop is not optimized out) */
#ifdef _MSC_VER
#pragma optimize("", off)
template <class T>
void doNotOptimizeAway(T&& datum) {
    datum = datum;
}
#pragma optimize("", on)
#elif defined(__clang__)
template <class T>
__attribute__((__optnone__)) void doNotOptimizeAway(T&& /* datum */) {}
#else
template <class T>
void doNotOptimizeAway(T&& datum) {
    asm volatile("" : "+r" (datum));
}
#endif

#endif /* BENCHMARK_HPP */