Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Efficient YOLOv3 Inference on OpenCV's CUDA DNN backend
#include <iostream>
#include <queue>
#include <iterator>
#include <sstream>
#include <fstream>
#include <iomanip>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
// Minimum per-class score for a detection to be kept.
// NOTE(review): 0 means no filtering at all — every candidate box for every
// class is fed to NMS, which is costly; presumably intentional here so that
// NMS alone prunes boxes, but confirm (a value like 0.2–0.5 is typical).
constexpr float CONFIDENCE_THRESHOLD = 0;
// IoU threshold used by cv::dnn::NMSBoxes to suppress overlapping boxes.
constexpr float NMS_THRESHOLD = 0.4;
// Number of object classes the model predicts (80 for COCO-trained YOLO).
constexpr int NUM_CLASSES = 80;
// colors for bounding boxes (BGR order); reused cyclically per class id.
const cv::Scalar colors[] = {
{0, 255, 255},
{255, 255, 0},
{0, 255, 0},
{255, 0, 0}
};
// Element count of the palette above; used to wrap class id -> color.
const auto NUM_COLORS = sizeof(colors)/sizeof(colors[0]);
// Runs YOLOv3/YOLOv4 inference on "demo.mp4" using OpenCV's DNN module
// (CUDA backend), draws per-class NMS-filtered boxes with labels, and
// overlays inference/total FPS. Returns 0 on success, 1 on setup failure.
//
// Required files in the working directory:
//   classes.txt      — one class name per line (should have NUM_CLASSES lines)
//   yolov4.cfg / yolov4.weights — Darknet model definition and weights
//   demo.mp4         — input video
int main()
{
    // Load class labels, one per line.
    std::vector<std::string> class_names;
    {
        std::ifstream class_file("classes.txt");
        if (!class_file)
        {
            std::cerr << "failed to open classes.txt\n";
            return 1; // was 0 — report failure to the shell
        }
        std::string line;
        while (std::getline(class_file, line))
            class_names.push_back(line);
    }

    cv::VideoCapture source("demo.mp4");
    if (!source.isOpened())
    {
        // Without this check a missing/unreadable video silently yields
        // empty frames and the program exits with no diagnostic.
        std::cerr << "failed to open demo.mp4\n";
        return 1;
    }

    auto net = cv::dnn::readNetFromDarknet("yolov4.cfg", "yolov4.weights");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    // net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    // net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    auto output_names = net.getUnconnectedOutLayersNames();

    // Create the display window once, not on every frame.
    cv::namedWindow("output");

    // Fractional milliseconds: integer duration_cast truncates, which turns
    // sub-millisecond timings into a division by zero (inf FPS) and makes
    // small counts wildly inaccurate.
    using fmilliseconds = std::chrono::duration<float, std::milli>;

    cv::Mat frame, blob;
    std::vector<cv::Mat> detections;
    while (cv::waitKey(1) < 1)
    {
        source >> frame;
        if (frame.empty())
        {
            cv::waitKey(); // hold the last frame until a key is pressed
            break;
        }

        auto total_start = std::chrono::steady_clock::now();

        // Letterbox-free resize to the network input; scale pixels to [0,1],
        // swap BGR->RGB (Darknet expects RGB).
        cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F);
        net.setInput(blob);

        auto dnn_start = std::chrono::steady_clock::now();
        net.forward(detections, output_names);
        auto dnn_end = std::chrono::steady_clock::now();

        // Per-class candidate boxes/scores; NMS is applied independently
        // per class below.
        std::vector<int> indices[NUM_CLASSES];
        std::vector<cv::Rect> boxes[NUM_CLASSES];
        std::vector<float> scores[NUM_CLASSES];

        for (auto& output : detections)
        {
            // Each row: [cx, cy, w, h, objectness, class scores...],
            // coordinates normalized to [0,1].
            const auto num_boxes = output.rows;
            for (int i = 0; i < num_boxes; i++)
            {
                auto x = output.at<float>(i, 0) * frame.cols;
                auto y = output.at<float>(i, 1) * frame.rows;
                auto width = output.at<float>(i, 2) * frame.cols;
                auto height = output.at<float>(i, 3) * frame.rows;
                cv::Rect rect(x - width/2, y - height/2, width, height);

                for (int c = 0; c < NUM_CLASSES; c++)
                {
                    auto confidence = *output.ptr<float>(i, 5 + c);
                    if (confidence >= CONFIDENCE_THRESHOLD)
                    {
                        boxes[c].push_back(rect);
                        scores[c].push_back(confidence);
                    }
                }
            }
        }

        for (int c = 0; c < NUM_CLASSES; c++)
            cv::dnn::NMSBoxes(boxes[c], scores[c], 0.0, NMS_THRESHOLD, indices[c]);

        for (int c = 0; c < NUM_CLASSES; c++)
        {
            for (size_t i = 0; i < indices[c].size(); ++i)
            {
                const auto color = colors[c % NUM_COLORS];
                auto idx = indices[c][i];
                const auto& rect = boxes[c][idx];
                cv::rectangle(frame, cv::Point(rect.x, rect.y), cv::Point(rect.x + rect.width, rect.y + rect.height), color, 3);

                // Guard against a classes.txt with fewer than NUM_CLASSES
                // lines — indexing past the end is undefined behavior.
                const auto& name = c < static_cast<int>(class_names.size())
                                       ? class_names[c]
                                       : std::to_string(c);
                std::ostringstream label_ss;
                label_ss << name << ": " << std::fixed << std::setprecision(2) << scores[c][idx];
                auto label = label_ss.str();

                int baseline;
                auto label_bg_sz = cv::getTextSize(label.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
                cv::rectangle(frame, cv::Point(rect.x, rect.y - label_bg_sz.height - baseline - 10), cv::Point(rect.x + label_bg_sz.width, rect.y), color, cv::FILLED);
                cv::putText(frame, label.c_str(), cv::Point(rect.x, rect.y - baseline - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 0));
            }
        }

        auto total_end = std::chrono::steady_clock::now();

        float inference_fps = 1000.0f / fmilliseconds(dnn_end - dnn_start).count();
        float total_fps = 1000.0f / fmilliseconds(total_end - total_start).count();

        std::ostringstream stats_ss;
        stats_ss << std::fixed << std::setprecision(2);
        stats_ss << "Inference FPS: " << inference_fps << ", Total FPS: " << total_fps;
        auto stats = stats_ss.str();

        int baseline;
        auto stats_bg_sz = cv::getTextSize(stats.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
        cv::rectangle(frame, cv::Point(0, 0), cv::Point(stats_bg_sz.width, stats_bg_sz.height + 10), cv::Scalar(0, 0, 0), cv::FILLED);
        cv::putText(frame, stats.c_str(), cv::Point(0, stats_bg_sz.height + 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(255, 255, 255));

        cv::imshow("output", frame);
    }
    return 0;
}
@dnaveenr

This comment has been minimized.

Copy link

dnaveenr commented Jun 18, 2020

Thanks for this Yashas. Could you please provide the corresponding python code for Efficient YOLOv3 Inference on OpenCV?

@YashasSamaga

This comment has been minimized.

Copy link
Owner Author

YashasSamaga commented Jun 19, 2020

@dnaveenr You just have to change auto net = cv::dnn::readNetFromDarknet("yolov4.cfg", "yolov4.weights"); line. The code works for both YOLOv3 and YOLOv4.

@dnaveenr

This comment has been minimized.

Copy link

dnaveenr commented Jun 19, 2020

Ok. Thanks. I meant the equivalent Python code? But it's fine. I think the main difference is the following :

for (auto name : net.getUnconnectedOutLayersNames())
{
int layerId = net.getLayerId(name);
auto layer = net.getLayer(layerId).dynamicCast<cv::dnn::RegionLayer>();
if (!layer.empty())
layer->nmsThreshold = 0;
}

I'll make the changes.

@YashasSamaga

This comment has been minimized.

Copy link
Owner Author

YashasSamaga commented Jun 19, 2020

@dnaveenr Sorry, I read too fast. You can get the same effect by manually setting nms_threshold=0 (add it if not present already) in all [yolo] blocks in yolovN.cfg. I don't think nmsThreshold is exposed in python.

@dnaveenr

This comment has been minimized.

Copy link

dnaveenr commented Jun 19, 2020

No problem. Thanks. I will try it out.

@sky-fly97

This comment has been minimized.

Copy link

sky-fly97 commented Jun 26, 2020

Hello,I got an error that cv::dnn::dnn4_v20191202::RegionLayer" has no member "nmsThreshold" at line 52-53, but if I delete it,it will be OK!

@sky-fly97

This comment has been minimized.

Copy link

sky-fly97 commented Jun 26, 2020

By the way, how to save the MP4 demo after running?

@YashasSamaga

This comment has been minimized.

Copy link
Owner Author

YashasSamaga commented Jun 26, 2020

@sky-fly97 I can guess that your OpenCV version is not the latest master. In that case, you will see performance regressions if you do not set the nms threshold to zero. The old solution is to set nms_threshold=0 in all [yolo] blocks in yolov3.cfg.

If you need YOLOv4 support, you need the master branch.

This is what happened:

  1. nmsThreshold was added a month ago to address spurious NMS in opencv/opencv#17371
  2. A new fix which sets nmsThreshold to zero by default was added in opencv/opencv#17592 (and the old fix is redundant now)

It's not required anymore and I have removed it from this gist.

@sky-fly97

This comment has been minimized.

Copy link

sky-fly97 commented Jun 26, 2020

Yeah, now my version of OpenCV is 4.2. Thank you very much!

@YashasSamaga

This comment has been minimized.

Copy link
Owner Author

YashasSamaga commented Jun 26, 2020

@sky-fly97

By the way, how to save the MP4 demo after running?

You need VideoWriter.

cv::VideoWriter writer("output.mp4", cv::VideoWriter::fourcc('M', 'P', 'E', 'G'), 30, cv::Size(width, height));

cv::Mat frame; // what you want to write
writer << frame;

Yeah, now my version of opencv is 4.2.Thank you very much!

There have been a lot of performance improvements since then. OpenCV 4.2 was the first release with CUDA support.

@sky-fly97

This comment has been minimized.

Copy link

sky-fly97 commented Jun 27, 2020

@sky-fly97

By the way, how to save the MP4 demo after running?

You need VideoWriter.

cv::VideoWriter writer("output.mp4", cv::VideoWriter::fourcc('M', 'P', 'E', 'G'), 30, cv::Size(width, height));

cv::Mat frame; // what you want to write
writer << frame;

Yeah, now my version of opencv is 4.2.Thank you very much!

There have been a lot of performance improvements since then. OpenCV 4.2 was the first release with CUDA support.

Thanks!

@sky-fly97

This comment has been minimized.

Copy link

sky-fly97 commented Jun 27, 2020

When I changed yolov3 to yolov4's cfg and weights, a cv::Exception error occurred during dnn::readNetFromDarknet. I think it may be because of the opencv version problem, because 4.2 was proposed at the end of last year, but yolov4 was proposed in April this year.

@YashasSamaga

This comment has been minimized.

Copy link
Owner Author

YashasSamaga commented Jun 27, 2020

@sky-fly97 There is no release which supports YOLOv4 yet. Support has been added for the next release on master. You need to use master if you need YOLOv4 support.

@sky-fly97

This comment has been minimized.

Copy link

sky-fly97 commented Jun 27, 2020

When compiling the latest master version, I followed the same process but there were still some problems that caused the compilation to fail, which was not encountered in the 4.2 and 4.3 versions... Could you please share your compiled opencv file? ? Thank you very much! It doesn't matter if it's inconvenient, I will study it again!

@YashasSamaga

This comment has been minimized.

Copy link
Owner Author

YashasSamaga commented Jun 27, 2020

@sky-fly97 What is the error that is causing the compilation to fail?

@sky-fly97

This comment has been minimized.

Copy link

sky-fly97 commented Jun 27, 2020

https://github.com/opencv/opencv/issues/17677,I just asked this question in the community.
Cannot specify link libraries for target "opencv_gapi"

@YashasSamaga

This comment has been minimized.

Copy link
Owner Author

YashasSamaga commented Jun 27, 2020

@sky-fly97

If you only need DNN with CUDA support, you need the following modules:

  • cudev
  • opencv_core
  • opencv_dnn
  • opencv_imgproc

You might also require the following to read/write/display images and videos:

  • opencv_imgcodecs
  • opencv_highgui
  • opencv_videoio

You can disable the rest.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.