Skip to content

Instantly share code, notes, and snippets.

Last active July 13, 2024 06:42
Show Gist options
  • Save YashasSamaga/e2b19a6807a13046e399f4bc3cca3a49 to your computer and use it in GitHub Desktop.
Save YashasSamaga/e2b19a6807a13046e399f4bc3cca3a49 to your computer and use it in GitHub Desktop.
YOLOv4 on OpenCV DNN
# YOLOv4 object detection on a video file using OpenCV's high-level
# DetectionModel API. Reads class labels from classes.txt, runs the network on
# every frame of demo.mp4, draws the detections, and shows the annotated frame
# until the video ends or a key is pressed.
import cv2
import time

import numpy as np  # cv2 itself depends on numpy, so this adds no new requirement

# NOTE(review): these two constants are used by model.detect() below but their
# definitions were not present in the snippet. 0.4 matches the NMS threshold of
# the C++ sample; 0.2 is a conventional confidence cut-off -- confirm/tune.
CONFIDENCE_THRESHOLD = 0.2
NMS_THRESHOLD = 0.4

# Box/label colours, cycled by class id.
COLORS = [(0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]

# One class name per line; the line index is the class id.
with open("classes.txt", "r") as f:
    class_names = [cname.strip() for cname in f]

vc = cv2.VideoCapture("demo.mp4")
if not vc.isOpened():
    raise SystemExit("failed to open demo.mp4")

net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")

model = cv2.dnn_DetectionModel(net)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)

try:
    # waitKey() returns -1 while no key is pressed, so any key press stops the loop.
    while cv2.waitKey(1) < 1:
        (grabbed, frame) = vc.read()
        if not grabbed:
            # End of stream (or read error): leave the loop so cleanup runs.
            break

        start = time.perf_counter()
        classes, scores, boxes = model.detect(frame, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
        end = time.perf_counter()

        start_drawing = time.perf_counter()
        for (classid, score, box) in zip(classes, scores, boxes):
            # Depending on the OpenCV version each entry is either a scalar or
            # a one-element array; ravel() handles both forms.
            class_id = int(np.ravel(classid)[0])
            confidence = float(np.ravel(score)[0])

            color = COLORS[class_id % len(COLORS)]
            # Fall back to the numeric id if classes.txt has too few entries.
            name = class_names[class_id] if class_id < len(class_names) else str(class_id)
            label = "%s : %f" % (name, confidence)

            cv2.rectangle(frame, box, color, 2)
            cv2.putText(frame, label, (int(box[0]), int(box[1]) - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        end_drawing = time.perf_counter()

        # Guard against a zero elapsed time so the FPS computation cannot divide by zero.
        inference_seconds = max(end - start, 1e-9)
        fps_label = "FPS: %.2f (excluding drawing time of %.2fms)" % (
            1 / inference_seconds, (end_drawing - start_drawing) * 1000)
        cv2.putText(frame, fps_label, (0, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
        cv2.imshow("detections", frame)
finally:
    # Always release the capture device and close the preview window.
    vc.release()
    cv2.destroyAllWindows()
#include <iostream>
#include <queue>
#include <iterator>
#include <sstream>
#include <fstream>
#include <iomanip>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
constexpr float CONFIDENCE_THRESHOLD = 0;
constexpr float NMS_THRESHOLD = 0.4;
constexpr int NUM_CLASSES = 80;
// colors for bounding boxes
const cv::Scalar colors[] = {
{0, 255, 255},
{255, 255, 0},
{0, 255, 0},
{255, 0, 0}
const auto NUM_COLORS = sizeof(colors)/sizeof(colors[0]);
int main()
std::vector<std::string> class_names;
std::ifstream class_file("classes.txt");
if (!class_file)
std::cerr << "failed to open classes.txt\n";
return 0;
std::string line;
while (std::getline(class_file, line))
cv::VideoCapture source("demo.mp4");
auto net = cv::dnn::readNetFromDarknet("yolov4.cfg", "yolov4.weights");
// net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
// net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
auto output_names = net.getUnconnectedOutLayersNames();
cv::Mat frame, blob;
std::vector<cv::Mat> detections;
while(cv::waitKey(1) < 1)
source >> frame;
if (frame.empty())
auto total_start = std::chrono::steady_clock::now();
cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F);
auto dnn_start = std::chrono::steady_clock::now();
net.forward(detections, output_names);
auto dnn_end = std::chrono::steady_clock::now();
std::vector<int> indices[NUM_CLASSES];
std::vector<cv::Rect> boxes[NUM_CLASSES];
std::vector<float> scores[NUM_CLASSES];
for (auto& output : detections)
const auto num_boxes = output.rows;
for (int i = 0; i < num_boxes; i++)
auto x =<float>(i, 0) * frame.cols;
auto y =<float>(i, 1) * frame.rows;
auto width =<float>(i, 2) * frame.cols;
auto height =<float>(i, 3) * frame.rows;
cv::Rect rect(x - width/2, y - height/2, width, height);
for (int c = 0; c < NUM_CLASSES; c++)
auto confidence = *output.ptr<float>(i, 5 + c);
if (confidence >= CONFIDENCE_THRESHOLD)
for (int c = 0; c < NUM_CLASSES; c++)
cv::dnn::NMSBoxes(boxes[c], scores[c], 0.0, NMS_THRESHOLD, indices[c]);
for (int c= 0; c < NUM_CLASSES; c++)
for (size_t i = 0; i < indices[c].size(); ++i)
const auto color = colors[c % NUM_COLORS];
auto idx = indices[c][i];
const auto& rect = boxes[c][idx];
cv::rectangle(frame, cv::Point(rect.x, rect.y), cv::Point(rect.x + rect.width, rect.y + rect.height), color, 3);
std::ostringstream label_ss;
label_ss << class_names[c] << ": " << std::fixed << std::setprecision(2) << scores[c][idx];
auto label = label_ss.str();
int baseline;
auto label_bg_sz = cv::getTextSize(label.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
cv::rectangle(frame, cv::Point(rect.x, rect.y - label_bg_sz.height - baseline - 10), cv::Point(rect.x + label_bg_sz.width, rect.y), color, cv::FILLED);
cv::putText(frame, label.c_str(), cv::Point(rect.x, rect.y - baseline - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 0));
auto total_end = std::chrono::steady_clock::now();
float inference_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(dnn_end - dnn_start).count();
float total_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(total_end - total_start).count();
std::ostringstream stats_ss;
stats_ss << std::fixed << std::setprecision(2);
stats_ss << "Inference FPS: " << inference_fps << ", Total FPS: " << total_fps;
auto stats = stats_ss.str();
int baseline;
auto stats_bg_sz = cv::getTextSize(stats.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
cv::rectangle(frame, cv::Point(0, 0), cv::Point(stats_bg_sz.width, stats_bg_sz.height + 10), cv::Scalar(0, 0, 0), cv::FILLED);
cv::putText(frame, stats.c_str(), cv::Point(0, stats_bg_sz.height + 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(255, 255, 255));
cv::imshow("output", frame);
return 0;
Copy link

How can I achieve batched inference using cv2.dnn_DetectionModel.detect for YOLO?

Copy link

mochechan commented Jul 18, 2022

The Python code works fine for me, but the C++ version does not work.


/usr/bin/c++   -lstdc++  -g -pg -O0 -pthread -lpthread -lstdc++fs  -std=c++14 -std=c++17 -fPIC -std=gnu++11 -rdynamic -I/usr/local/include/tkDNN/ -I/usr/local/cuda/include -std=c++1z  -std=gnu++1z  CMakeFiles/predev2.dir/src/yolov4_opencv_dnn_cuda.cpp.o -o predev2   -L/usr/local/cuda/lib64  -Wl,-rpath,/usr/local/cuda/lib64:/usr/local/lib -ltkDNN -lcurl /usr/lib/aarch64-linux-gnu/ /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/ /usr/lib/aarch64-linux-gnu/ /usr/lib/aarch64-linux-gnu/ /usr/lib/aarch64-linux-gnu/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/ /usr/lib/aarch64-linux-gnu/ /usr/lib/aarch64-linux-gnu/ -lpthread /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ /usr/local/lib/ 


$ gdb -batch -ex run -ex where -ex list -ex quit --args  ./predev2 
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/aarch64-linux-gnu/".
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 2 0, bytestream 213160
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 69 0, bytestream 669
[New Thread 0x7f97f79b50 (LWP 3049)]
[New Thread 0x7f97778b50 (LWP 3050)]
[New Thread 0x7f96f77b50 (LWP 3051)]
[New Thread 0x7f96776b50 (LWP 3052)]
[New Thread 0x7f95f75b50 (LWP 3053)]
[New Thread 0x7f95774b50 (LWP 3054)]
[h264 @ 0x5555792890] top block unavailable for requested intra mode -1
[h264 @ 0x5555792890] error while decoding MB 2 0, bytestream 213160

Thread 1 "predev2" received signal SIGSEGV, Segmentation fault.
0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () from /usr/lib/aarch64-linux-gnu/
#0  0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () at /usr/lib/aarch64-linux-gnu/
#1  0x0000007fb6f8edfc in cv::resize(cv::_InputArray const&, cv::_OutputArray const&, cv::Size_<int>, double, double, int) () at /usr/local/lib/
#2  0x0000007fb72f5d04 in cv::dnn::dnn4_v20220524::blobFromImages(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int>, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/
#3  0x0000007fb72f6864 in cv::dnn::dnn4_v20220524::blobFromImage(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int> const&, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/
#4  0x0000005555560498 in main() () at /home/a/ai22/src/yolov4_opencv_dnn_cuda.cpp:66
16	constexpr float CONFIDENCE_THRESHOLD = 0;
17	constexpr float NMS_THRESHOLD = 0.4;
18	constexpr int NUM_CLASSES = 80;
20	// colors for bounding boxes
21	const cv::Scalar colors[] = {
22	    {0, 255, 255},
23	    {255, 255, 0},
24	    {0, 255, 0},
25	    {255, 0, 0}
A debugging session is active.

	Inferior 1 [process 3043] will be killed.

Quit anyway? (y or n) [answered Y; input not from terminal]

The following line causes the crash. What is the problem, and how can I solve it?

cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F);

Even if I change the size to cv::Size(512, 512) to match my yolov4.cfg, the issue persists.

Copy link


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment