Last active
July 13, 2024 06:42
-
-
Save YashasSamaga/e2b19a6807a13046e399f4bc3cca3a49 to your computer and use it in GitHub Desktop.
YOLOv4 on OpenCV DNN
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2
import time

# Minimum score for a detection to be reported by model.detect().
CONFIDENCE_THRESHOLD = 0.2
# Threshold handed to model.detect() for non-maximum suppression.
NMS_THRESHOLD = 0.4
# Box colors, cycled by class id.
COLORS = [(0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]


def _as_scalar(value):
    """Return value[0] for a 1-element sequence, or value itself if it is a scalar.

    model.detect() yields per-detection class ids and scores either as
    1-element arrays or as plain scalars depending on the OpenCV build
    (scalars are reported from 4.5.4 onwards - verify against your version),
    so both shapes are accepted here.
    """
    try:
        return value[0]
    except (TypeError, IndexError):
        return value


# One label per line; the line index is used as the class id.
class_names = []
with open("classes.txt", "r") as f:
    class_names = [cname.strip() for cname in f.readlines()]

vc = cv2.VideoCapture("demo.mp4")
if not vc.isOpened():
    raise SystemExit("failed to open demo.mp4")

net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)

model = cv2.dnn_DetectionModel(net)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)

# Run until a key is pressed or the video is exhausted.
while cv2.waitKey(1) < 1:
    (grabbed, frame) = vc.read()
    if not grabbed:
        break

    start = time.perf_counter()
    classes, scores, boxes = model.detect(frame, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
    end = time.perf_counter()

    start_drawing = time.perf_counter()
    for (classid, score, box) in zip(classes, scores, boxes):
        classid = int(_as_scalar(classid))
        score = float(_as_scalar(score))
        color = COLORS[classid % len(COLORS)]
        # Fall back to the numeric id when classes.txt has too few entries.
        name = class_names[classid] if 0 <= classid < len(class_names) else str(classid)
        label = "%s : %f" % (name, score)
        cv2.rectangle(frame, box, color, 2)
        cv2.putText(frame, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    end_drawing = time.perf_counter()

    # Guard against a zero-length interval on coarse timers.
    elapsed = end - start
    fps = 1 / elapsed if elapsed > 0 else float("inf")
    fps_label = "FPS: %.2f (excluding drawing time of %.2fms)" % (fps, (end_drawing - start_drawing) * 1000)
    cv2.putText(frame, fps_label, (0, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
    cv2.imshow("detections", frame)

# Release the capture and the display window on every exit path of the loop.
vc.release()
cv2.destroyAllWindows()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm>
#include <chrono>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <queue>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
constexpr float CONFIDENCE_THRESHOLD = 0; | |
constexpr float NMS_THRESHOLD = 0.4; | |
constexpr int NUM_CLASSES = 80; | |
// colors for bounding boxes | |
const cv::Scalar colors[] = { | |
{0, 255, 255}, | |
{255, 255, 0}, | |
{0, 255, 0}, | |
{255, 0, 0} | |
}; | |
const auto NUM_COLORS = sizeof(colors)/sizeof(colors[0]); | |
int main() | |
{ | |
std::vector<std::string> class_names; | |
{ | |
std::ifstream class_file("classes.txt"); | |
if (!class_file) | |
{ | |
std::cerr << "failed to open classes.txt\n"; | |
return 0; | |
} | |
std::string line; | |
while (std::getline(class_file, line)) | |
class_names.push_back(line); | |
} | |
cv::VideoCapture source("demo.mp4"); | |
auto net = cv::dnn::readNetFromDarknet("yolov4.cfg", "yolov4.weights"); | |
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); | |
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); | |
// net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); | |
// net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); | |
auto output_names = net.getUnconnectedOutLayersNames(); | |
cv::Mat frame, blob; | |
std::vector<cv::Mat> detections; | |
while(cv::waitKey(1) < 1) | |
{ | |
source >> frame; | |
if (frame.empty()) | |
{ | |
cv::waitKey(); | |
break; | |
} | |
auto total_start = std::chrono::steady_clock::now(); | |
cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F); | |
net.setInput(blob); | |
auto dnn_start = std::chrono::steady_clock::now(); | |
net.forward(detections, output_names); | |
auto dnn_end = std::chrono::steady_clock::now(); | |
std::vector<int> indices[NUM_CLASSES]; | |
std::vector<cv::Rect> boxes[NUM_CLASSES]; | |
std::vector<float> scores[NUM_CLASSES]; | |
for (auto& output : detections) | |
{ | |
const auto num_boxes = output.rows; | |
for (int i = 0; i < num_boxes; i++) | |
{ | |
auto x = output.at<float>(i, 0) * frame.cols; | |
auto y = output.at<float>(i, 1) * frame.rows; | |
auto width = output.at<float>(i, 2) * frame.cols; | |
auto height = output.at<float>(i, 3) * frame.rows; | |
cv::Rect rect(x - width/2, y - height/2, width, height); | |
for (int c = 0; c < NUM_CLASSES; c++) | |
{ | |
auto confidence = *output.ptr<float>(i, 5 + c); | |
if (confidence >= CONFIDENCE_THRESHOLD) | |
{ | |
boxes[c].push_back(rect); | |
scores[c].push_back(confidence); | |
} | |
} | |
} | |
} | |
for (int c = 0; c < NUM_CLASSES; c++) | |
cv::dnn::NMSBoxes(boxes[c], scores[c], 0.0, NMS_THRESHOLD, indices[c]); | |
for (int c= 0; c < NUM_CLASSES; c++) | |
{ | |
for (size_t i = 0; i < indices[c].size(); ++i) | |
{ | |
const auto color = colors[c % NUM_COLORS]; | |
auto idx = indices[c][i]; | |
const auto& rect = boxes[c][idx]; | |
cv::rectangle(frame, cv::Point(rect.x, rect.y), cv::Point(rect.x + rect.width, rect.y + rect.height), color, 3); | |
std::ostringstream label_ss; | |
label_ss << class_names[c] << ": " << std::fixed << std::setprecision(2) << scores[c][idx]; | |
auto label = label_ss.str(); | |
int baseline; | |
auto label_bg_sz = cv::getTextSize(label.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline); | |
cv::rectangle(frame, cv::Point(rect.x, rect.y - label_bg_sz.height - baseline - 10), cv::Point(rect.x + label_bg_sz.width, rect.y), color, cv::FILLED); | |
cv::putText(frame, label.c_str(), cv::Point(rect.x, rect.y - baseline - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 0)); | |
} | |
} | |
auto total_end = std::chrono::steady_clock::now(); | |
float inference_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(dnn_end - dnn_start).count(); | |
float total_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(total_end - total_start).count(); | |
std::ostringstream stats_ss; | |
stats_ss << std::fixed << std::setprecision(2); | |
stats_ss << "Inference FPS: " << inference_fps << ", Total FPS: " << total_fps; | |
auto stats = stats_ss.str(); | |
int baseline; | |
auto stats_bg_sz = cv::getTextSize(stats.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline); | |
cv::rectangle(frame, cv::Point(0, 0), cv::Point(stats_bg_sz.width, stats_bg_sz.height + 10), cv::Scalar(0, 0, 0), cv::FILLED); | |
cv::putText(frame, stats.c_str(), cv::Point(0, stats_bg_sz.height + 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(255, 255, 255)); | |
cv::namedWindow("output"); | |
cv::imshow("output", frame); | |
} | |
return 0; | |
} |
The Python code works fine for me, but the C++ version does not.
Compile:
/usr/bin/c++ -lstdc++ -g -pg -O0 -pthread -lpthread -lstdc++fs -std=c++14 -std=c++17 -fPIC -std=gnu++11 -rdynamic -I/usr/local/include/tkDNN/ -I/usr/local/cuda/include -std=c++1z -std=gnu++1z CMakeFiles/predev2.dir/src/yolov4_opencv_dnn_cuda.cpp.o -o predev2 -L/usr/local/cuda/lib64 -Wl,-rpath,/usr/local/cuda/lib64:/usr/local/lib -ltkDNN -lcurl /usr/lib/aarch64-linux-gnu/libnvinfer.so /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/librt.so /usr/lib/aarch64-linux-gnu/libcublas.so /usr/lib/aarch64-linux-gnu/libcudnn.so /usr/lib/aarch64-linux-gnu/libnvinfer.so /usr/local/lib/libopencv_gapi.so.4.6.0 /usr/local/lib/libopencv_stitching.so.4.6.0 /usr/local/lib/libopencv_alphamat.so.4.6.0 /usr/local/lib/libopencv_aruco.so.4.6.0 /usr/local/lib/libopencv_barcode.so.4.6.0 /usr/local/lib/libopencv_bgsegm.so.4.6.0 /usr/local/lib/libopencv_bioinspired.so.4.6.0 /usr/local/lib/libopencv_ccalib.so.4.6.0 /usr/local/lib/libopencv_cudabgsegm.so.4.6.0 /usr/local/lib/libopencv_cudafeatures2d.so.4.6.0 /usr/local/lib/libopencv_cudaobjdetect.so.4.6.0 /usr/local/lib/libopencv_cudastereo.so.4.6.0 /usr/local/lib/libopencv_dnn_objdetect.so.4.6.0 /usr/local/lib/libopencv_dnn_superres.so.4.6.0 /usr/local/lib/libopencv_dpm.so.4.6.0 /usr/local/lib/libopencv_face.so.4.6.0 /usr/local/lib/libopencv_freetype.so.4.6.0 /usr/local/lib/libopencv_fuzzy.so.4.6.0 /usr/local/lib/libopencv_hdf.so.4.6.0 /usr/local/lib/libopencv_hfs.so.4.6.0 /usr/local/lib/libopencv_img_hash.so.4.6.0 /usr/local/lib/libopencv_intensity_transform.so.4.6.0 /usr/local/lib/libopencv_line_descriptor.so.4.6.0 /usr/local/lib/libopencv_mcc.so.4.6.0 /usr/local/lib/libopencv_quality.so.4.6.0 /usr/local/lib/libopencv_rapid.so.4.6.0 /usr/local/lib/libopencv_reg.so.4.6.0 /usr/local/lib/libopencv_rgbd.so.4.6.0 /usr/local/lib/libopencv_saliency.so.4.6.0 /usr/local/lib/libopencv_stereo.so.4.6.0 /usr/local/lib/libopencv_structured_light.so.4.6.0 /usr/local/lib/libopencv_superres.so.4.6.0 
/usr/local/lib/libopencv_surface_matching.so.4.6.0 /usr/local/lib/libopencv_tracking.so.4.6.0 /usr/local/lib/libopencv_videostab.so.4.6.0 /usr/local/lib/libopencv_wechat_qrcode.so.4.6.0 /usr/local/lib/libopencv_xfeatures2d.so.4.6.0 /usr/local/lib/libopencv_xobjdetect.so.4.6.0 /usr/local/lib/libopencv_xphoto.so.4.6.0 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/librt.so /usr/lib/aarch64-linux-gnu/libcublas.so /usr/lib/aarch64-linux-gnu/libcudnn.so -lpthread /usr/local/lib/libopencv_shape.so.4.6.0 /usr/local/lib/libopencv_highgui.so.4.6.0 /usr/local/lib/libopencv_datasets.so.4.6.0 /usr/local/lib/libopencv_plot.so.4.6.0 /usr/local/lib/libopencv_text.so.4.6.0 /usr/local/lib/libopencv_ml.so.4.6.0 /usr/local/lib/libopencv_phase_unwrapping.so.4.6.0 /usr/local/lib/libopencv_cudacodec.so.4.6.0 /usr/local/lib/libopencv_videoio.so.4.6.0 /usr/local/lib/libopencv_cudaoptflow.so.4.6.0 /usr/local/lib/libopencv_cudalegacy.so.4.6.0 /usr/local/lib/libopencv_cudawarping.so.4.6.0 /usr/local/lib/libopencv_optflow.so.4.6.0 /usr/local/lib/libopencv_ximgproc.so.4.6.0 /usr/local/lib/libopencv_video.so.4.6.0 /usr/local/lib/libopencv_imgcodecs.so.4.6.0 /usr/local/lib/libopencv_objdetect.so.4.6.0 /usr/local/lib/libopencv_calib3d.so.4.6.0 /usr/local/lib/libopencv_dnn.so.4.6.0 /usr/local/lib/libopencv_features2d.so.4.6.0 /usr/local/lib/libopencv_flann.so.4.6.0 /usr/local/lib/libopencv_photo.so.4.6.0 /usr/local/lib/libopencv_cudaimgproc.so.4.6.0 /usr/local/lib/libopencv_cudafilters.so.4.6.0 /usr/local/lib/libopencv_imgproc.so.4.6.0 /usr/local/lib/libopencv_cudaarithm.so.4.6.0 /usr/local/lib/libopencv_core.so.4.6.0 /usr/local/lib/libopencv_cudev.so.4.6.0
Run:
$ gdb -batch -ex run -ex where -ex list -ex quit --args ./predev2
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/aarch64-linux-gnu/libthread_db.so.1".
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 2 0, bytestream 213160
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 69 0, bytestream 669
[New Thread 0x7f97f79b50 (LWP 3049)]
[New Thread 0x7f97778b50 (LWP 3050)]
[New Thread 0x7f96f77b50 (LWP 3051)]
[New Thread 0x7f96776b50 (LWP 3052)]
[New Thread 0x7f95f75b50 (LWP 3053)]
[New Thread 0x7f95774b50 (LWP 3054)]
[h264 @ 0x5555792890] top block unavailable for requested intra mode -1
[h264 @ 0x5555792890] error while decoding MB 2 0, bytestream 213160
Thread 1 "predev2" received signal SIGSEGV, Segmentation fault.
0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () from /usr/lib/aarch64-linux-gnu/libopencv_core.so.4.1
#0 0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () at /usr/lib/aarch64-linux-gnu/libopencv_core.so.4.1
#1 0x0000007fb6f8edfc in cv::resize(cv::_InputArray const&, cv::_OutputArray const&, cv::Size_<int>, double, double, int) () at /usr/local/lib/libopencv_imgproc.so.406
#2 0x0000007fb72f5d04 in cv::dnn::dnn4_v20220524::blobFromImages(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int>, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/libopencv_dnn.so.406
#3 0x0000007fb72f6864 in cv::dnn::dnn4_v20220524::blobFromImage(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int> const&, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/libopencv_dnn.so.406
#4 0x0000005555560498 in main() () at /home/a/ai22/src/yolov4_opencv_dnn_cuda.cpp:66
16 constexpr float CONFIDENCE_THRESHOLD = 0;
17 constexpr float NMS_THRESHOLD = 0.4;
18 constexpr int NUM_CLASSES = 80;
19
20 // colors for bounding boxes
21 const cv::Scalar colors[] = {
22 {0, 255, 255},
23 {255, 255, 0},
24 {0, 255, 0},
25 {255, 0, 0}
A debugging session is active.
Inferior 1 [process 3043] will be killed.
Quit anyway? (y or n) [answered Y; input not from terminal]
The following line causes the issue. What is the problem, and how can I solve it?
cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F);
Even if I change the size to cv::Size(512, 512) to match my yolov4.cfg, the issue persists.
dfvcd
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@shridharkini6 Please check opencv/opencv#17838
See also: https://stackoverflow.com/questions/68990327/can-i-change-the-batch-size-in-a-yolo-detection-model-in-opencv