Skip to content

Instantly share code, notes, and snippets.

@YashasSamaga
Last active February 18, 2024 04:03
Show Gist options
  • Save YashasSamaga/e2b19a6807a13046e399f4bc3cca3a49 to your computer and use it in GitHub Desktop.
Save YashasSamaga/e2b19a6807a13046e399f4bc3cca3a49 to your computer and use it in GitHub Desktop.
YOLOv4 on OpenCV DNN
import cv2
import time
CONFIDENCE_THRESHOLD = 0.2
NMS_THRESHOLD = 0.4
COLORS = [(0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]
class_names = []
with open("classes.txt", "r") as f:
class_names = [cname.strip() for cname in f.readlines()]
vc = cv2.VideoCapture("demo.mp4")
net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
model = cv2.dnn_DetectionModel(net)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
while cv2.waitKey(1) < 1:
(grabbed, frame) = vc.read()
if not grabbed:
exit()
start = time.time()
classes, scores, boxes = model.detect(frame, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
end = time.time()
start_drawing = time.time()
for (classid, score, box) in zip(classes, scores, boxes):
color = COLORS[int(classid) % len(COLORS)]
label = "%s : %f" % (class_names[classid[0]], score)
cv2.rectangle(frame, box, color, 2)
cv2.putText(frame, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
end_drawing = time.time()
fps_label = "FPS: %.2f (excluding drawing time of %.2fms)" % (1 / (end - start), (end_drawing - start_drawing) * 1000)
cv2.putText(frame, fps_label, (0, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
cv2.imshow("detections", frame)
#include <iostream>
#include <queue>
#include <iterator>
#include <sstream>
#include <fstream>
#include <iomanip>
#include <chrono>
#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
constexpr float CONFIDENCE_THRESHOLD = 0;
constexpr float NMS_THRESHOLD = 0.4;
constexpr int NUM_CLASSES = 80;
// colors for bounding boxes
const cv::Scalar colors[] = {
{0, 255, 255},
{255, 255, 0},
{0, 255, 0},
{255, 0, 0}
};
const auto NUM_COLORS = sizeof(colors)/sizeof(colors[0]);
int main()
{
std::vector<std::string> class_names;
{
std::ifstream class_file("classes.txt");
if (!class_file)
{
std::cerr << "failed to open classes.txt\n";
return 0;
}
std::string line;
while (std::getline(class_file, line))
class_names.push_back(line);
}
cv::VideoCapture source("demo.mp4");
auto net = cv::dnn::readNetFromDarknet("yolov4.cfg", "yolov4.weights");
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
// net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
// net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
auto output_names = net.getUnconnectedOutLayersNames();
cv::Mat frame, blob;
std::vector<cv::Mat> detections;
while(cv::waitKey(1) < 1)
{
source >> frame;
if (frame.empty())
{
cv::waitKey();
break;
}
auto total_start = std::chrono::steady_clock::now();
cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F);
net.setInput(blob);
auto dnn_start = std::chrono::steady_clock::now();
net.forward(detections, output_names);
auto dnn_end = std::chrono::steady_clock::now();
std::vector<int> indices[NUM_CLASSES];
std::vector<cv::Rect> boxes[NUM_CLASSES];
std::vector<float> scores[NUM_CLASSES];
for (auto& output : detections)
{
const auto num_boxes = output.rows;
for (int i = 0; i < num_boxes; i++)
{
auto x = output.at<float>(i, 0) * frame.cols;
auto y = output.at<float>(i, 1) * frame.rows;
auto width = output.at<float>(i, 2) * frame.cols;
auto height = output.at<float>(i, 3) * frame.rows;
cv::Rect rect(x - width/2, y - height/2, width, height);
for (int c = 0; c < NUM_CLASSES; c++)
{
auto confidence = *output.ptr<float>(i, 5 + c);
if (confidence >= CONFIDENCE_THRESHOLD)
{
boxes[c].push_back(rect);
scores[c].push_back(confidence);
}
}
}
}
for (int c = 0; c < NUM_CLASSES; c++)
cv::dnn::NMSBoxes(boxes[c], scores[c], 0.0, NMS_THRESHOLD, indices[c]);
for (int c= 0; c < NUM_CLASSES; c++)
{
for (size_t i = 0; i < indices[c].size(); ++i)
{
const auto color = colors[c % NUM_COLORS];
auto idx = indices[c][i];
const auto& rect = boxes[c][idx];
cv::rectangle(frame, cv::Point(rect.x, rect.y), cv::Point(rect.x + rect.width, rect.y + rect.height), color, 3);
std::ostringstream label_ss;
label_ss << class_names[c] << ": " << std::fixed << std::setprecision(2) << scores[c][idx];
auto label = label_ss.str();
int baseline;
auto label_bg_sz = cv::getTextSize(label.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
cv::rectangle(frame, cv::Point(rect.x, rect.y - label_bg_sz.height - baseline - 10), cv::Point(rect.x + label_bg_sz.width, rect.y), color, cv::FILLED);
cv::putText(frame, label.c_str(), cv::Point(rect.x, rect.y - baseline - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 0));
}
}
auto total_end = std::chrono::steady_clock::now();
float inference_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(dnn_end - dnn_start).count();
float total_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(total_end - total_start).count();
std::ostringstream stats_ss;
stats_ss << std::fixed << std::setprecision(2);
stats_ss << "Inference FPS: " << inference_fps << ", Total FPS: " << total_fps;
auto stats = stats_ss.str();
int baseline;
auto stats_bg_sz = cv::getTextSize(stats.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
cv::rectangle(frame, cv::Point(0, 0), cv::Point(stats_bg_sz.width, stats_bg_sz.height + 10), cv::Scalar(0, 0, 0), cv::FILLED);
cv::putText(frame, stats.c_str(), cv::Point(0, stats_bg_sz.height + 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(255, 255, 255));
cv::namedWindow("output");
cv::imshow("output", frame);
}
return 0;
}
@marvision-ai
Copy link

@dgp52 very interesting! You didn't actually have to train the network with thresh=0.01 correct? Only set it in the .cfg file for a trained model?

@dgp52
Copy link

dgp52 commented Jan 1, 2022

@marvision-ai Sorry for not getting back to you sooner. That is correct. Just needed to update the .cfg file.

@PROGRAMMINGENGINEER-NIKI
Copy link

PROGRAMMINGENGINEER-NIKI commented Jan 13, 2022

Hi Yashas, I need to access GPU power when running YOLOv4. What CUDA and Cudnn version should I install?

Because I installed CUDA 11.5 and cudnn 8.3.2, but when I install OpenCV, it does not find cudnn, as a result, it does not run fast.

@YashasSamaga
Copy link
Author

YashasSamaga commented Jan 13, 2022

I haven't checked the latest versions of cuDNN but cuDNN 7.6.5 gave the best performance six months ago.

@doleron
Copy link

doleron commented Jan 18, 2022

Hi @YashasSamaga , I wrote this code for YOLO V5 / OpenCV / DNN

C++

#include <fstream>

#include <opencv2/opencv.hpp>

std::vector<std::string> load_class_list()
{
    std::vector<std::string> class_list;
    std::ifstream ifs("config_files/classes.txt");
    std::string line;
    while (getline(ifs, line))
    {
        class_list.push_back(line);
    }
    return class_list;
}

void load_net(cv::dnn::Net &net, bool is_cuda)
{
    auto result = cv::dnn::readNet("config_files/yolov5s.onnx");
    if (is_cuda)
    {
        std::cout << "Attempty to use CUDA\n";
        result.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        result.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
    }
    else
    {
        std::cout << "Running on CPU\n";
        result.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        result.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
    net = result;
}

const std::vector<cv::Scalar> colors = {cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0)};

const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
const float SCORE_THRESHOLD = 0.2;
const float NMS_THRESHOLD = 0.4;
const float CONFIDENCE_THRESHOLD = 0.4;

struct Detection
{
    int class_id;
    float confidence;
    cv::Rect box;
};

cv::Mat format_yolov5(const cv::Mat &source) {
    int col = source.cols;
    int row = source.rows;
    int _max = MAX(col, row);
    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    return result;
}

void detect(cv::Mat &image, cv::dnn::Net &net, std::vector<Detection> &output, const std::vector<std::string> &className) {
    cv::Mat blob;

    auto input_image = format_yolov5(image);
    
    cv::dnn::blobFromImage(input_image, blob, 1./255., cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
    net.setInput(blob);
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    float x_factor = input_image.cols / INPUT_WIDTH;
    float y_factor = input_image.rows / INPUT_HEIGHT;
    
    float *data = (float *)outputs[0].data;

    const int dimensions = 85;
    const int rows = 25200;
    
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i) {

        float confidence = data[4];
        if (confidence >= CONFIDENCE_THRESHOLD) {

            float * classes_scores = data + 5;
            cv::Mat scores(1, className.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double max_class_score;
            minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
            if (max_class_score > SCORE_THRESHOLD) {

                confidences.push_back(confidence);

                class_ids.push_back(class_id.x);

                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];
                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);
                boxes.push_back(cv::Rect(left, top, width, height));
            }

        }

        data += 85;

    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
    for (int i = 0; i < nms_result.size(); i++) {
        int idx = nms_result[i];
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        result.box = boxes[idx];
        output.push_back(result);
    }
}

int main(int argc, char **argv)
{

    std::vector<std::string> class_list = load_class_list();

    cv::Mat frame;
    cv::VideoCapture capture("sample.mp4");
    if (!capture.isOpened())
    {
        std::cerr << "Error opening video file\n";
        return -1;
    }

    bool is_cuda = argc > 1 && strcmp(argv[1], "cuda") == 0;

    cv::dnn::Net net;
    load_net(net, is_cuda);

    auto start = std::chrono::high_resolution_clock::now();
    int frame_count = 0;
    float fps = -1;
    int total_frames = 0;

    while (true)
    {
        capture.read(frame);
        if (frame.empty())
        {
            std::cout << "End of stream\n";
            break;
        }

        std::vector<Detection> output;
        detect(frame, net, output, class_list);

        frame_count++;
        total_frames++;

        int detections = output.size();

        for (int i = 0; i < detections; ++i)
        {

            auto detection = output[i];
            auto box = detection.box;
            auto classId = detection.class_id;
            const auto color = colors[classId % colors.size()];
            cv::rectangle(frame, box, color, 3);

            cv::rectangle(frame, cv::Point(box.x, box.y - 20), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
            cv::putText(frame, class_list[classId].c_str(), cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
        }

        if (frame_count >= 30)
        {

            auto end = std::chrono::high_resolution_clock::now();
            fps = frame_count * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();

            frame_count = 0;
            start = std::chrono::high_resolution_clock::now();
        }

        if (fps > 0)
        {

            std::ostringstream fps_label;
            fps_label << std::fixed << std::setprecision(2);
            fps_label << "FPS: " << fps;
            std::string fps_label_str = fps_label.str();

            cv::putText(frame, fps_label_str.c_str(), cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2);
        }

        cv::imshow("output", frame);

        if (cv::waitKey(1) != -1)
        {
            capture.release();
            std::cout << "finished by user\n";
            break;
        }
    }

    std::cout << "Total frames: " << total_frames << "\n";

    return 0;
}

Python

import cv2
import time
import sys
import numpy as np

def build_model(is_cuda):
    net = cv2.dnn.readNet("config_files/yolov5s.onnx")
    if is_cuda:
        print("Attempty to use CUDA")
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
    else:
        print("Running on CPU")
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    return net

INPUT_WIDTH = 640
INPUT_HEIGHT = 640
SCORE_THRESHOLD = 0.2
NMS_THRESHOLD = 0.4
CONFIDENCE_THRESHOLD = 0.4

def detect(image, net):
    blob = cv2.dnn.blobFromImage(image, 1/255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True, crop=False)
    net.setInput(blob)
    preds = net.forward()
    return preds

def load_capture():
    capture = cv2.VideoCapture("sample.mp4")
    return capture

def load_classes():
    class_list = []
    with open("config_files/classes.txt", "r") as f:
        class_list = [cname.strip() for cname in f.readlines()]
    return class_list

class_list = load_classes()

def wrap_detection(input_image, output_data):
    class_ids = []
    confidences = []
    boxes = []

    rows = output_data.shape[0]

    image_width, image_height, _ = input_image.shape

    x_factor = image_width / INPUT_WIDTH
    y_factor =  image_height / INPUT_HEIGHT

    for r in range(rows):
        row = output_data[r]
        confidence = row[4]
        if confidence >= 0.4:

            classes_scores = row[5:]
            _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
            class_id = max_indx[1]
            if (classes_scores[class_id] > .25):

                confidences.append(confidence)

                class_ids.append(class_id)

                x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item() 
                left = int((x - 0.5 * w) * x_factor)
                top = int((y - 0.5 * h) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45) 

    result_class_ids = []
    result_confidences = []
    result_boxes = []

    for i in indexes:
        result_confidences.append(confidences[i])
        result_class_ids.append(class_ids[i])
        result_boxes.append(boxes[i])

    return result_class_ids, result_confidences, result_boxes

def format_yolov5(frame):

    row, col, _ = frame.shape
    _max = max(col, row)
    result = np.zeros((_max, _max, 3), np.uint8)
    result[0:row, 0:col] = frame
    return result


colors = [(255, 255, 0), (0, 255, 0), (0, 255, 255), (255, 0, 0)]

is_cuda = len(sys.argv) > 1 and sys.argv[1] == "cuda"

net = build_model(is_cuda)
capture = load_capture()

start = time.time_ns()
frame_count = 0
total_frames = 0
fps = -1

while True:

    _, frame = capture.read()
    if frame is None:
        print("End of stream")
        break

    inputImage = format_yolov5(frame)
    outs = detect(inputImage, net)

    class_ids, confidences, boxes = wrap_detection(inputImage, outs[0])

    frame_count += 1
    total_frames += 1

    for (classid, confidence, box) in zip(class_ids, confidences, boxes):
         color = colors[int(classid) % len(colors)]
         cv2.rectangle(frame, box, color, 2)
         cv2.rectangle(frame, (box[0], box[1] - 20), (box[0] + box[2], box[1]), color, -1)
         cv2.putText(frame, class_list[classid], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0,0,0))

    if frame_count >= 30:
        end = time.time_ns()
        fps = 1000000000 * frame_count / (end - start)
        frame_count = 0
        start = time.time_ns()
    
    if fps > 0:
        fps_label = "FPS: %.2f" % fps
        cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    cv2.imshow("output", frame)

    if cv2.waitKey(1) > -1:
        print("finished by user")
        break

print("Total frames: " + str(total_frames))

The more update version and instructions to run code can be found here: https://github.com/doleron/yolov4-opencv-cpp-python

@YashGugliya
Copy link

I want to use the output from the opencv object detections how can i do that?

@shridharkini6
Copy link

@YashasSamaga
How to achieve batched inference using cv2.dnn_DetectionModel.detect for yolo

@mochechan
Copy link

mochechan commented Jul 18, 2022

The code in python is fine for me, but its c++ version doesn't work for me.

Compile:

/usr/bin/c++   -lstdc++  -g -pg -O0 -pthread -lpthread -lstdc++fs  -std=c++14 -std=c++17 -fPIC -std=gnu++11 -rdynamic -I/usr/local/include/tkDNN/ -I/usr/local/cuda/include -std=c++1z  -std=gnu++1z  CMakeFiles/predev2.dir/src/yolov4_opencv_dnn_cuda.cpp.o -o predev2   -L/usr/local/cuda/lib64  -Wl,-rpath,/usr/local/cuda/lib64:/usr/local/lib -ltkDNN -lcurl /usr/lib/aarch64-linux-gnu/libnvinfer.so /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/librt.so /usr/lib/aarch64-linux-gnu/libcublas.so /usr/lib/aarch64-linux-gnu/libcudnn.so /usr/lib/aarch64-linux-gnu/libnvinfer.so /usr/local/lib/libopencv_gapi.so.4.6.0 /usr/local/lib/libopencv_stitching.so.4.6.0 /usr/local/lib/libopencv_alphamat.so.4.6.0 /usr/local/lib/libopencv_aruco.so.4.6.0 /usr/local/lib/libopencv_barcode.so.4.6.0 /usr/local/lib/libopencv_bgsegm.so.4.6.0 /usr/local/lib/libopencv_bioinspired.so.4.6.0 /usr/local/lib/libopencv_ccalib.so.4.6.0 /usr/local/lib/libopencv_cudabgsegm.so.4.6.0 /usr/local/lib/libopencv_cudafeatures2d.so.4.6.0 /usr/local/lib/libopencv_cudaobjdetect.so.4.6.0 /usr/local/lib/libopencv_cudastereo.so.4.6.0 /usr/local/lib/libopencv_dnn_objdetect.so.4.6.0 /usr/local/lib/libopencv_dnn_superres.so.4.6.0 /usr/local/lib/libopencv_dpm.so.4.6.0 /usr/local/lib/libopencv_face.so.4.6.0 /usr/local/lib/libopencv_freetype.so.4.6.0 /usr/local/lib/libopencv_fuzzy.so.4.6.0 /usr/local/lib/libopencv_hdf.so.4.6.0 /usr/local/lib/libopencv_hfs.so.4.6.0 /usr/local/lib/libopencv_img_hash.so.4.6.0 /usr/local/lib/libopencv_intensity_transform.so.4.6.0 /usr/local/lib/libopencv_line_descriptor.so.4.6.0 /usr/local/lib/libopencv_mcc.so.4.6.0 /usr/local/lib/libopencv_quality.so.4.6.0 /usr/local/lib/libopencv_rapid.so.4.6.0 /usr/local/lib/libopencv_reg.so.4.6.0 /usr/local/lib/libopencv_rgbd.so.4.6.0 /usr/local/lib/libopencv_saliency.so.4.6.0 /usr/local/lib/libopencv_stereo.so.4.6.0 /usr/local/lib/libopencv_structured_light.so.4.6.0 /usr/local/lib/libopencv_superres.so.4.6.0 /usr/local/lib/libopencv_surface_matching.so.4.6.0 /usr/local/lib/libopencv_tracking.so.4.6.0 /usr/local/lib/libopencv_videostab.so.4.6.0 /usr/local/lib/libopencv_wechat_qrcode.so.4.6.0 /usr/local/lib/libopencv_xfeatures2d.so.4.6.0 /usr/local/lib/libopencv_xobjdetect.so.4.6.0 /usr/local/lib/libopencv_xphoto.so.4.6.0 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/librt.so /usr/lib/aarch64-linux-gnu/libcublas.so /usr/lib/aarch64-linux-gnu/libcudnn.so -lpthread /usr/local/lib/libopencv_shape.so.4.6.0 /usr/local/lib/libopencv_highgui.so.4.6.0 /usr/local/lib/libopencv_datasets.so.4.6.0 /usr/local/lib/libopencv_plot.so.4.6.0 /usr/local/lib/libopencv_text.so.4.6.0 /usr/local/lib/libopencv_ml.so.4.6.0 /usr/local/lib/libopencv_phase_unwrapping.so.4.6.0 /usr/local/lib/libopencv_cudacodec.so.4.6.0 /usr/local/lib/libopencv_videoio.so.4.6.0 /usr/local/lib/libopencv_cudaoptflow.so.4.6.0 /usr/local/lib/libopencv_cudalegacy.so.4.6.0 /usr/local/lib/libopencv_cudawarping.so.4.6.0 /usr/local/lib/libopencv_optflow.so.4.6.0 /usr/local/lib/libopencv_ximgproc.so.4.6.0 /usr/local/lib/libopencv_video.so.4.6.0 /usr/local/lib/libopencv_imgcodecs.so.4.6.0 /usr/local/lib/libopencv_objdetect.so.4.6.0 /usr/local/lib/libopencv_calib3d.so.4.6.0 /usr/local/lib/libopencv_dnn.so.4.6.0 /usr/local/lib/libopencv_features2d.so.4.6.0 /usr/local/lib/libopencv_flann.so.4.6.0 /usr/local/lib/libopencv_photo.so.4.6.0 /usr/local/lib/libopencv_cudaimgproc.so.4.6.0 /usr/local/lib/libopencv_cudafilters.so.4.6.0 /usr/local/lib/libopencv_imgproc.so.4.6.0 /usr/local/lib/libopencv_cudaarithm.so.4.6.0 /usr/local/lib/libopencv_core.so.4.6.0 /usr/local/lib/libopencv_cudev.so.4.6.0 

Run:

$ gdb -batch -ex run -ex where -ex list -ex quit --args  ./predev2 
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/aarch64-linux-gnu/libthread_db.so.1".
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 2 0, bytestream 213160
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 69 0, bytestream 669
[New Thread 0x7f97f79b50 (LWP 3049)]
[New Thread 0x7f97778b50 (LWP 3050)]
[New Thread 0x7f96f77b50 (LWP 3051)]
[New Thread 0x7f96776b50 (LWP 3052)]
[New Thread 0x7f95f75b50 (LWP 3053)]
[New Thread 0x7f95774b50 (LWP 3054)]
[h264 @ 0x5555792890] top block unavailable for requested intra mode -1
[h264 @ 0x5555792890] error while decoding MB 2 0, bytestream 213160

Thread 1 "predev2" received signal SIGSEGV, Segmentation fault.
0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () from /usr/lib/aarch64-linux-gnu/libopencv_core.so.4.1
#0  0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () at /usr/lib/aarch64-linux-gnu/libopencv_core.so.4.1
#1  0x0000007fb6f8edfc in cv::resize(cv::_InputArray const&, cv::_OutputArray const&, cv::Size_<int>, double, double, int) () at /usr/local/lib/libopencv_imgproc.so.406
#2  0x0000007fb72f5d04 in cv::dnn::dnn4_v20220524::blobFromImages(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int>, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/libopencv_dnn.so.406
#3  0x0000007fb72f6864 in cv::dnn::dnn4_v20220524::blobFromImage(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int> const&, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/libopencv_dnn.so.406
#4  0x0000005555560498 in main() () at /home/a/ai22/src/yolov4_opencv_dnn_cuda.cpp:66
16	constexpr float CONFIDENCE_THRESHOLD = 0;
17	constexpr float NMS_THRESHOLD = 0.4;
18	constexpr int NUM_CLASSES = 80;
19	
20	// colors for bounding boxes
21	const cv::Scalar colors[] = {
22	    {0, 255, 255},
23	    {255, 255, 0},
24	    {0, 255, 0},
25	    {255, 0, 0}
A debugging session is active.

	Inferior 1 [process 3043] will be killed.

Quit anyway? (y or n) [answered Y; input not from terminal]

The following line makes the issue. What's the problem? How to solve this problem?

cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F);

Even I change the size to cv::Size(512, 512) to fit my yolov4.cfg, the issue consists.

@fabhost1
Copy link

dfvcd

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment