import cv2
import time

CONFIDENCE_THRESHOLD = 0.2
NMS_THRESHOLD = 0.4
COLORS = [(0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]

class_names = []
with open("classes.txt", "r") as f:
    class_names = [cname.strip() for cname in f.readlines()]

vc = cv2.VideoCapture("demo.mp4")

net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)

model = cv2.dnn_DetectionModel(net)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)

while cv2.waitKey(1) < 1:
    (grabbed, frame) = vc.read()
    if not grabbed:
        exit()

    start = time.time()
    classes, scores, boxes = model.detect(frame, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
    end = time.time()

    start_drawing = time.time()
    for (classid, score, box) in zip(classes, scores, boxes):
        color = COLORS[int(classid) % len(COLORS)]
        label = "%s : %f" % (class_names[classid[0]], score)
        cv2.rectangle(frame, box, color, 2)
        cv2.putText(frame, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    end_drawing = time.time()

    fps_label = "FPS: %.2f (excluding drawing time of %.2fms)" % (1 / (end - start), (end_drawing - start_drawing) * 1000)
    cv2.putText(frame, fps_label, (0, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
    cv2.imshow("detections", frame)
#include <iostream>
#include <queue>
#include <iterator>
#include <sstream>
#include <fstream>
#include <iomanip>
#include <chrono>

#include <opencv2/core.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/dnn/all_layers.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

constexpr float CONFIDENCE_THRESHOLD = 0;
constexpr float NMS_THRESHOLD = 0.4;
constexpr int NUM_CLASSES = 80;

// colors for bounding boxes
const cv::Scalar colors[] = {
    {0, 255, 255},
    {255, 255, 0},
    {0, 255, 0},
    {255, 0, 0}
};
const auto NUM_COLORS = sizeof(colors)/sizeof(colors[0]);

int main()
{
    std::vector<std::string> class_names;
    {
        std::ifstream class_file("classes.txt");
        if (!class_file)
        {
            std::cerr << "failed to open classes.txt\n";
            return 0;
        }

        std::string line;
        while (std::getline(class_file, line))
            class_names.push_back(line);
    }

    cv::VideoCapture source("demo.mp4");

    auto net = cv::dnn::readNetFromDarknet("yolov4.cfg", "yolov4.weights");
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    // net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    // net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    auto output_names = net.getUnconnectedOutLayersNames();

    cv::Mat frame, blob;
    std::vector<cv::Mat> detections;
    while (cv::waitKey(1) < 1)
    {
        source >> frame;
        if (frame.empty())
        {
            cv::waitKey();
            break;
        }

        auto total_start = std::chrono::steady_clock::now();
        cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F);
        net.setInput(blob);

        auto dnn_start = std::chrono::steady_clock::now();
        net.forward(detections, output_names);
        auto dnn_end = std::chrono::steady_clock::now();

        std::vector<int> indices[NUM_CLASSES];
        std::vector<cv::Rect> boxes[NUM_CLASSES];
        std::vector<float> scores[NUM_CLASSES];

        for (auto& output : detections)
        {
            const auto num_boxes = output.rows;
            for (int i = 0; i < num_boxes; i++)
            {
                auto x = output.at<float>(i, 0) * frame.cols;
                auto y = output.at<float>(i, 1) * frame.rows;
                auto width = output.at<float>(i, 2) * frame.cols;
                auto height = output.at<float>(i, 3) * frame.rows;
                cv::Rect rect(x - width/2, y - height/2, width, height);

                for (int c = 0; c < NUM_CLASSES; c++)
                {
                    auto confidence = *output.ptr<float>(i, 5 + c);
                    if (confidence >= CONFIDENCE_THRESHOLD)
                    {
                        boxes[c].push_back(rect);
                        scores[c].push_back(confidence);
                    }
                }
            }
        }

        for (int c = 0; c < NUM_CLASSES; c++)
            cv::dnn::NMSBoxes(boxes[c], scores[c], 0.0, NMS_THRESHOLD, indices[c]);

        for (int c = 0; c < NUM_CLASSES; c++)
        {
            for (size_t i = 0; i < indices[c].size(); ++i)
            {
                const auto color = colors[c % NUM_COLORS];

                auto idx = indices[c][i];
                const auto& rect = boxes[c][idx];
                cv::rectangle(frame, cv::Point(rect.x, rect.y), cv::Point(rect.x + rect.width, rect.y + rect.height), color, 3);

                std::ostringstream label_ss;
                label_ss << class_names[c] << ": " << std::fixed << std::setprecision(2) << scores[c][idx];
                auto label = label_ss.str();

                int baseline;
                auto label_bg_sz = cv::getTextSize(label.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
                cv::rectangle(frame, cv::Point(rect.x, rect.y - label_bg_sz.height - baseline - 10), cv::Point(rect.x + label_bg_sz.width, rect.y), color, cv::FILLED);
                cv::putText(frame, label.c_str(), cv::Point(rect.x, rect.y - baseline - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 0));
            }
        }

        auto total_end = std::chrono::steady_clock::now();

        float inference_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(dnn_end - dnn_start).count();
        float total_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(total_end - total_start).count();
        std::ostringstream stats_ss;
        stats_ss << std::fixed << std::setprecision(2);
        stats_ss << "Inference FPS: " << inference_fps << ", Total FPS: " << total_fps;
        auto stats = stats_ss.str();

        int baseline;
        auto stats_bg_sz = cv::getTextSize(stats.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline);
        cv::rectangle(frame, cv::Point(0, 0), cv::Point(stats_bg_sz.width, stats_bg_sz.height + 10), cv::Scalar(0, 0, 0), cv::FILLED);
        cv::putText(frame, stats.c_str(), cv::Point(0, stats_bg_sz.height + 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(255, 255, 255));

        cv::namedWindow("output");
        cv::imshow("output", frame);
    }

    return 0;
}
@vvmatorin @Fetulhak do you see the same issue if you resize all your images with cv2.resize and scale your annotations to your specific [net] custom_width x custom_height BEFORE training with darknet? I think the goal is to make sure you have your data at the size you want before training the models.
Note that if you do not resize prior to training, this is the slowest training option, as Darknet must continuously resize all of the images as they are loaded from disk. Darknet does not keep a local image cache. When resizing images, Darknet does not maintain or respect aspect ratio unless you use letterbox. All images will be stretched as necessary to match the exact network dimensions defined in the "[net]" section of the cfg file.
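For illustration, a minimal sketch of that pre-resize step (the paths and NET_W/NET_H values are assumptions; they must match your own dataset layout and cfg). Darknet-style .txt annotations store normalized coordinates, so a plain stretch-resize leaves them valid; only letterbox-style resizing would require remapping them:

import cv2
import glob
import os

# must match the [net] width/height in your cfg (assumed values)
NET_W, NET_H = 608, 608

os.makedirs("dataset/resized", exist_ok=True)
for path in glob.glob("dataset/images/*.jpg"):
    img = cv2.imread(path)
    if img is None:
        continue
    # stretch to the exact network dimensions, as Darknet would do internally
    resized = cv2.resize(img, (NET_W, NET_H), interpolation=cv2.INTER_LINEAR)
    cv2.imwrite(os.path.join("dataset/resized", os.path.basename(path)), resized)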
Hi everyone,
I've trained a model in which some objects have multi-label annotations. When I use Darknet for detection, it gives me the labels for each object, but OpenCV-DNN only returns one label.
I used the latest version of Darknet for detection, and I used OpenCV-dnn for detection in OpenCV.
I found this OpenCV example and used it too, and everything is okay, but the inference is slow and FP is high because I have almost 400 classes.
I changed the parameters in
cv::dnn::NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD, indices); but I couldn't get a good result.
Where is the problem?
Thanks in advance!
@zpmmehrdad
I've trained a model in which some objects have multi-label annotations. When I use Darknet for detection, it gives me the labels for each object, but OpenCV-DNN only returns one label.
I suspect it's performing a single NMS across all classes. What you need is classwise NMS: store the boxes of each class separately and perform NMS on each class separately, as in the sketch below.
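A minimal Python sketch of that idea using cv2.dnn.NMSBoxes (classwise_nms is a hypothetical helper, not an OpenCV API; boxes are assumed to be [x, y, w, h] lists):

import cv2
import numpy as np

def classwise_nms(boxes, scores, class_ids, score_thr=0.2, nms_thr=0.4):
    # hypothetical helper: run NMS independently for each class id
    keep = []
    for c in set(class_ids):
        idxs = [i for i, cid in enumerate(class_ids) if cid == c]
        class_boxes = [boxes[i] for i in idxs]
        class_scores = [scores[i] for i in idxs]
        for k in cv2.dnn.NMSBoxes(class_boxes, class_scores, score_thr, nms_thr):
            # NMSBoxes returns Nx1 or flat indices depending on the OpenCV version
            keep.append(idxs[int(np.ravel(k)[0])])
    return keep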
Okay, but the inference is slow and FP is high because I have almost 400 classes.
You should check what step in the pipeline is taking most of the time. I think the DNN inference is probably fast and IO is the bottleneck. You have to identify the bottleneck and try to mitigate it.
@YashasSamaga
I suspect it's performing a single NMS across all classes. What you need is classwise NMS: store the boxes of each class separately and perform NMS on each class separately.
Thanks for the reply. How can I set up NMS for a few specific classes, not all?
@zpmmehrdad The C++ example in this gist does classwise NMS. The DetectionModel class allows you to choose between classwise NMS and across-class NMS.
@YashasSamaga Hi,
I tested DetectionModel, but some objects that are multi-label still get only one class detected. This version works well but has two problems: FP is high and it is very slow with about 450 classes.
@zpmmehrdad Can you try the other NMS configuration in DetectionModel? (try both options and see if one of them meets your requirements)
What is FP? What GPU do you have? Did you use DNN_TARGET_CUDA or DNN_TARGET_CUDA_FP16?
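For reference, recent OpenCV versions expose that NMS switch directly on DetectionModel (a sketch reusing the names from the gist's Python example; check that setNmsAcrossClasses is available in your OpenCV build):

model = cv2.dnn_DetectionModel(net)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)
# False -> classwise NMS (an object can keep one box per class)
# True  -> NMS across all classes (one label per object)
model.setNmsAcrossClasses(False)
classes, scores, boxes = model.detect(frame, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)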
Hi, YashasSamaga!!! Thank you for sharing the code. I am using your code example for inference with a yolov4-tiny model trained on a 416x416 image dataset for face mask detection. Everything works well except one thing: when I stand close to the webcam (approximately 0.5-1 meters), the algorithm constantly decides a face mask is on even if there is no mask on the face; if I stand 1.5 meters or more from the camera, everything works as expected and face masks are detected correctly. Could you please help me tune the code to solve my problem?
I think this is an issue with your dataset used for training and less to do with OpenCV or DNN inference. Maybe your dataset lacks sufficient samples where people are close to the camera. I don't really know much about training models but here is an idea:
- detect faces using a face detector
- crop around the face (include some background too)
- resize the image
This might augment your dataset to have more samples where the face occupies a large portion of the image. I am not sure if this will exactly mimic being closer to the camera but it may work.
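A rough sketch of that augmentation idea (the paths are hypothetical, and the Haar cascade is just one possible face detector). Note that the mask annotations for the resulting crops would still need to be produced or remapped:

import cv2

detector = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")

img = cv2.imread("sample.jpg")
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
for i, (x, y, w, h) in enumerate(detector.detectMultiScale(gray, 1.1, 5)):
    m = int(0.4 * max(w, h))  # include some background around the face
    x0, y0 = max(0, x - m), max(0, y - m)
    x1, y1 = min(img.shape[1], x + w + m), min(img.shape[0], y + h + m)
    crop = cv2.resize(img[y0:y1, x0:x1], (416, 416))  # resize to the training size
    cv2.imwrite("augmented_%d.jpg" % i, crop)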
Thank you very much for the quick reply and the tips! I will try another image dataset with closer faces and see what happens.
Hi @YashasSamaga, I'm having a similar issue to the others: OpenCV is missing some of the detections compared to the Darknet detections. Has anyone found a solution to this issue?
Here's what I have (using Google Colab):
Inside yolov4-custom.cfg
[net]
#Testing
batch=1
subdivisions=1
#Training
#batch=64
#subdivisions=64
width=640
height=640
.
.
filters=27
classes=4
.
Test image size is 640x640
Darknet:
!./darknet/darknet detector test custom_data_sides_colored/labelled_data.data /content/drive/MyDrive/yolo/test_model/sides-colored/yolov4-custom.cfg /content/drive/MyDrive/yolo/backup-yolov4-models/yolov4-custom_final.weights /content/drive/MyDrive/custom-object-detection/color-img/640-bw.jpg -thresh 0.01
Console info after running the above code:
CUDA-version: 11010 (11020), cuDNN: 7.6.5, GPU count: 1
OpenCV version: 3.2.0
.
Notice how it prints "OpenCV version 3.2.0". Could this be part of the problem, since it's using a different version than the installed cv2 version (4.5.3)?
OpenCV:
cv2.__version__
4.5.3
net = cv2.dnn.readNet("/content/drive/MyDrive/yolo/backup-yolov4-models/yolov4-custom_final.weights", "/content/drive/MyDrive/yolo/test_model/sides-colored/yolov4-custom.cfg")
img = cv2.imread('/content/drive/MyDrive/custom-object-detection/color-img/640-bw.jpg', flags=cv2.IMREAD_COLOR)
CONFIDENCE_THRESHOLD = 0.01
NMS_THRESHOLD = 0.4
#Tried DNN_BACKEND_CUDA and DNN_TARGET_CUDA_FP16 but no difference
#net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
#net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
model = cv2.dnn_DetectionModel(net)
model.setInputParams(size=(640, 640), scale=1/255, swapRB=True,crop= False)
classes, scores, boxes = model.detect(img, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
for idx, (classid, score, box) in enumerate(zip(classes, scores, boxes)):
    print("Class: " + str(classid) + " Score:" + str(score))
    cv2.rectangle(img, box, (0, 0, 0), 2)
cv2_imshow(img)
@dgp52 Does it happen with DNN_BACKEND_OPENCV and DNN_TARGET_CPU?
@YashasSamaga I have tried a lot of options to see why this happens but sadly did not find a solution. The results from Darknet are far better than OpenCV-dnn, as well as other TensorFlow implementations of Darknet YOLO.
@dgp52 Does it happen with DNN_BACKEND_OPENCV and DNN_TARGET_CPU?
Correct. It happens with DNN_BACKEND_OPENCV and DNN_TARGET_CPU. I have also tried using DNN_BACKEND_CUDA and DNN_TARGET_CUDA_FP16 but no difference.
@dgp52 @Fetulhak @zpmmehrdad @vvmatorin and others
OpenCV has internal tests, most of which are shared across all backends. The CUDA backend always trails the OpenCV CPU backend in terms of correctness and features; it just mimics what the OpenCV CPU backend does (which acts as the reference implementation). Since the issue also happens in the CPU backend, the patch must first go there. I think an issue should be opened in the OpenCV repository reporting this problem in the OCV CPU backend (mentioning as a side note that the same behavior is observed in the CUDA backend).
Please also check AlexeyAB's comment:
About different resize approaches in the Darknet (letter_box=1 vs letter_box=0) and OpenCV-dnn AlexeyAB/darknet#232 (comment)
@YashasSamaga @vvmatorin @marvision-ai @Fetulhak
Explicitly setting the thresh value in the test cfg file worked for me! Now it's giving me exact detections!
Here's what I think is happening. Feel free to correct me, but it seems like OpenCV has a default confidence value it checks against during the detection process, almost like a minimum threshold, so anything lower than this value won't get detected. We can overwrite this value by adding thresh = 0.01 (or whatever value you would like). Add this value for all three yolo layers.
Ex:
[yolo]
thresh = 0.01
.
.
[yolo]
thresh = 0.01
.
.
[yolo]
thresh = 0.01
I'm curious to see if this works for others.
@dgp52 this seems very interesting if it works. I will check it for my dataset.
When I changed the yolov3 cfg and weights to yolov4's, a cv::Exception error occurred during dnn::readNetFromDarknet. I think it may be an OpenCV version problem, because 4.2 was released at the end of last year while yolov4 was only proposed in April this year.
I got the same error before...
I just skipped readNetFromDarknet and used dnn.readNet instead.
Starting from OpenCV 4.5.4, classid is no longer a list, so the code needs to be fixed
from
label = "%s : %f" % (class_names[classid[0]], score)
to
label = "%s : %f" % (class_names[classid], score)
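If the same script has to run on both older and newer OpenCV, one way to stay compatible (a sketch, not from the original gist):

import numpy as np

# classid may be a 1-element array (OpenCV < 4.5.4) or a scalar (>= 4.5.4)
cid = int(np.ravel(classid)[0])
label = "%s : %f" % (class_names[cid], score)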
@dgp52 very interesting! You didn't actually have to train the network with thresh=0.01, correct? Only set it in the .cfg file for a trained model?
@marvision-ai Sorry for not getting back to you sooner. That is correct. Just needed to update the .cfg file.
Hi Yashas, I need to use GPU acceleration when running YOLOv4. Which CUDA and cuDNN versions should I install?
I installed CUDA 11.5 and cuDNN 8.3.2, but when I build OpenCV it does not find cuDNN, and as a result it does not run fast.
I haven't checked the latest versions of cuDNN but cuDNN 7.6.5 gave the best performance six months ago.
Hi @YashasSamaga, I wrote this code for YOLOv5 / OpenCV / DNN.
C++
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <chrono>

#include <opencv2/opencv.hpp>

std::vector<std::string> load_class_list()
{
    std::vector<std::string> class_list;
    std::ifstream ifs("config_files/classes.txt");
    std::string line;
    while (getline(ifs, line))
    {
        class_list.push_back(line);
    }
    return class_list;
}

void load_net(cv::dnn::Net &net, bool is_cuda)
{
    auto result = cv::dnn::readNet("config_files/yolov5s.onnx");
    if (is_cuda)
    {
        std::cout << "Attempting to use CUDA\n";
        result.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        result.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA_FP16);
    }
    else
    {
        std::cout << "Running on CPU\n";
        result.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        result.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
    net = result;
}

const std::vector<cv::Scalar> colors = {cv::Scalar(255, 255, 0), cv::Scalar(0, 255, 0), cv::Scalar(0, 255, 255), cv::Scalar(255, 0, 0)};

const float INPUT_WIDTH = 640.0;
const float INPUT_HEIGHT = 640.0;
const float SCORE_THRESHOLD = 0.2;
const float NMS_THRESHOLD = 0.4;
const float CONFIDENCE_THRESHOLD = 0.4;

struct Detection
{
    int class_id;
    float confidence;
    cv::Rect box;
};

// pad the frame to a square so the stretch to 640x640 preserves aspect ratio
cv::Mat format_yolov5(const cv::Mat &source) {
    int col = source.cols;
    int row = source.rows;
    int _max = MAX(col, row);
    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
    source.copyTo(result(cv::Rect(0, 0, col, row)));
    return result;
}

void detect(cv::Mat &image, cv::dnn::Net &net, std::vector<Detection> &output, const std::vector<std::string> &className) {
    cv::Mat blob;

    auto input_image = format_yolov5(image);

    cv::dnn::blobFromImage(input_image, blob, 1./255., cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
    net.setInput(blob);
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    float x_factor = input_image.cols / INPUT_WIDTH;
    float y_factor = input_image.rows / INPUT_HEIGHT;

    float *data = (float *)outputs[0].data;

    const int dimensions = 85;
    const int rows = 25200;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i) {
        float confidence = data[4];
        if (confidence >= CONFIDENCE_THRESHOLD) {
            float *classes_scores = data + 5;
            cv::Mat scores(1, className.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double max_class_score;
            minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
            if (max_class_score > SCORE_THRESHOLD) {
                confidences.push_back(confidence);
                class_ids.push_back(class_id.x);

                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];
                int left = int((x - 0.5 * w) * x_factor);
                int top = int((y - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        data += 85;
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
    for (int i = 0; i < nms_result.size(); i++) {
        int idx = nms_result[i];
        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];
        result.box = boxes[idx];
        output.push_back(result);
    }
}

int main(int argc, char **argv)
{
    std::vector<std::string> class_list = load_class_list();

    cv::Mat frame;
    cv::VideoCapture capture("sample.mp4");
    if (!capture.isOpened())
    {
        std::cerr << "Error opening video file\n";
        return -1;
    }

    bool is_cuda = argc > 1 && strcmp(argv[1], "cuda") == 0;

    cv::dnn::Net net;
    load_net(net, is_cuda);

    auto start = std::chrono::high_resolution_clock::now();
    int frame_count = 0;
    float fps = -1;
    int total_frames = 0;

    while (true)
    {
        capture.read(frame);
        if (frame.empty())
        {
            std::cout << "End of stream\n";
            break;
        }

        std::vector<Detection> output;
        detect(frame, net, output, class_list);

        frame_count++;
        total_frames++;

        int detections = output.size();

        for (int i = 0; i < detections; ++i)
        {
            auto detection = output[i];
            auto box = detection.box;
            auto classId = detection.class_id;
            const auto color = colors[classId % colors.size()];
            cv::rectangle(frame, box, color, 3);
            cv::rectangle(frame, cv::Point(box.x, box.y - 20), cv::Point(box.x + box.width, box.y), color, cv::FILLED);
            cv::putText(frame, class_list[classId].c_str(), cv::Point(box.x, box.y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
        }

        if (frame_count >= 30)
        {
            auto end = std::chrono::high_resolution_clock::now();
            fps = frame_count * 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
            frame_count = 0;
            start = std::chrono::high_resolution_clock::now();
        }

        if (fps > 0)
        {
            std::ostringstream fps_label;
            fps_label << std::fixed << std::setprecision(2);
            fps_label << "FPS: " << fps;
            std::string fps_label_str = fps_label.str();
            cv::putText(frame, fps_label_str.c_str(), cv::Point(10, 25), cv::FONT_HERSHEY_SIMPLEX, 1, cv::Scalar(0, 0, 255), 2);
        }

        cv::imshow("output", frame);
        if (cv::waitKey(1) != -1)
        {
            capture.release();
            std::cout << "finished by user\n";
            break;
        }
    }

    std::cout << "Total frames: " << total_frames << "\n";
    return 0;
}
Python
import cv2
import time
import sys
import numpy as np

def build_model(is_cuda):
    net = cv2.dnn.readNet("config_files/yolov5s.onnx")
    if is_cuda:
        print("Attempting to use CUDA")
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
    else:
        print("Running on CPU")
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
    return net

INPUT_WIDTH = 640
INPUT_HEIGHT = 640
SCORE_THRESHOLD = 0.2
NMS_THRESHOLD = 0.4
CONFIDENCE_THRESHOLD = 0.4

def detect(image, net):
    blob = cv2.dnn.blobFromImage(image, 1/255.0, (INPUT_WIDTH, INPUT_HEIGHT), swapRB=True, crop=False)
    net.setInput(blob)
    preds = net.forward()
    return preds

def load_capture():
    capture = cv2.VideoCapture("sample.mp4")
    return capture

def load_classes():
    class_list = []
    with open("config_files/classes.txt", "r") as f:
        class_list = [cname.strip() for cname in f.readlines()]
    return class_list

class_list = load_classes()

def wrap_detection(input_image, output_data):
    class_ids = []
    confidences = []
    boxes = []

    rows = output_data.shape[0]

    image_height, image_width, _ = input_image.shape  # shape is (rows, cols, channels)

    x_factor = image_width / INPUT_WIDTH
    y_factor = image_height / INPUT_HEIGHT

    for r in range(rows):
        row = output_data[r]
        confidence = row[4]
        if confidence >= 0.4:
            classes_scores = row[5:]
            _, _, _, max_indx = cv2.minMaxLoc(classes_scores)
            class_id = max_indx[1]
            if (classes_scores[class_id] > .25):
                confidences.append(confidence)
                class_ids.append(class_id)

                x, y, w, h = row[0].item(), row[1].item(), row[2].item(), row[3].item()
                left = int((x - 0.5 * w) * x_factor)
                top = int((y - 0.5 * h) * y_factor)
                width = int(w * x_factor)
                height = int(h * y_factor)
                box = np.array([left, top, width, height])
                boxes.append(box)

    indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.25, 0.45)

    result_class_ids = []
    result_confidences = []
    result_boxes = []

    for i in indexes:
        result_confidences.append(confidences[i])
        result_class_ids.append(class_ids[i])
        result_boxes.append(boxes[i])

    return result_class_ids, result_confidences, result_boxes

# pad the frame to a square so the stretch to 640x640 preserves aspect ratio
def format_yolov5(frame):
    row, col, _ = frame.shape
    _max = max(col, row)
    result = np.zeros((_max, _max, 3), np.uint8)
    result[0:row, 0:col] = frame
    return result

colors = [(255, 255, 0), (0, 255, 0), (0, 255, 255), (255, 0, 0)]

is_cuda = len(sys.argv) > 1 and sys.argv[1] == "cuda"

net = build_model(is_cuda)
capture = load_capture()

start = time.time_ns()
frame_count = 0
total_frames = 0
fps = -1

while True:
    _, frame = capture.read()
    if frame is None:
        print("End of stream")
        break

    inputImage = format_yolov5(frame)
    outs = detect(inputImage, net)

    class_ids, confidences, boxes = wrap_detection(inputImage, outs[0])

    frame_count += 1
    total_frames += 1

    for (classid, confidence, box) in zip(class_ids, confidences, boxes):
        color = colors[int(classid) % len(colors)]
        cv2.rectangle(frame, box, color, 2)
        cv2.rectangle(frame, (box[0], box[1] - 20), (box[0] + box[2], box[1]), color, -1)
        cv2.putText(frame, class_list[classid], (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, .5, (0, 0, 0))

    if frame_count >= 30:
        end = time.time_ns()
        fps = 1000000000 * frame_count / (end - start)
        frame_count = 0
        start = time.time_ns()

    if fps > 0:
        fps_label = "FPS: %.2f" % fps
        cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

    cv2.imshow("output", frame)

    if cv2.waitKey(1) > -1:
        print("finished by user")
        break

print("Total frames: " + str(total_frames))
A more up-to-date version of the code, with instructions to run it, can be found here: https://github.com/doleron/yolov4-opencv-cpp-python
I want to use the output from the OpenCV object detections. How can I do that?
@YashasSamaga
How can I achieve batched inference using cv2.dnn_DetectionModel.detect for YOLO?
The Python code works fine for me, but the C++ version doesn't.
Compile:
/usr/bin/c++ -lstdc++ -g -pg -O0 -pthread -lpthread -lstdc++fs -std=c++14 -std=c++17 -fPIC -std=gnu++11 -rdynamic -I/usr/local/include/tkDNN/ -I/usr/local/cuda/include -std=c++1z -std=gnu++1z CMakeFiles/predev2.dir/src/yolov4_opencv_dnn_cuda.cpp.o -o predev2 -L/usr/local/cuda/lib64 -Wl,-rpath,/usr/local/cuda/lib64:/usr/local/lib -ltkDNN -lcurl /usr/lib/aarch64-linux-gnu/libnvinfer.so /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/librt.so /usr/lib/aarch64-linux-gnu/libcublas.so /usr/lib/aarch64-linux-gnu/libcudnn.so /usr/lib/aarch64-linux-gnu/libnvinfer.so /usr/local/lib/libopencv_gapi.so.4.6.0 /usr/local/lib/libopencv_stitching.so.4.6.0 /usr/local/lib/libopencv_alphamat.so.4.6.0 /usr/local/lib/libopencv_aruco.so.4.6.0 /usr/local/lib/libopencv_barcode.so.4.6.0 /usr/local/lib/libopencv_bgsegm.so.4.6.0 /usr/local/lib/libopencv_bioinspired.so.4.6.0 /usr/local/lib/libopencv_ccalib.so.4.6.0 /usr/local/lib/libopencv_cudabgsegm.so.4.6.0 /usr/local/lib/libopencv_cudafeatures2d.so.4.6.0 /usr/local/lib/libopencv_cudaobjdetect.so.4.6.0 /usr/local/lib/libopencv_cudastereo.so.4.6.0 /usr/local/lib/libopencv_dnn_objdetect.so.4.6.0 /usr/local/lib/libopencv_dnn_superres.so.4.6.0 /usr/local/lib/libopencv_dpm.so.4.6.0 /usr/local/lib/libopencv_face.so.4.6.0 /usr/local/lib/libopencv_freetype.so.4.6.0 /usr/local/lib/libopencv_fuzzy.so.4.6.0 /usr/local/lib/libopencv_hdf.so.4.6.0 /usr/local/lib/libopencv_hfs.so.4.6.0 /usr/local/lib/libopencv_img_hash.so.4.6.0 /usr/local/lib/libopencv_intensity_transform.so.4.6.0 /usr/local/lib/libopencv_line_descriptor.so.4.6.0 /usr/local/lib/libopencv_mcc.so.4.6.0 /usr/local/lib/libopencv_quality.so.4.6.0 /usr/local/lib/libopencv_rapid.so.4.6.0 /usr/local/lib/libopencv_reg.so.4.6.0 /usr/local/lib/libopencv_rgbd.so.4.6.0 /usr/local/lib/libopencv_saliency.so.4.6.0 /usr/local/lib/libopencv_stereo.so.4.6.0 /usr/local/lib/libopencv_structured_light.so.4.6.0 /usr/local/lib/libopencv_superres.so.4.6.0 /usr/local/lib/libopencv_surface_matching.so.4.6.0 /usr/local/lib/libopencv_tracking.so.4.6.0 /usr/local/lib/libopencv_videostab.so.4.6.0 /usr/local/lib/libopencv_wechat_qrcode.so.4.6.0 /usr/local/lib/libopencv_xfeatures2d.so.4.6.0 /usr/local/lib/libopencv_xobjdetect.so.4.6.0 /usr/local/lib/libopencv_xphoto.so.4.6.0 /usr/local/cuda/lib64/libcudart_static.a -ldl /usr/lib/aarch64-linux-gnu/librt.so /usr/lib/aarch64-linux-gnu/libcublas.so /usr/lib/aarch64-linux-gnu/libcudnn.so -lpthread /usr/local/lib/libopencv_shape.so.4.6.0 /usr/local/lib/libopencv_highgui.so.4.6.0 /usr/local/lib/libopencv_datasets.so.4.6.0 /usr/local/lib/libopencv_plot.so.4.6.0 /usr/local/lib/libopencv_text.so.4.6.0 /usr/local/lib/libopencv_ml.so.4.6.0 /usr/local/lib/libopencv_phase_unwrapping.so.4.6.0 /usr/local/lib/libopencv_cudacodec.so.4.6.0 /usr/local/lib/libopencv_videoio.so.4.6.0 /usr/local/lib/libopencv_cudaoptflow.so.4.6.0 /usr/local/lib/libopencv_cudalegacy.so.4.6.0 /usr/local/lib/libopencv_cudawarping.so.4.6.0 /usr/local/lib/libopencv_optflow.so.4.6.0 /usr/local/lib/libopencv_ximgproc.so.4.6.0 /usr/local/lib/libopencv_video.so.4.6.0 /usr/local/lib/libopencv_imgcodecs.so.4.6.0 /usr/local/lib/libopencv_objdetect.so.4.6.0 /usr/local/lib/libopencv_calib3d.so.4.6.0 /usr/local/lib/libopencv_dnn.so.4.6.0 /usr/local/lib/libopencv_features2d.so.4.6.0 /usr/local/lib/libopencv_flann.so.4.6.0 /usr/local/lib/libopencv_photo.so.4.6.0 /usr/local/lib/libopencv_cudaimgproc.so.4.6.0 /usr/local/lib/libopencv_cudafilters.so.4.6.0 /usr/local/lib/libopencv_imgproc.so.4.6.0 
/usr/local/lib/libopencv_cudaarithm.so.4.6.0 /usr/local/lib/libopencv_core.so.4.6.0 /usr/local/lib/libopencv_cudev.so.4.6.0
Run:
$ gdb -batch -ex run -ex where -ex list -ex quit --args ./predev2
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/aarch64-linux-gnu/libthread_db.so.1".
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 2 0, bytestream 213160
[h264 @ 0x55557908e0] top block unavailable for requested intra mode -1
[h264 @ 0x55557908e0] error while decoding MB 69 0, bytestream 669
[New Thread 0x7f97f79b50 (LWP 3049)]
[New Thread 0x7f97778b50 (LWP 3050)]
[New Thread 0x7f96f77b50 (LWP 3051)]
[New Thread 0x7f96776b50 (LWP 3052)]
[New Thread 0x7f95f75b50 (LWP 3053)]
[New Thread 0x7f95774b50 (LWP 3054)]
[h264 @ 0x5555792890] top block unavailable for requested intra mode -1
[h264 @ 0x5555792890] error while decoding MB 2 0, bytestream 213160
Thread 1 "predev2" received signal SIGSEGV, Segmentation fault.
0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () from /usr/lib/aarch64-linux-gnu/libopencv_core.so.4.1
#0 0x0000007fa3cbadc0 in cv::_InputArray::size(int) const () at /usr/lib/aarch64-linux-gnu/libopencv_core.so.4.1
#1 0x0000007fb6f8edfc in cv::resize(cv::_InputArray const&, cv::_OutputArray const&, cv::Size_<int>, double, double, int) () at /usr/local/lib/libopencv_imgproc.so.406
#2 0x0000007fb72f5d04 in cv::dnn::dnn4_v20220524::blobFromImages(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int>, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/libopencv_dnn.so.406
#3 0x0000007fb72f6864 in cv::dnn::dnn4_v20220524::blobFromImage(cv::_InputArray const&, cv::_OutputArray const&, double, cv::Size_<int> const&, cv::Scalar_<double> const&, bool, bool, int) () at /usr/local/lib/libopencv_dnn.so.406
#4 0x0000005555560498 in main() () at /home/a/ai22/src/yolov4_opencv_dnn_cuda.cpp:66
16 constexpr float CONFIDENCE_THRESHOLD = 0;
17 constexpr float NMS_THRESHOLD = 0.4;
18 constexpr int NUM_CLASSES = 80;
19
20 // colors for bounding boxes
21 const cv::Scalar colors[] = {
22 {0, 255, 255},
23 {255, 255, 0},
24 {0, 255, 0},
25 {255, 0, 0}
A debugging session is active.
Inferior 1 [process 3043] will be killed.
Quit anyway? (y or n) [answered Y; input not from terminal]
The following line causes the issue. What's the problem? How do I solve it?
cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F);
Even if I change the size to cv::Size(512, 512) to fit my yolov4.cfg, the issue persists.