Skip to content

Instantly share code, notes, and snippets.

@alexcpn
Last active June 14, 2022 15:03
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save alexcpn/aeb8a4b8304639d8f91cc2fbc0c1c7df to your computer and use it in GitHub Desktop.
Save alexcpn/aeb8a4b8304639d8f91cc2fbc0c1c7df to your computer and use it in GitHub Desktop.
opencv Object detection with CUDA
#include "stdafx.h"
#include <iostream>
#include <time.h>
#include "opencv2/objdetect.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/core/cuda.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
/** Global variables */
// Run configuration defaults. main() hands these to getEnvSetting(), which overwrites them
// from the environment variables OPENCV_PATH, VIDEO_PATH, OUT_PATH, USE_ALGO and USE_GPU when set.
String opencv_path = "D:/opencv";
String videoFile = "D:/Coding/resources/313060469_hd_720.mp4";
String outFile = "/tmp/out.avi";
String useAlgo = "hog";
bool useGpu = false;
// Run statistics: frames passed to a detector, frames read but skipped by main()'s skip loop,
// and frames in which a detectAndDisplay* function reported detections.
int counter_frames_processed = 0;
int counter_frames_skipped = 0;
int counter_frames_detected = 0;
// Size every frame is resized to before detection. This is only an initial value:
// main() overwrites it with the frame size of the input video.
Size downFrameSize(640, 480);
// CUDA detectors; main() creates them only when useGpu is true.
Ptr<cuda::CascadeClassifier> cascade_gpu_upperbody, cascade_gpu_lowerbody, cascade_gpu_fullbody;
Ptr<cv::cuda::HOG> gpu_hog;
/*
Settings for the HOG person detector. There is no single setting that is good for all inputs.
The CPU path uses the Daimler people detector (see where the SVM detector is set in main()).
Author's observation: the detector works only with win_width = 48 on the CPU; on the GPU it
also works with win_width = 64, but the detection rate is then very poor.
Error the author recorded, presumably seen when the window size does not match the detector:
-->OpenCV Error : Assertion failed(checkDetectorSize()) in cv::HOGDescriptor::setSVMDetector
*/
int win_width = 48;
// win_size below is (win_width, win_width * 2), i.e. a 48*96 detection window for HOG
int cell_width = 8;
int nbins = 9;
int win_stride_width = 8;
int win_stride_height = win_stride_width;
int block_width = win_stride_width*2;
int num_frames = 1000; // Used only in main() as delay = num_frames / fps, the time budget of the frame-skip loop that follows each analysed frame (a video usually has a higher frame rate, e.g. 24 fps, than needs to be analysed)
int hogLevels = HOGDescriptor::DEFAULT_NLEVELS; // given to both cpu_hog (constructor) and gpu_hog (setNumLevels)
int hogGroupThreshold = 16; // given to gpu_hog->setGroupThreshold (also used multiplied by 3 for a second pass)
/* The values below are derived from the settings above */
Size win_stride(win_stride_width, win_stride_height);
Size win_size(win_width, win_width * 2);
Size block_size(block_width, block_width);
int block_stride_width = block_width / 2;
int block_stride_height = block_width / 2;
Size block_stride(block_stride_width, block_stride_height);
Size cell_size(cell_width, cell_width);
// CPU HOG descriptor built from the sizes above; main() sets its SVM detector on the CPU/HOG path.
cv::HOGDescriptor cpu_hog(win_size, block_size, block_stride, cell_size, nbins, 1, -1,
HOGDescriptor::L2Hys, .2, false, hogLevels);
// CPU HAAR cascades; main() loads them from XML files on the CPU/HAAR path.
cv::CascadeClassifier upperbody_cascade;
cv::CascadeClassifier lowerbody_cascade;
cv::CascadeClassifier fullbody_cascade;
/**
Sclar - BGR value
**/
void drawMarker(Mat img, std::vector<cv::Rect> found, Scalar sc, int size = 2) {
for (int i = 0; i < (int)found.size(); i++)
{
cv::Rect r = found[i];
cv::rectangle(img, r, sc, size);
}
}
/**
@function detectAndDisplayHOG
Runs the HOG people detector on one frame, on the GPU or the CPU depending on useGPU,
draws the detections onto the frame, updates counter_frames_detected and writes the
annotated frame to the output video.
img          - BGR frame to analyse; detection markers are drawn onto it
oVideoWriter - writer that receives the annotated frame
               NOTE(review): passed by value; presumably a cheap handle copy - confirm
useGPU       - true: use gpu_hog, false: use cpu_hog (both are configured in main())
*/
void detectAndDisplayHOG(Mat img, VideoWriter oVideoWriter, bool useGPU)
{
    Mat frame;
    std::vector<cv::Rect> found;
    //The GroupThreshold and ScaleFactor are the two important parameters
    //decreasing them gives more hits, with more false positives
    int hitThreshold = 0;// author's note: not to be adjusted (hard to tune for cuda); used by the CPU path only
    double scaleFactor = 1.01;// author's note: 1.05 took 20 sec --> huge impact on performance
    if (useGPU) {
        // The CUDA detector is fed a 4-channel BGRA image
        cv::cvtColor(img, frame, COLOR_BGR2BGRA);
        GpuMat gpuFrame(frame);
        gpu_hog->setScaleFactor(scaleFactor);
        gpu_hog->setNumLevels(hogLevels);
        gpu_hog->setWinStride(win_stride);
        //gpu_hog->setHitThreshold(0); // play with this at your own risk :)
        // First pass with the base group threshold, drawn with a thin blue outline
        gpu_hog->setGroupThreshold(hogGroupThreshold);// setting it higher will reduce false positives
        gpu_hog->detectMultiScale(gpuFrame, found);
        drawMarker(img, found, Scalar(255, 0, 0), 1);//BGR
        // Second pass with three times the group threshold, drawn in green;
        // 'found' now holds the result of this stricter pass only
        gpu_hog->setGroupThreshold(hogGroupThreshold * 3);// setting it higher will group more
        gpu_hog->detectMultiScale(gpuFrame, found);
        drawMarker(img, found, Scalar(0, 255, 0));//BGR
    }
    else
    {
        //std::vector<DetectionROI> locations;
        //cpu_hog.detectMultiScaleROI(frame, found, locations, hitThreshold, 0);
        cv::cvtColor(img, frame, COLOR_BGR2GRAY);//(img.type() == CV_8U || img.type() == CV_8UC3)
        cpu_hog.detectMultiScale(frame, found, hitThreshold, win_stride, cv::Size(4, 4), scaleFactor);
        drawMarker(img, found, Scalar(255, 0, 0));//BGR
    }
    // Count the frame as soon as it has at least one detection. The previous test
    // (size() > 1) silently ignored frames containing exactly one detection.
    if (!found.empty()) {
        counter_frames_detected += 1;
    }
    oVideoWriter.write(img);
}
/** Helper: applies the tuning values shared by every CUDA cascade classifier. */
void setCudaClassifierProperties(Ptr<cuda::CascadeClassifier> classifier) {
    // Scale factor: the smaller the better, at the cost of processing time (should be > 1).
    const double kScaleFactor = 1.02;
    // Minimum neighbours: larger values give fewer false positives but start to miss
    // detections; 3 to 4 works best, though there are still misses with this.
    const int kMinNeighbors = 3;

    classifier->setScaleFactor(kScaleFactor);
    classifier->setMinNeighbors(kMinNeighbors);
}
/**
Helper: runs one CUDA cascade classifier on a grey-scale GPU frame.
classifier   - cascade to run; its tuning is (re)applied through setCudaClassifierProperties
gpuGreyFrame - frame already uploaded to the GPU
found        - output; receives the detections converted to a vector of rectangles
**/
void run_classifier_detection(Ptr<cuda::CascadeClassifier> classifier, GpuMat gpuGreyFrame, std::vector<cv::Rect> *found) {
    setCudaClassifierProperties(classifier);

    // The detector fills a GPU buffer first; convert() turns it into rectangles.
    GpuMat detectionsBuffer;
    classifier->detectMultiScale(gpuGreyFrame, detectionsBuffer);

    std::vector<cv::Rect> &rectangles = *found;
    classifier->convert(detectionsBuffer, rectangles);
}
/**
@function detectAndDisplayHAAR
Runs the three HAAR cascades (upper body, lower body, full body) on one frame, on the GPU
or the CPU depending on useGPU, draws the detections onto the frame, updates
counter_frames_detected and writes the annotated frame to the output video.
Colours follow the legend printed by main(): upper body GREEN, lower body BLUE, full body RED.
img          - BGR frame to analyse; detection markers are drawn onto it
oVideoWriter - writer that receives the annotated frame
               NOTE(review): passed by value; presumably a cheap handle copy - confirm
useGPU       - true: use the cascade_gpu_* classifiers, false: use the CPU cascades
*/
void detectAndDisplayHAAR(Mat img, VideoWriter oVideoWriter, bool useGPU)
{
    const Scalar green(0, 255, 0);//BGR
    const Scalar blue(255, 0, 0);//BGR
    const Scalar red(0, 0, 255);//BGR

    Mat frame;
    //cv::cvtColor(img,frame, COLOR_BGR2BGRA);does not work with HAAR
    cv::cvtColor(img, frame, COLOR_BGR2GRAY);
    std::vector<cv::Rect> found;
    // Detections summed over all three cascades. 'found' is overwritten by every cascade,
    // so it cannot be used on its own to decide whether the frame had a detection.
    size_t totalDetections = 0;
    // http://fewtutorials.bravesites.com/entries/emgu-cv-c/level-3c---how-to-improve-face-detection
    // Author's note: running HAAR on the GPU is much faster than on the CPU
    if (useGPU) {
        GpuMat gray_gpu(frame);
        run_classifier_detection(cascade_gpu_upperbody, gray_gpu, &found);
        drawMarker(img, found, green);
        totalDetections += found.size();
        run_classifier_detection(cascade_gpu_fullbody, gray_gpu, &found);
        drawMarker(img, found, red);
        totalDetections += found.size();
        run_classifier_detection(cascade_gpu_lowerbody, gray_gpu, &found);
        drawMarker(img, found, blue);
        totalDetections += found.size();
    }
    else {
        double scalingFactor = 1.05;// with 1.001,too much false positive
        int numberOfNeighbours = 3;
        upperbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, cv::Size(8, 8), cv::Size(32, 32));
        drawMarker(img, found, green);
        totalDetections += found.size();
        // Lower body is BLUE and full body is RED, matching the GPU branch and the legend
        // printed at start-up (the two colours used to be swapped in this branch).
        lowerbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, cv::Size(8, 8), cv::Size(32, 32));
        drawMarker(img, found, blue);
        totalDetections += found.size();
        fullbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, cv::Size(8, 8), cv::Size(32, 32));
        drawMarker(img, found, red);
        totalDetections += found.size();
    }
    // Count the frame when any cascade reported at least one detection.
    if (totalDetections > 0) {
        counter_frames_detected += 1;
    }
    oVideoWriter.write(img);
    // imshow("opencv", img);
}
#pragma warning(disable:4996)
void getEnvSetting(String &videoFileP, String &opencvPath, String &outFileP, String &useAlgoP, bool &useGpuP) {
char* videopath = NULL;
videopath = getenv("VIDEO_PATH");
if (videopath != NULL) {
videoFileP = videopath;
}
char* opencvpath = NULL;
opencvpath = getenv("OPENCV_PATH");
if (opencvpath != NULL) {
opencvPath = opencvpath;
}
char* out_path = NULL;
out_path = getenv("OUT_PATH");
if (out_path != NULL) {
outFileP = out_path;
}
char* usehog = NULL;
usehog = getenv("USE_ALGO");
if (usehog != NULL) {
useAlgoP = usehog;
}
char* run_on_gpu = NULL;
run_on_gpu = getenv("USE_GPU");
if (run_on_gpu != NULL) {
if (strcmp(run_on_gpu, "true") == 0) {
useGpuP = 1;
}
else
{
useGpuP = 0;
}
}
}
// To run this you need OpenCV compiled with CUDA support (and a machine with a CUDA compliant NVIDIA GPU card)
// Based on the sample program from OpenCV - \opencv\samples\gpu\cascadeclassifier.cpp and other samples on the net
//
// Flow: read the configuration from the environment, open the input video and the output
// writer, set up the selected detector (HOG or HAAR, on GPU or CPU), then alternate between
// analysing one frame and skipping frames for a short time budget until the video ends.
// Returns 0 after the video has been processed, -1 when the input/output cannot be opened,
// no CUDA device is available, or a CPU cascade file fails to load.
int main(int argc, char* argv[])
{
    cout << "A Simple Object detection test from Video" <<endl;
    cout << "Set VIDEO_PATH, OPENCV_PATH, USE_GPU=<true/false> USE_ALGO=haar/hog OUT_PATH <output file *avi full path> for configuring" << endl;
    ///assert((win_stride_.width % block_stride_.width == 0 && win_stride_.height % block_stride_.height == 0));
    getEnvSetting(videoFile, opencv_path, outFile, useAlgo, useGpu);
    cout << "videoFile = " << videoFile << endl;
    cout << "opencvpath = " << opencv_path << endl;
    cout << "Algorithm Used = " << useAlgo << endl;
    cout << "run_on_gpu = " << useGpu << endl;
    cout << "outFile = " << outFile << endl;
    // Single source of truth for the algorithm choice: HOG only for the exact value "hog",
    // HAAR for anything else. The set-up code and the frame loop must agree on this;
    // previously the CPU set-up configured HOG for an unknown value while the frame loop
    // ran HAAR with cascades that had never been loaded.
    const bool useHog = (useAlgo == "hog");
    /**
    Initialize the algorithm settings; the speed as well as the false positives depend on these.
    Unfortunately there is no one setting that is good for all
    **/
    VideoCapture cap(videoFile); // open the video file for reading
    if (!cap.isOpened()) // if not success, exit program
    {
        cout << " Cannot open the video file" << videoFile << endl;
        return -1;
    }
    cout << " Opened the video file" << videoFile << endl;
    double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
    double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
    double totalfps = cap.get(CV_CAP_PROP_FRAME_COUNT); //total number of frames in the video
    Size frameSize(static_cast<int>(dWidth), static_cast<int>(dHeight));
    downFrameSize = frameSize; // Keeps the original size; comment this out to use the smaller default, which needs less CPU/GPU
    cout << " Orginal Frame Size = " << dWidth << "x" << dHeight << endl;
    cout << " Reduced Frame Size = " << downFrameSize << endl;
    double fps = cap.get(CV_CAP_PROP_FPS); //get the frames per seconds of the video
    cout << "Frame per seconds : " << fps << endl;
    VideoWriter oVideoWriter(outFile, CV_FOURCC('D', 'I', 'V', 'X'), 3, downFrameSize, true);
    if (!oVideoWriter.isOpened()) //if the VideoWriter was not initialized successfully, exit the program
    {
        cout << "ERROR: Failed to write the video" << endl;
        return -1;
    }
    if (useGpu) {
        if (cv::cuda::getCudaEnabledDeviceCount() == 0) {
            cout << "No GPU found or the library is compiled without CUDA support" << endl;
            return -1;
        }
        cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
        if (useHog) {
            // If you need to detect other objects you need to train it
            // https://github.com/DaHoC/trainHOG
            gpu_hog = cv::cuda::HOG::create(win_size, block_size, block_stride, cell_size, nbins);
            Mat detector = gpu_hog->getDefaultPeopleDetector(); //this will select 48*96 or 64*128 based on window size
            gpu_hog->setSVMDetector(detector);
            cout << "Created the CUDA HOG Classifuer" << endl;
            //cout << gpu_hog->getScaleFactor() << "---" << gpu_hog->getGroupThreshold() << endl;
        }
        else //use haar
        {
            //The below are the paths to the HAAR trained cascades
            //The below taken from http://alereimondo.no-ip.org/OpenCV/34.version?id=60 ; not for commercial use
            String upperbody_cascade_name = opencv_path + "/data/HS22x20/HS.xml"; //head and shoulders
            //The CUDA classifier does not work with older format cascade xmls; the below are from the OpenCV source
            String cuda_lowerbody_cascade_name = opencv_path + "/data/haarcascades_cuda/haarcascade_lowerbody.xml";
            String cuda_fullbody_cascade_name = opencv_path + "/data/haarcascades_cuda/haarcascade_fullbody.xml";
            cout << "head and Shoulder Cascade Name" << upperbody_cascade_name << "Colored GREEN Rectangle" << endl;
            cout << "lowerbody_cascade_name" << cuda_lowerbody_cascade_name << "Colored BLUE Rectangle" << endl;
            cout << "fullbody_cascade_name" << cuda_fullbody_cascade_name << "Colored RED Rectangle" << endl;
            //Load the GPU/CUDA compliant cascades
            cascade_gpu_upperbody = cuda::CascadeClassifier::create(upperbody_cascade_name);
            cascade_gpu_lowerbody = cuda::CascadeClassifier::create(cuda_lowerbody_cascade_name);
            cascade_gpu_fullbody = cuda::CascadeClassifier::create(cuda_fullbody_cascade_name);
            cout << "Created the CUDA HAAR Classifiers" << endl;
        }
    }
    else //use CPU
    {
        if (!useHog) {
            //The below are the paths to the HAAR trained cascades
            //The below taken from http://alereimondo.no-ip.org/OpenCV/34.version?id=60 ; not for commercial use
            String upperbody_cascade_name = opencv_path + "/data/HS22x20/HS.xml"; //head and shoulders
            String lowerbody_cascade_name = opencv_path + "/data/haarcascades/haarcascade_lowerbody.xml";
            String fullbody_cascade_name = opencv_path + "/data/haarcascades/haarcascade_fullbody.xml";
            cout << "head and Shoulder Cascade Name" << upperbody_cascade_name << "Colored GREEN Rectangle" << endl;
            cout << "lowerbody_cascade_name" << lowerbody_cascade_name << "Colored BLUE Rectangle" << endl;
            cout << "fullbody_cascade_name" << fullbody_cascade_name << "Colored RED Rectangle" << endl;
            //-- 1. Load the cascades
            if (!upperbody_cascade.load(upperbody_cascade_name)) {
                printf("--(!)Error loading UpperBody\n");
                return -1;
            }
            if (!lowerbody_cascade.load(lowerbody_cascade_name)) {
                printf("--(!)Error loading lowerbody \n");
                return -1;
            }
            if (!fullbody_cascade.load(fullbody_cascade_name)) {
                printf("--(!)Error loading fullbody\n");
                return -1;
            }
            cout << "Created the HAAR Classifiers" << endl;
        }
        else //use hog
        {
            cpu_hog.setSVMDetector(cv::HOGDescriptor::getDaimlerPeopleDetector());
            cout << "Set the HOG Classifiers" << endl;
        }
    }
    // Time budget of the skip loop that follows each analysed frame. A frame rate that is
    // not positive would make the division meaningless (infinite delay), so in that case
    // no frames are skipped.
    double delay = 0;
    if (fps > 0) {
        delay = num_frames / fps;
    }
    else {
        cout << "Could not determine the frame rate; frames will not be skipped" << endl;
    }
    cout << "Delay is " << delay << endl;
    // The delay used to be added to clock() directly, i.e. it was treated as clock ticks.
    // That equals milliseconds only where CLOCKS_PER_SEC is 1000, so convert explicitly
    // from milliseconds to ticks to get the same budget on every platform.
    const clock_t skipTicks = static_cast<clock_t>(delay * CLOCKS_PER_SEC / 1000.0);
    clock_t startTimeG = clock();
    while (true)
    {
        Mat frame, resized;
        if (!cap.read(frame)) // no more frames (or read error): stop processing
        {
            cout << "Cannot read the frame from video file" << endl;
            break;
        }
        counter_frames_processed += 1;
        cv::resize(frame, resized, downFrameSize);// resize the frame to the working size
        if (useHog) {
            detectAndDisplayHOG(resized, oVideoWriter, useGpu);
        }
        else //haar
        {
            detectAndDisplayHAAR(resized, oVideoWriter, useGpu);
        }
        // Skip (read and discard) frames until the time budget is used up
        const clock_t endTime = clock() + skipTicks;
        while (clock() < endTime) {
            if (cap.read(frame)) {
                counter_frames_skipped += 1;
                // The frame count may be reported as 0; avoid dividing by it
                const int percentProcessed = (totalfps > 0)
                    ? int(((counter_frames_skipped + counter_frames_processed) / totalfps) * 100)
                    : 0;
                cout << "Frames processed = " << counter_frames_processed << " Frames found = "
                    << counter_frames_detected << " Frames skipped = " << counter_frames_skipped
                    << " Percentage processed = " << percentProcessed
                    << " % Time taken =" << (clock() - startTimeG) / CLOCKS_PER_SEC << " seconds"
                    << " \r";
                //cv::resize(frame, resized, Size(640, 480));// resize the frame to something smaller- makes computation faster
                //cv::putText(resized, "Skipping Frame", cvPoint(30, 30),
                //FONT_HERSHEY_COMPLEX_SMALL, 1, cvScalar(0, 0, 0), 1, CV_AA);
                //oVideoWriter.write(resized);
            }
            waitKey(1);
        }
    }
    oVideoWriter.release();
    cout << "Total time taken = " << (clock() - startTimeG) / CLOCKS_PER_SEC << " seconds" << endl;
    cout << "counter_frames_processed = " << counter_frames_processed << endl;
    cout << "counter_frames_skipped = " << counter_frames_skipped << endl;
    cout << "counter_frames_detected = " << counter_frames_detected << endl;
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment