Last active
June 14, 2022 15:03
-
-
Save alexcpn/aeb8a4b8304639d8f91cc2fbc0c1c7df to your computer and use it in GitHub Desktop.
opencv Object detection with CUDA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "stdafx.h"
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <time.h>
#include "opencv2/objdetect.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/core/cuda.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
/** Global variables */ | |
String opencv_path = "D:/opencv"; | |
String videoFile = "D:/Coding/resources/313060469_hd_720.mp4"; | |
String outFile = "/tmp/out.avi"; | |
String useAlgo = "hog"; | |
bool useGpu = false; | |
int counter_frames_processed = 0; | |
int counter_frames_skipped = 0; | |
int counter_frames_detected = 0; | |
Size downFrameSize(640, 480); | |
Ptr<cuda::CascadeClassifier> cascade_gpu_upperbody, cascade_gpu_lowerbody, cascade_gpu_fullbody; | |
Ptr<cv::cuda::HOG> gpu_hog; | |
/* | |
These are the setting for HOG Person detector; There is no one setting that is good for all | |
Using daimlerpeopledetector ,see where the SVM is set | |
Default people detector getDefaultPeopleDetector work only with win_width = 48, with GPU it works with | |
win_width = 64 as well; but detection rate is very poor | |
-->OpenCV Error : Assertion failed(checkDetectorSize()) in cv::HOGDescriptor::setSVMDetector | |
*/ | |
int win_width = 48; | |
//48*96 rectangle is found for HOG | |
int cell_width = 8; | |
int nbins = 9; | |
int win_stride_width = 8; | |
int win_stride_height = win_stride_width; | |
int block_width = win_stride_width*2; | |
int num_frames = 1000; // Process 2 fps; usually video will have higher frame rate like 24 fps; We may not need to analze all | |
int hogLevels = HOGDescriptor::DEFAULT_NLEVELS; | |
int hogGroupThreshold = 16; | |
/* From above these below are standard setting*/ | |
Size win_stride(win_stride_width, win_stride_height); | |
Size win_size(win_width, win_width * 2); | |
Size block_size(block_width, block_width); | |
int block_stride_width = block_width / 2; | |
int block_stride_height = block_width / 2; | |
Size block_stride(block_stride_width, block_stride_height); | |
Size cell_size(cell_width, cell_width); | |
cv::HOGDescriptor cpu_hog(win_size, block_size, block_stride, cell_size, nbins, 1, -1, | |
HOGDescriptor::L2Hys, .2, false, hogLevels); | |
cv::CascadeClassifier upperbody_cascade; | |
cv::CascadeClassifier lowerbody_cascade; | |
cv::CascadeClassifier fullbody_cascade; | |
/** | |
Sclar - BGR value | |
**/ | |
void drawMarker(Mat img, std::vector<cv::Rect> found, Scalar sc, int size = 2) { | |
for (int i = 0; i < (int)found.size(); i++) | |
{ | |
cv::Rect r = found[i]; | |
cv::rectangle(img, r, sc, size); | |
} | |
} | |
/** @function detectAndDisplay using CPU */ | |
void detectAndDisplayHOG(Mat img, VideoWriter oVideoWriter, bool useGPU) | |
{ | |
Mat frame; | |
std::vector<cv::Rect> found; | |
//The GroupThreshold and ScaleFactor are the two important parameters | |
//decrease will get more hits, with more false positives | |
int _hitThreshold = 0;// //going mad tuning this for cuda// not to be adjusted | |
double _scaleFactor = 1.01;//1.05;// 20 sec --> huge impact on performance | |
if (useGPU) { | |
cv::cvtColor(img, frame, COLOR_BGR2BGRA);// COLOR_BGR2BGRA); | |
GpuMat gpuFrame(frame); | |
gpu_hog->setScaleFactor(_scaleFactor); | |
gpu_hog->setNumLevels(hogLevels); | |
gpu_hog->setWinStride(win_stride); | |
//gpu_hog->setHitThreshold(0); // play with this at your own risk :) | |
gpu_hog->setGroupThreshold(hogGroupThreshold);// setting it to higher will reduce false positives// give all | |
gpu_hog->detectMultiScale(gpuFrame, found); | |
drawMarker(img, found, Scalar(255, 0, 0), 1);//BGR | |
gpu_hog->setGroupThreshold(hogGroupThreshold*3);// setting it to higher will group more | |
gpu_hog->detectMultiScale(gpuFrame, found); | |
drawMarker(img, found, Scalar(0, 255, 0));//BGR | |
} | |
else | |
{ | |
//std::vector<DetectionROI> locations; | |
//cpu_hog.detectMultiScaleROI(frame, found, locations, _hitThreshold, 0); | |
cv::cvtColor(img, frame, COLOR_BGR2GRAY);//(img.type() == CV_8U || img.type() == CV_8UC3) | |
cpu_hog.detectMultiScale(frame, found, _hitThreshold, win_stride, cv::Size(4, 4), _scaleFactor); | |
drawMarker(img, found, Scalar(255, 0, 0));//BGR | |
} | |
if (found.size() > 1) { | |
counter_frames_detected += 1; | |
} | |
oVideoWriter.write(img); | |
} | |
/** Helper funcitons**/ | |
void setCudaClassifierProperties(Ptr<cuda::CascadeClassifier> classifier) { | |
classifier->setScaleFactor(1.02); // The smaller it is the better, though tradeoff is processing (should be >1 ) | |
classifier->setMinNeighbors(3); // the larger this is there would be less false positives; | |
// However it will also start to miss ;best is 3 to 4, but there are misses wiht this | |
} | |
/** Helper funcitons**/ | |
void run_classifier_detection(Ptr<cuda::CascadeClassifier> classifier, GpuMat gpuGreyFrame, std::vector<cv::Rect> *found) { | |
GpuMat facesBuf_gpu; | |
//Now let the cascaders run | |
setCudaClassifierProperties(classifier); | |
classifier->detectMultiScale(gpuGreyFrame, facesBuf_gpu); | |
classifier->convert(facesBuf_gpu, *found); | |
} | |
/** @function detectAndDisplay using CPU */ | |
void detectAndDisplayHAAR(Mat img, VideoWriter oVideoWriter, bool useGPU) | |
{ | |
Mat frame; | |
//cv::cvtColor(img,frame, COLOR_BGR2BGRA);does not work with HAAR | |
cv::cvtColor(img, frame, COLOR_BGR2GRAY); | |
std::vector<cv::Rect> found; | |
//-- Detect Upper body classifier | |
// http://fewtutorials.bravesites.com/entries/emgu-cv-c/level-3c---how-to-improve-face-detection | |
//Now let the cascaders run, we are running three cascades here | |
// Running on GPU for HAAR is much faster than for CPU | |
if (useGPU) { | |
GpuMat gray_gpu(frame);// , gray_gpu, resized_gpu; | |
//Need to convert and resize before it is able to detect | |
run_classifier_detection(cascade_gpu_upperbody, gray_gpu, &found); | |
drawMarker(img, found, Scalar(0, 255, 0));//Green .BGR | |
run_classifier_detection(cascade_gpu_fullbody, gray_gpu, &found); | |
drawMarker(img, found, Scalar(0, 0, 255));//BGR | |
run_classifier_detection(cascade_gpu_lowerbody, gray_gpu, &found); | |
drawMarker(img, found, Scalar(255, 0, 0));//BGR | |
} | |
else { | |
double scalingFactor = 1.05;// with 1.001,too much false positive | |
int numberOfNeighbours = 3; | |
upperbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, cv::Size(8, 8), cv::Size(32, 32)); | |
drawMarker(img, found, Scalar(0, 255, 0));//Green .BGR | |
lowerbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, cv::Size(8, 8), cv::Size(32, 32)); | |
drawMarker(img, found, Scalar(0, 0, 255));//BGR | |
fullbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, cv::Size(8, 8), cv::Size(32, 32)); | |
drawMarker(img, found, Scalar(255, 0, 0));//BGR | |
} | |
if (found.size() > 1) { | |
counter_frames_detected += 1; | |
} | |
oVideoWriter.write(img); | |
// imshow("opencv", img); | |
} | |
#pragma warning(disable:4996) | |
void getEnvSetting(String &videoFileP, String &opencvPath, String &outFileP, String &useAlgoP, bool &useGpuP) { | |
char* videopath = NULL; | |
videopath = getenv("VIDEO_PATH"); | |
if (videopath != NULL) { | |
videoFileP = videopath; | |
} | |
char* opencvpath = NULL; | |
opencvpath = getenv("OPENCV_PATH"); | |
if (opencvpath != NULL) { | |
opencvPath = opencvpath; | |
} | |
char* out_path = NULL; | |
out_path = getenv("OUT_PATH"); | |
if (out_path != NULL) { | |
outFileP = out_path; | |
} | |
char* usehog = NULL; | |
usehog = getenv("USE_ALGO"); | |
if (usehog != NULL) { | |
useAlgoP = usehog; | |
} | |
char* run_on_gpu = NULL; | |
run_on_gpu = getenv("USE_GPU"); | |
if (run_on_gpu != NULL) { | |
if (strcmp(run_on_gpu, "true") == 0) { | |
useGpuP = 1; | |
} | |
else | |
{ | |
useGpuP = 0; | |
} | |
} | |
} | |
// To run this you need OpenCV compiled with CUDA support (and a machine with CUDA compliant /NVDIA GPU card | |
// Based on the sample program from OpenCV - \opencv\samples\gpu\cascadeclassifier.cpp and other samples in net | |
int main(int argc, char* argv[]) | |
{ | |
cout << "A Simple Object detection test from Video" <<endl; | |
cout << "Set VIDEO_PATH, OPENCV_PATH, USE_GPU=<true/false> USE_ALGO=haar/hog OUT_PATH <output file *avi full path> for configuring" << endl; | |
///assert((win_stride_.width % block_stride_.width == 0 && win_stride_.height % block_stride_.height == 0)); | |
getEnvSetting(videoFile, opencv_path, outFile, useAlgo, useGpu); | |
cout << "videoFile = " << videoFile << endl; | |
cout << "opencvpath = " << opencv_path << endl; | |
cout << "Algorithm Used = " << useAlgo << endl; | |
cout << "run_on_gpu = " << useGpu << endl; | |
cout << "outFile = " << outFile << endl; | |
/** | |
Intialize the Algorithm Settings; The speed as well as false positives depended on these | |
Unfortunately there is no one setting that is good for all | |
**/ | |
VideoCapture cap(videoFile); // open the video file for reading | |
if (!cap.isOpened()) // if not success, exit program | |
{ | |
cout << " Cannot open the video file" << videoFile << endl; | |
return -1; | |
} | |
cout << " Opened the video file" << videoFile << endl; | |
double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video | |
double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video | |
double totalfps = cap.get(CV_CAP_PROP_FRAME_COUNT); | |
Size frameSize(static_cast<int>(dWidth), static_cast<int>(dHeight)); | |
downFrameSize = frameSize; // If you dont want to re-size the frame you could uncomment this , it will take more CPU/GPU | |
cout << " Orginal Frame Size = " << dWidth << "x" << dHeight << endl; | |
cout << " Reduced Frame Size = " << downFrameSize << endl; | |
double fps = cap.get(CV_CAP_PROP_FPS); //get the frames per seconds of the video | |
cout << "Frame per seconds : " << fps << endl; | |
VideoWriter oVideoWriter(outFile, CV_FOURCC('D', 'I', 'V', 'X'), 3, downFrameSize, true); | |
if (!oVideoWriter.isOpened()) //if not initialize the VideoWriter successfully, exit the program | |
{ | |
cout << "ERROR: Failed to write the video" << endl; | |
return -1; | |
} | |
if (useGpu) { | |
if (cv::cuda::getCudaEnabledDeviceCount() == 0) { | |
cout << "No GPU found or the library is compiled without CUDA support" << endl; | |
return -1; | |
} | |
cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice()); | |
if (useAlgo == "hog") { | |
// If you need to detect other objects you need to train it | |
// https://github.com/DaHoC/trainHOG | |
gpu_hog = cv::cuda::HOG::create(win_size, block_size, block_stride, cell_size, nbins); | |
Mat detector = gpu_hog->getDefaultPeopleDetector(); //this will select 48*96 or 64*128 based on window size | |
gpu_hog->setSVMDetector(detector); | |
cout << "Created the CUDA HOG Classifuer" << endl; | |
//cout << gpu_hog->getScaleFactor() << "---" << gpu_hog->getGroupThreshold() << endl; | |
} | |
else //use harr | |
{ | |
//The below are the path to the HAAR trained casrcades | |
//The below taken from http://alereimondo.no-ip.org/OpenCV/34.version?id=60 ; not for commercial use | |
String upperbody_cascade_name = opencv_path + "/data/HS22x20/HS.xml"; //head and sholders | |
//The below are CUDA Classisfier does not work with older format Cascade xmls; the below are from OpenCV source | |
String cuda_lowerbody_cascade_name = opencv_path + "/data/haarcascades_cuda/haarcascade_lowerbody.xml"; | |
String cuda_fullbody_cascade_name = opencv_path + "/data/haarcascades_cuda/haarcascade_fullbody.xml"; | |
cout << "head and Shoulder Cascade Name" << upperbody_cascade_name << "Colored GREEN Rectangle" << endl; | |
cout << "lowerbody_cascade_name" << cuda_lowerbody_cascade_name << "Colored BLUE Rectangle" << endl; | |
cout << "fullbody_cascade_name" << cuda_fullbody_cascade_name << "Colored RED Rectangle" << endl; | |
//Load the GPU/CUdA Compliant video cascaders | |
cascade_gpu_upperbody = cuda::CascadeClassifier::create(upperbody_cascade_name); | |
cascade_gpu_lowerbody = cuda::CascadeClassifier::create(cuda_lowerbody_cascade_name); | |
cascade_gpu_fullbody = cuda::CascadeClassifier::create(cuda_fullbody_cascade_name); | |
cout << "Created the CUDA HAAR Classifiers" << endl; | |
} | |
} | |
else //use CPU | |
{ | |
if (useAlgo == "haar") { | |
//The below are the path to the HAAR trained casrcades | |
//The below taken from http://alereimondo.no-ip.org/OpenCV/34.version?id=60 ; not for commercial use | |
String upperbody_cascade_name = opencv_path + "/data/HS22x20/HS.xml"; //head and sholders | |
String lowerbody_cascade_name = opencv_path + "/data/haarcascades/haarcascade_lowerbody.xml"; | |
String fullbody_cascade_name = opencv_path + "/data/haarcascades/haarcascade_fullbody.xml"; | |
cout << "head and Shoulder Cascade Name" << upperbody_cascade_name << "Colored GREEN Rectangle" << endl; | |
cout << "lowerbody_cascade_name" << lowerbody_cascade_name << "Colored BLUE Rectangle" << endl; | |
cout << "fullbody_cascade_name" << fullbody_cascade_name << "Colored RED Rectangle" << endl; | |
//-- 1. Load the cascades | |
if (!upperbody_cascade.load(upperbody_cascade_name)) { | |
printf("--(!)Error loading UpperBody\n"); | |
return -1; | |
}; | |
if (!lowerbody_cascade.load(lowerbody_cascade_name)) { | |
printf("--(!)Error loading lowerbody \n"); | |
return -1; | |
}; | |
if (!fullbody_cascade.load(fullbody_cascade_name)) { | |
printf("--(!)Error loading fullbody\n"); | |
return -1; | |
}; | |
cout << "Created the HAAR Classifiers" << endl; | |
} | |
else //use hog | |
{ | |
cpu_hog.setSVMDetector(cv::HOGDescriptor::getDaimlerPeopleDetector()); | |
cout << "Set the HOG Classifiers" << endl; | |
} | |
} | |
double delay = num_frames / fps; | |
cout << "Delay is " << delay << endl; | |
clock_t startTimeG = clock(); | |
bool doLoop = true; | |
while (doLoop) | |
{ | |
Mat frame, resized; | |
bool bSuccess = cap.read(frame); // read a new frame from video | |
if (!bSuccess) //if not success, break loop | |
{ | |
cout << "Cannot read the frame from video file" << endl; | |
doLoop = false; | |
break; | |
} | |
counter_frames_processed += 1; | |
cv::resize(frame, resized, downFrameSize);// resize the frame to something smaller- makes computatin faster | |
if (useAlgo == "hog") { | |
detectAndDisplayHOG(resized, oVideoWriter,useGpu); | |
} | |
else //haar | |
{ | |
detectAndDisplayHAAR(resized, oVideoWriter,useGpu); | |
} | |
clock_t endTime = clock() + delay; // this is a wrong way ; mabye multipy by CLOCKS_PER_SEC ? leaving it for now | |
while (clock() < endTime) { // This is the best my card supports | |
if (cap.read(frame)) { //read only one frame per | |
counter_frames_skipped += 1; | |
cout << "Frames processed = " << counter_frames_processed << " Frames found = " | |
<< counter_frames_detected << " Frames skipped = " << counter_frames_skipped | |
<< " Percentage processed = " << int( ((counter_frames_skipped + counter_frames_processed) / totalfps)* 100) | |
<< " % Time taken =" << (clock() - startTimeG) / 1000 << " seconds" | |
<< " \r"; | |
//cv::resize(frame, resized, Size(640, 480));// resize the frame to something smaller- makes computatin faster | |
//cv::putText(resized, "Skipping Frame", cvPoint(30, 30), | |
//FONT_HERSHEY_COMPLEX_SMALL, 1, cvScalar(0, 0, 0), 1, CV_AA); | |
//oVideoWriter.write(resized); | |
} | |
waitKey(1); | |
} | |
} | |
oVideoWriter.release(); | |
cout << "Total time taken = " << (clock() - startTimeG) / 1000 << " seconds" << endl; | |
cout << "counter_frames_processed = " << counter_frames_processed << endl; | |
cout << "counter_frames_skipped = " << counter_frames_skipped << endl; | |
cout << "counter_frames_detected = " << counter_frames_detected << endl; | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment