alexcpn/VideoTestHaar.cpp

## VideoTestHaar.cpp
#include "stdafx.h"
#include <iostream>
#include <time.h>

#include "opencv2/objdetect.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/cudaobjdetect.hpp"
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/cudawarping.hpp"
#include "opencv2/core/cuda.hpp"

using namespace std;
using namespace cv;
using namespace cv::cuda;


/** Global variables */
// Run-time configuration defaults. main() hands the first five (opencv_path, videoFile,
// outFile, useAlgo, useGpu) to getEnvSetting(), which overrides them from environment variables.
String opencv_path = "D:/opencv";
String videoFile = "D:/Coding/resources/313060469_hd_720.mp4";
String outFile = "/tmp/out.avi";
String useAlgo = "hog";
bool useGpu = false;
// Progress counters, updated by main() and by the detectAndDisplay* functions.
int counter_frames_processed = 0;
int counter_frames_skipped = 0;
int counter_frames_detected = 0;
// Size every frame is resized to before detection; main() overwrites it with the source frame size.
Size  downFrameSize(640, 480);


// CUDA detectors. They stay empty here and are created in main() only when running on the GPU.
Ptr<cuda::CascadeClassifier> cascade_gpu_upperbody, cascade_gpu_lowerbody, cascade_gpu_fullbody;
Ptr<cv::cuda::HOG> gpu_hog;

/*
Settings for the HOG person detector. There is no single setting that is good for every video.
The CPU path uses the Daimler people detector (see where the SVM is set in main()).
Author's notes: the default people detector (getDefaultPeopleDetector) worked only with
win_width = 48; on the GPU it also worked with win_width = 64, but the detection rate was very poor.
A window size that does not match the detector fails with:
 -->OpenCV Error : Assertion failed(checkDetectorSize()) in cv::HOGDescriptor::setSVMDetector
*/
int win_width = 48;
// The detection window is win_width x (2 * win_width), i.e. a 48*96 rectangle (see win_size below).
int cell_width = 8;
int nbins = 9;
int win_stride_width = 8;
int win_stride_height = win_stride_width;
int block_width = win_stride_width*2;
int num_frames = 1000; // Numerator of the skip delay: main() computes delay = num_frames / fps. Despite the name this is not a frame count. NOTE(review): the original comment said "Process 2 fps" - confirm the intended rate.

// Number of HOG pyramid levels, used by both the CPU descriptor below and the GPU detector.
int hogLevels =  HOGDescriptor::DEFAULT_NLEVELS;
// Base grouping threshold for the GPU HOG passes (detectAndDisplayHOG also uses 3x this value).
int hogGroupThreshold = 16;
/* The values below are derived from the settings above. */
Size win_stride(win_stride_width, win_stride_height);
Size win_size(win_width, win_width * 2);
Size block_size(block_width, block_width);
// The block stride is half a block in each direction.
int block_stride_width = block_width / 2;
int block_stride_height = block_width / 2;
Size block_stride(block_stride_width, block_stride_height);
Size cell_size(cell_width, cell_width);

// CPU HOG descriptor. Its SVM detector is only set in main() when the CPU HOG path is selected.
// NOTE(review): the positional arguments after nbins presumably map to derivAperture, winSigma,
// histogramNormType, L2HysThreshold, gammaCorrection and nlevels - confirm against the OpenCV docs.
cv::HOGDescriptor cpu_hog(win_size, block_size, block_stride, cell_size, nbins, 1, -1,
	HOGDescriptor::L2Hys, .2, false, hogLevels);

// CPU Haar cascades; main() loads them from XML files when USE_ALGO is "haar" and the GPU is off.
cv::CascadeClassifier upperbody_cascade;
cv::CascadeClassifier lowerbody_cascade;
cv::CascadeClassifier fullbody_cascade;

/**
Outlines every rectangle in `found` on `img`.
sc   - outline colour given as a BGR Scalar
size - line thickness handed to cv::rectangle (defaults to 2)
Callers in this file draw through this function and then write the same Mat to the
output video, i.e. they rely on the drawing landing in their image.
**/
void drawMarker(Mat img, std::vector<cv::Rect>  found, Scalar sc, int size = 2) {

	typedef std::vector<cv::Rect>::const_iterator RectIter;

	for (RectIter box = found.begin(); box != found.end(); ++box)
	{
		cv::rectangle(img, *box, sc, size);
	}
}


/**
 @function detectAndDisplayHOG
 Runs the HOG people detector on one BGR frame, draws the detections on it,
 updates counter_frames_detected and appends the frame to the output video.

 img          - BGR frame to analyse; rectangles are drawn onto it before it is written
 oVideoWriter - writer the annotated frame is appended to
 useGPU       - true: use the global gpu_hog (must have been created by main());
                false: use the global cpu_hog

 An empty frame is ignored (nothing is counted or written).
 A frame counts as "detected" when any detection pass returns at least one rectangle.
*/
void detectAndDisplayHOG(Mat img, VideoWriter oVideoWriter, bool useGPU)
{
	if (img.empty()) {
		return; // cvtColor would assert on an empty frame
	}

	Mat frame;
	std::vector<cv::Rect>  found;
	bool detected = false; // set as soon as any pass finds something
	//The GroupThreshold and ScaleFactor are the two important parameters
	//decreasing them gives more hits, with more false positives
	double _hitThreshold = 0;   // author's note: not to be adjusted (only used on the CPU path)
	double _scaleFactor = 1.01; // author's note: 1.05 was also tried; huge impact on performance

	if (useGPU) {
		// The frame is converted to 4-channel BGRA before it is uploaded for the CUDA HOG.
		cv::cvtColor(img, frame, COLOR_BGR2BGRA);
		GpuMat gpuFrame(frame);
		gpu_hog->setScaleFactor(_scaleFactor);
		gpu_hog->setNumLevels(hogLevels);
		gpu_hog->setWinStride(win_stride);
		//gpu_hog->setHitThreshold(0); // play with this at your own risk :)

		// Pass 1: base group threshold, drawn as thin blue (BGR) boxes.
		gpu_hog->setGroupThreshold(hogGroupThreshold);
		gpu_hog->detectMultiScale(gpuFrame, found);
		detected = detected || !found.empty();
		drawMarker(img, found, Scalar(255, 0, 0), 1);//BGR

		// Pass 2: three times the group threshold (groups more), drawn as green boxes.
		gpu_hog->setGroupThreshold(hogGroupThreshold * 3);
		gpu_hog->detectMultiScale(gpuFrame, found);
		detected = detected || !found.empty();
		drawMarker(img, found, Scalar(0, 255, 0));//BGR
	}
	else
	{
		//std::vector<DetectionROI> locations;
		//cpu_hog.detectMultiScaleROI(frame, found, locations, _hitThreshold, 0);
		// author's note: (img.type() == CV_8U || img.type() == CV_8UC3)
		cv::cvtColor(img, frame, COLOR_BGR2GRAY);
		cpu_hog.detectMultiScale(frame, found, _hitThreshold, win_stride, cv::Size(4, 4), _scaleFactor);
		detected = !found.empty();
		drawMarker(img, found, Scalar(255, 0, 0));//BGR
	}

	// The original tested found.size() > 1, which skipped frames with exactly one
	// detection and, on the GPU path, looked only at the second pass.
	if (detected) {
		counter_frames_detected += 1;
	}
	oVideoWriter.write(img);
}

/** Helper: applies the detection settings this file uses for every CUDA cascade classifier. **/
void setCudaClassifierProperties(Ptr<cuda::CascadeClassifier> classifier) {

	// Author's note: the smaller the scale factor the better, the tradeoff being
	// processing time (it should be > 1).
	const double scaleStep = 1.02;
	// Author's note: a larger value gives fewer false positives but starts to miss
	// objects; 3 to 4 worked best, though there are still misses.
	const int neighbourCount = 3;

	classifier->setMinNeighbors(neighbourCount);
	classifier->setScaleFactor(scaleStep);
}
/**
 Helper: runs one CUDA cascade on a grey frame that is already on the GPU.
 classifier   - cascade to run; setCudaClassifierProperties() is applied to it first
 gpuGreyFrame - input frame as a GpuMat
 found        - output; receives the detections converted to rectangles (must not be NULL)
**/
void run_classifier_detection(Ptr<cuda::CascadeClassifier> classifier, GpuMat gpuGreyFrame, std::vector<cv::Rect>  *found) {
	setCudaClassifierProperties(classifier);

	// detectMultiScale writes its raw result into a GpuMat ...
	GpuMat deviceDetections;
	classifier->detectMultiScale(gpuGreyFrame, deviceDetections);

	// ... which convert() turns into the caller's vector of rectangles.
	std::vector<cv::Rect> &hostRects = *found;
	classifier->convert(deviceDetections, hostRects);
}


/**
 @function detectAndDisplayHAAR
 Runs the three Haar cascades (upper body, lower body, full body) on one BGR frame,
 draws the detections on it, updates counter_frames_detected and appends the frame
 to the output video.

 img          - BGR frame to analyse; rectangles are drawn onto it before it is written
 oVideoWriter - writer the annotated frame is appended to
 useGPU       - true: use the global cascade_gpu_* classifiers (created by main());
                false: use the global *_cascade CPU classifiers (loaded by main())

 Colours follow the legend printed by main(): upper body GREEN, lower body BLUE,
 full body RED. An empty frame is ignored. A frame counts as "detected" when any of
 the three cascades returns at least one rectangle.
*/
void detectAndDisplayHAAR(Mat img, VideoWriter oVideoWriter, bool useGPU)
{
	if (img.empty()) {
		return; // cvtColor would assert on an empty frame
	}

	// BGR colours, one per cascade, shared by the GPU and CPU paths.
	const Scalar upperBodyColour(0, 255, 0); // green
	const Scalar lowerBodyColour(255, 0, 0); // blue
	const Scalar fullBodyColour(0, 0, 255);  // red

	Mat frame;
	//cv::cvtColor(img,frame, COLOR_BGR2BGRA);does not work with HAAR
	cv::cvtColor(img, frame, COLOR_BGR2GRAY);
	std::vector<cv::Rect>  found;
	bool detected = false; // set as soon as any cascade finds something
	// http://fewtutorials.bravesites.com/entries/emgu-cv-c/level-3c---how-to-improve-face-detection

	//Now let the cascades run; three cascades are run here.
	// Author's note: running HAAR on the GPU is much faster than on the CPU.
	if (useGPU) {

		GpuMat gray_gpu(frame); // upload the grey frame once, reuse it for all three cascades

		run_classifier_detection(cascade_gpu_upperbody, gray_gpu, &found);
		detected = detected || !found.empty();
		drawMarker(img, found, upperBodyColour);

		run_classifier_detection(cascade_gpu_fullbody, gray_gpu, &found);
		detected = detected || !found.empty();
		drawMarker(img, found, fullBodyColour);

		run_classifier_detection(cascade_gpu_lowerbody, gray_gpu, &found);
		detected = detected || !found.empty();
		drawMarker(img, found, lowerBodyColour);

	}
	else {

		double scalingFactor = 1.05;// author's note: with 1.001, too many false positives
		int numberOfNeighbours = 3;
		// NOTE(review): a 32x32 maximum object size looks small for HD video - confirm it is intended.
		const cv::Size minObject(8, 8), maxObject(32, 32);

		upperbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, minObject, maxObject);
		detected = detected || !found.empty();
		drawMarker(img, found, upperBodyColour);

		// The original drew lower body red and full body blue here, the opposite of
		// the GPU path and of the legend printed by main().
		lowerbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, minObject, maxObject);
		detected = detected || !found.empty();
		drawMarker(img, found, lowerBodyColour);

		fullbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, minObject, maxObject);
		detected = detected || !found.empty();
		drawMarker(img, found, fullBodyColour);
	}

	// The original tested found.size() > 1 after the last cascade only, so hits from
	// the first two cascades and single detections were never counted.
	if (detected) {
		counter_frames_detected += 1;
	}
	oVideoWriter.write(img);
	//	imshow("opencv", img);

}

#pragma warning(disable:4996)
/**
 Overrides the given settings from environment variables. A setting whose variable
 is not set keeps the value it had on entry.

 videoFileP - replaced by VIDEO_PATH when set
 opencvPath - replaced by OPENCV_PATH when set
 outFileP   - replaced by OUT_PATH when set
 useAlgoP   - replaced by USE_ALGO when set
 useGpuP    - when USE_GPU is set: true only if it is exactly "true", false otherwise
*/
void  getEnvSetting(String &videoFileP, String &opencvPath, String &outFileP, String &useAlgoP, bool &useGpuP) {

	if (const char* videoEnv = getenv("VIDEO_PATH")) {
		videoFileP = videoEnv;
	}

	if (const char* opencvEnv = getenv("OPENCV_PATH")) {
		opencvPath = opencvEnv;
	}

	if (const char* outEnv = getenv("OUT_PATH")) {
		outFileP = outEnv;
	}

	if (const char* algoEnv = getenv("USE_ALGO")) {
		useAlgoP = algoEnv;
	}

	if (const char* gpuEnv = getenv("USE_GPU")) {
		// Case-sensitive comparison: any value other than "true" switches the GPU off.
		useGpuP = (strcmp(gpuEnv, "true") == 0);
	}
}

// To run this you need OpenCV compiled with CUDA support (and a machine with CUDA compliant /NVDIA GPU card
// Based on the sample program from OpenCV - \opencv\samples\gpu\cascadeclassifier.cpp and other samples in net

int main(int argc, char* argv[])
{
	cout << "A Simple Object detection test from Video" <<endl;
	cout << "Set VIDEO_PATH, OPENCV_PATH, USE_GPU=<true/false> USE_ALGO=haar/hog OUT_PATH <output file *avi full path> for configuring" << endl;
	///assert((win_stride_.width % block_stride_.width == 0 && win_stride_.height % block_stride_.height == 0));
	getEnvSetting(videoFile, opencv_path, outFile, useAlgo, useGpu);
	cout << "videoFile = " << videoFile << endl;
	cout << "opencvpath = " << opencv_path << endl;
	cout << "Algorithm Used = " << useAlgo << endl;
	cout << "run_on_gpu = " << useGpu << endl;
	cout << "outFile = " << outFile << endl;

	/**
	Intialize the Algorithm Settings; The speed as well as false positives depended on these
	Unfortunately there is no one setting that is good for all
	**/

	VideoCapture cap(videoFile); // open the video file for reading
	if (!cap.isOpened())  // if not success, exit program
	{
		cout << " Cannot open the video file" << videoFile << endl;
		return -1;
	}
	cout << " Opened the video file" << videoFile << endl;

	double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
	double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
	double totalfps = cap.get(CV_CAP_PROP_FRAME_COUNT);
	Size frameSize(static_cast<int>(dWidth), static_cast<int>(dHeight));
	downFrameSize = frameSize; // If you dont want to re-size the frame you could uncomment this , it will take more CPU/GPU
	cout << " Orginal Frame Size = " << dWidth << "x" << dHeight << endl;
	cout << " Reduced Frame Size = " << downFrameSize << endl;

	double fps = cap.get(CV_CAP_PROP_FPS); //get the frames per seconds of the video
	cout << "Frame per seconds : " << fps << endl;

	VideoWriter oVideoWriter(outFile, CV_FOURCC('D', 'I', 'V', 'X'), 3, downFrameSize, true);
	if (!oVideoWriter.isOpened()) //if not initialize the VideoWriter successfully, exit the program
	{
		cout << "ERROR: Failed to write the video" << endl;
		return -1;
	}

	if (useGpu) {

		if (cv::cuda::getCudaEnabledDeviceCount() == 0) {

			cout << "No GPU found or the library is compiled without CUDA support" << endl;
			return -1;
		}
		cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());

		if (useAlgo == "hog") {
			// If you need to detect other objects you need to train it
			// https://github.com/DaHoC/trainHOG
			gpu_hog = cv::cuda::HOG::create(win_size, block_size, block_stride, cell_size, nbins);
			Mat detector = gpu_hog->getDefaultPeopleDetector(); //this will select 48*96 or 64*128 based on window size
			gpu_hog->setSVMDetector(detector);
			cout << "Created the CUDA HOG Classifuer" << endl;
			//cout << gpu_hog->getScaleFactor() << "---" <<  gpu_hog->getGroupThreshold() << endl;
		}
		else //use harr
		{
			//The below are the path to the HAAR trained casrcades
			//The below taken from http://alereimondo.no-ip.org/OpenCV/34.version?id=60 ; not for commercial use
			String upperbody_cascade_name = opencv_path + "/data/HS22x20/HS.xml"; //head and sholders
																				  //The below are CUDA Classisfier does not work with older format Cascade xmls; the below are from OpenCV source
			String cuda_lowerbody_cascade_name = opencv_path + "/data/haarcascades_cuda/haarcascade_lowerbody.xml";
			String cuda_fullbody_cascade_name = opencv_path + "/data/haarcascades_cuda/haarcascade_fullbody.xml";

			cout << "head and Shoulder Cascade Name" << upperbody_cascade_name << "Colored GREEN Rectangle" << endl;
			cout << "lowerbody_cascade_name" << cuda_lowerbody_cascade_name << "Colored BLUE Rectangle" << endl;
			cout << "fullbody_cascade_name" << cuda_fullbody_cascade_name << "Colored RED Rectangle" << endl;

			//Load the GPU/CUdA Compliant  video cascaders
			cascade_gpu_upperbody = cuda::CascadeClassifier::create(upperbody_cascade_name);
			cascade_gpu_lowerbody = cuda::CascadeClassifier::create(cuda_lowerbody_cascade_name);
			cascade_gpu_fullbody = cuda::CascadeClassifier::create(cuda_fullbody_cascade_name);
			cout << "Created the CUDA HAAR Classifiers" << endl;
		}

	}
	else //use CPU
	{

		if (useAlgo == "haar") {

			//The below are the path to the HAAR trained casrcades
			//The below taken from http://alereimondo.no-ip.org/OpenCV/34.version?id=60 ; not for commercial use
			String upperbody_cascade_name = opencv_path + "/data/HS22x20/HS.xml"; //head and sholders
			String lowerbody_cascade_name = opencv_path + "/data/haarcascades/haarcascade_lowerbody.xml";
			String fullbody_cascade_name = opencv_path + "/data/haarcascades/haarcascade_fullbody.xml";

			cout << "head and Shoulder Cascade Name" << upperbody_cascade_name << "Colored GREEN Rectangle" << endl;
			cout << "lowerbody_cascade_name" << lowerbody_cascade_name << "Colored BLUE Rectangle" << endl;
			cout << "fullbody_cascade_name" << fullbody_cascade_name << "Colored RED Rectangle" << endl;

			//-- 1. Load the cascades
			if (!upperbody_cascade.load(upperbody_cascade_name)) {
				printf("--(!)Error loading UpperBody\n");
				return -1;
			};
			if (!lowerbody_cascade.load(lowerbody_cascade_name)) {
				printf("--(!)Error loading lowerbody \n");
				return -1;
			};

			if (!fullbody_cascade.load(fullbody_cascade_name)) {
				printf("--(!)Error loading fullbody\n");
				return -1;
			};

			cout << "Created the HAAR Classifiers" << endl;
		}
		else //use hog
		{
			cpu_hog.setSVMDetector(cv::HOGDescriptor::getDaimlerPeopleDetector());
			cout << "Set the HOG Classifiers" << endl;
		}

	}

	double delay = num_frames / fps;
	cout << "Delay is " << delay << endl;
	clock_t startTimeG = clock();
	bool doLoop = true;
	while (doLoop)
	{
		Mat frame, resized;

		bool bSuccess = cap.read(frame); // read a new frame from video
		if (!bSuccess) //if not success, break loop
		{
			cout << "Cannot read the frame from video file" << endl;
			doLoop = false;
			break;
		}
		counter_frames_processed += 1;

		cv::resize(frame, resized, downFrameSize);// resize the frame to something smaller- makes computatin faster
		if (useAlgo == "hog") {
			detectAndDisplayHOG(resized, oVideoWriter,useGpu);
		}
		else //haar
		{
			detectAndDisplayHAAR(resized, oVideoWriter,useGpu);
		}

		clock_t endTime = clock() + delay; // this is a wrong way ; mabye multipy by CLOCKS_PER_SEC ? leaving it for now
		while (clock()  < endTime) { // This is the best my card supports
			if (cap.read(frame)) { //read only one frame per
				counter_frames_skipped += 1;
				cout << "Frames processed = " << counter_frames_processed << " Frames found = "
					<< counter_frames_detected << " Frames skipped = " << counter_frames_skipped
					<<  " Percentage processed = " << int( ((counter_frames_skipped + counter_frames_processed) / totalfps)* 100)
					<< " % Time taken =" << (clock() - startTimeG) / 1000 << " seconds"
					<< " \r";
				//cv::resize(frame, resized, Size(640, 480));// resize the frame to something smaller- makes computatin faster
				//cv::putText(resized, "Skipping Frame", cvPoint(30, 30),
					//FONT_HERSHEY_COMPLEX_SMALL, 1, cvScalar(0, 0, 0), 1, CV_AA);
				//oVideoWriter.write(resized);
			}
			waitKey(1);
		}
	}
	oVideoWriter.release();
	cout << "Total time taken = " << (clock() - startTimeG) / 1000 << " seconds" << endl;
	cout << "counter_frames_processed = " << counter_frames_processed << endl;
	cout << "counter_frames_skipped = " << counter_frames_skipped << endl;
	cout << "counter_frames_detected = " << counter_frames_detected << endl;
	return 0;
}
	#include "stdafx.h"
	#include <iostream>
	#include <time.h>

	#include "opencv2/objdetect.hpp"
	#include "opencv2/highgui.hpp"
	#include "opencv2/imgproc.hpp"
	#include "opencv2/cudaobjdetect.hpp"
	#include "opencv2/cudaimgproc.hpp"
	#include "opencv2/cudawarping.hpp"
	#include "opencv2/core/cuda.hpp"

	using namespace std;
	using namespace cv;
	using namespace cv::cuda;


	/** Global variables */
	// NOTE(review): from the second '#include "stdafx.h"' onwards this file repeats the
	// program that already appears above it; compiling both copies together would
	// redefine every name. Confirm which copy is meant to be kept.
	// Run-time configuration defaults. main() hands the first five (opencv_path, videoFile,
	// outFile, useAlgo, useGpu) to getEnvSetting(), which overrides them from environment variables.
	String opencv_path = "D:/opencv";
	String videoFile = "D:/Coding/resources/313060469_hd_720.mp4";
	String outFile = "/tmp/out.avi";
	String useAlgo = "hog";
	bool useGpu = false;
	// Progress counters, updated by main() and by the detectAndDisplay* functions.
	int counter_frames_processed = 0;
	int counter_frames_skipped = 0;
	int counter_frames_detected = 0;
	// Size every frame is resized to before detection; main() overwrites it with the source frame size.
	Size downFrameSize(640, 480);


	// CUDA detectors. They stay empty here and are created in main() only when running on the GPU.
	Ptr<cuda::CascadeClassifier> cascade_gpu_upperbody, cascade_gpu_lowerbody, cascade_gpu_fullbody;
	Ptr<cv::cuda::HOG> gpu_hog;

	/*
	Settings for the HOG person detector. There is no single setting that is good for every video.
	The CPU path uses the Daimler people detector (see where the SVM is set in main()).
	Author's notes: the default people detector (getDefaultPeopleDetector) worked only with
	win_width = 48; on the GPU it also worked with win_width = 64, but the detection rate was very poor.
	A window size that does not match the detector fails with:
	-->OpenCV Error : Assertion failed(checkDetectorSize()) in cv::HOGDescriptor::setSVMDetector
	*/
	int win_width = 48;
	// The detection window is win_width x (2 * win_width), i.e. a 48*96 rectangle (see win_size below).
	int cell_width = 8;
	int nbins = 9;
	int win_stride_width = 8;
	int win_stride_height = win_stride_width;
	int block_width = win_stride_width*2;
	int num_frames = 1000; // Numerator of the skip delay: main() computes delay = num_frames / fps. Despite the name this is not a frame count. NOTE(review): the original comment said "Process 2 fps" - confirm the intended rate.

	// Number of HOG pyramid levels, used by both the CPU descriptor below and the GPU detector.
	int hogLevels = HOGDescriptor::DEFAULT_NLEVELS;
	// Base grouping threshold for the GPU HOG passes (detectAndDisplayHOG also uses 3x this value).
	int hogGroupThreshold = 16;
	/* The values below are derived from the settings above. */
	Size win_stride(win_stride_width, win_stride_height);
	Size win_size(win_width, win_width * 2);
	Size block_size(block_width, block_width);
	// The block stride is half a block in each direction.
	int block_stride_width = block_width / 2;
	int block_stride_height = block_width / 2;
	Size block_stride(block_stride_width, block_stride_height);
	Size cell_size(cell_width, cell_width);

	// CPU HOG descriptor. Its SVM detector is only set in main() when the CPU HOG path is selected.
	// NOTE(review): the positional arguments after nbins presumably map to derivAperture, winSigma,
	// histogramNormType, L2HysThreshold, gammaCorrection and nlevels - confirm against the OpenCV docs.
	cv::HOGDescriptor cpu_hog(win_size, block_size, block_stride, cell_size, nbins, 1, -1,
	HOGDescriptor::L2Hys, .2, false, hogLevels);

	// CPU Haar cascades; main() loads them from XML files when USE_ALGO is "haar" and the GPU is off.
	cv::CascadeClassifier upperbody_cascade;
	cv::CascadeClassifier lowerbody_cascade;
	cv::CascadeClassifier fullbody_cascade;

	/**
	Outlines every rectangle in `found` on `img`.
	sc   - outline colour given as a BGR Scalar
	size - line thickness handed to cv::rectangle (defaults to 2)
	Callers in this file draw through this function and then write the same Mat to the
	output video, i.e. they rely on the drawing landing in their image.
	**/
	void drawMarker(Mat img, std::vector<cv::Rect> found, Scalar sc, int size = 2) {

		typedef std::vector<cv::Rect>::const_iterator RectIter;

		for (RectIter box = found.begin(); box != found.end(); ++box)
		{
			cv::rectangle(img, *box, sc, size);
		}
	}


	/**
	 @function detectAndDisplayHOG
	 Runs the HOG people detector on one BGR frame, draws the detections on it,
	 updates counter_frames_detected and appends the frame to the output video.

	 img          - BGR frame to analyse; rectangles are drawn onto it before it is written
	 oVideoWriter - writer the annotated frame is appended to
	 useGPU       - true: use the global gpu_hog (must have been created by main());
	                false: use the global cpu_hog

	 An empty frame is ignored (nothing is counted or written).
	 A frame counts as "detected" when any detection pass returns at least one rectangle.
	*/
	void detectAndDisplayHOG(Mat img, VideoWriter oVideoWriter, bool useGPU)
	{
		if (img.empty()) {
			return; // cvtColor would assert on an empty frame
		}

		Mat frame;
		std::vector<cv::Rect> found;
		bool detected = false; // set as soon as any pass finds something
		//The GroupThreshold and ScaleFactor are the two important parameters
		//decreasing them gives more hits, with more false positives
		double _hitThreshold = 0;   // author's note: not to be adjusted (only used on the CPU path)
		double _scaleFactor = 1.01; // author's note: 1.05 was also tried; huge impact on performance

		if (useGPU) {
			// The frame is converted to 4-channel BGRA before it is uploaded for the CUDA HOG.
			cv::cvtColor(img, frame, COLOR_BGR2BGRA);
			GpuMat gpuFrame(frame);
			gpu_hog->setScaleFactor(_scaleFactor);
			gpu_hog->setNumLevels(hogLevels);
			gpu_hog->setWinStride(win_stride);
			//gpu_hog->setHitThreshold(0); // play with this at your own risk :)

			// Pass 1: base group threshold, drawn as thin blue (BGR) boxes.
			gpu_hog->setGroupThreshold(hogGroupThreshold);
			gpu_hog->detectMultiScale(gpuFrame, found);
			detected = detected || !found.empty();
			drawMarker(img, found, Scalar(255, 0, 0), 1);//BGR

			// Pass 2: three times the group threshold (groups more), drawn as green boxes.
			gpu_hog->setGroupThreshold(hogGroupThreshold * 3);
			gpu_hog->detectMultiScale(gpuFrame, found);
			detected = detected || !found.empty();
			drawMarker(img, found, Scalar(0, 255, 0));//BGR
		}
		else
		{
			//std::vector<DetectionROI> locations;
			//cpu_hog.detectMultiScaleROI(frame, found, locations, _hitThreshold, 0);
			// author's note: (img.type() == CV_8U || img.type() == CV_8UC3)
			cv::cvtColor(img, frame, COLOR_BGR2GRAY);
			cpu_hog.detectMultiScale(frame, found, _hitThreshold, win_stride, cv::Size(4, 4), _scaleFactor);
			detected = !found.empty();
			drawMarker(img, found, Scalar(255, 0, 0));//BGR
		}

		// The original tested found.size() > 1, which skipped frames with exactly one
		// detection and, on the GPU path, looked only at the second pass.
		if (detected) {
			counter_frames_detected += 1;
		}
		oVideoWriter.write(img);
	}

	/** Helper functions **/
	// Applies the detection settings this file uses for every CUDA cascade classifier.
	void setCudaClassifierProperties(Ptr<cuda::CascadeClassifier> classifier) {

		// Author's note: the smaller the scale factor the better, the tradeoff being
		// processing time (it should be > 1).
		const double scaleStep = 1.02;
		// Author's note: a larger value gives fewer false positives but starts to miss
		// objects; 3 to 4 worked best, though there are still misses.
		const int neighbourCount = 3;

		classifier->setMinNeighbors(neighbourCount);
		classifier->setScaleFactor(scaleStep);
	}
	/** Helper functions **/
	// Runs one CUDA cascade on a grey frame that is already on the GPU.
	//   classifier   - cascade to run; setCudaClassifierProperties() is applied to it first
	//   gpuGreyFrame - input frame as a GpuMat
	//   found        - output; receives the detections converted to rectangles (must not be NULL)
	void run_classifier_detection(Ptr<cuda::CascadeClassifier> classifier, GpuMat gpuGreyFrame, std::vector<cv::Rect> *found) {
		setCudaClassifierProperties(classifier);

		// detectMultiScale writes its raw result into a GpuMat ...
		GpuMat deviceDetections;
		classifier->detectMultiScale(gpuGreyFrame, deviceDetections);

		// ... which convert() turns into the caller's vector of rectangles.
		std::vector<cv::Rect> &hostRects = *found;
		classifier->convert(deviceDetections, hostRects);
	}


	/**
	 @function detectAndDisplayHAAR
	 Runs the three Haar cascades (upper body, lower body, full body) on one BGR frame,
	 draws the detections on it, updates counter_frames_detected and appends the frame
	 to the output video.

	 img          - BGR frame to analyse; rectangles are drawn onto it before it is written
	 oVideoWriter - writer the annotated frame is appended to
	 useGPU       - true: use the global cascade_gpu_* classifiers (created by main());
	                false: use the global *_cascade CPU classifiers (loaded by main())

	 Colours follow the legend printed by main(): upper body GREEN, lower body BLUE,
	 full body RED. An empty frame is ignored. A frame counts as "detected" when any of
	 the three cascades returns at least one rectangle.
	*/
	void detectAndDisplayHAAR(Mat img, VideoWriter oVideoWriter, bool useGPU)
	{
		if (img.empty()) {
			return; // cvtColor would assert on an empty frame
		}

		// BGR colours, one per cascade, shared by the GPU and CPU paths.
		const Scalar upperBodyColour(0, 255, 0); // green
		const Scalar lowerBodyColour(255, 0, 0); // blue
		const Scalar fullBodyColour(0, 0, 255);  // red

		Mat frame;
		//cv::cvtColor(img,frame, COLOR_BGR2BGRA);does not work with HAAR
		cv::cvtColor(img, frame, COLOR_BGR2GRAY);
		std::vector<cv::Rect> found;
		bool detected = false; // set as soon as any cascade finds something
		// http://fewtutorials.bravesites.com/entries/emgu-cv-c/level-3c---how-to-improve-face-detection

		//Now let the cascades run; three cascades are run here.
		// Author's note: running HAAR on the GPU is much faster than on the CPU.
		if (useGPU) {

			GpuMat gray_gpu(frame); // upload the grey frame once, reuse it for all three cascades

			run_classifier_detection(cascade_gpu_upperbody, gray_gpu, &found);
			detected = detected || !found.empty();
			drawMarker(img, found, upperBodyColour);

			run_classifier_detection(cascade_gpu_fullbody, gray_gpu, &found);
			detected = detected || !found.empty();
			drawMarker(img, found, fullBodyColour);

			run_classifier_detection(cascade_gpu_lowerbody, gray_gpu, &found);
			detected = detected || !found.empty();
			drawMarker(img, found, lowerBodyColour);

		}
		else {

			double scalingFactor = 1.05;// author's note: with 1.001, too many false positives
			int numberOfNeighbours = 3;
			// NOTE(review): a 32x32 maximum object size looks small for HD video - confirm it is intended.
			const cv::Size minObject(8, 8), maxObject(32, 32);

			upperbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, minObject, maxObject);
			detected = detected || !found.empty();
			drawMarker(img, found, upperBodyColour);

			// The original drew lower body red and full body blue here, the opposite of
			// the GPU path and of the legend printed by main().
			lowerbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, minObject, maxObject);
			detected = detected || !found.empty();
			drawMarker(img, found, lowerBodyColour);

			fullbody_cascade.detectMultiScale(frame, found, scalingFactor, numberOfNeighbours, 0, minObject, maxObject);
			detected = detected || !found.empty();
			drawMarker(img, found, fullBodyColour);
		}

		// The original tested found.size() > 1 after the last cascade only, so hits from
		// the first two cascades and single detections were never counted.
		if (detected) {
			counter_frames_detected += 1;
		}
		oVideoWriter.write(img);
		// imshow("opencv", img);

	}

	#pragma warning(disable:4996)
	/**
	 Overrides the given settings from environment variables. A setting whose variable
	 is not set keeps the value it had on entry.

	 videoFileP - replaced by VIDEO_PATH when set
	 opencvPath - replaced by OPENCV_PATH when set
	 outFileP   - replaced by OUT_PATH when set
	 useAlgoP   - replaced by USE_ALGO when set
	 useGpuP    - when USE_GPU is set: true only if it is exactly "true", false otherwise
	*/
	void getEnvSetting(String &videoFileP, String &opencvPath, String &outFileP, String &useAlgoP, bool &useGpuP) {

		if (const char* videoEnv = getenv("VIDEO_PATH")) {
			videoFileP = videoEnv;
		}

		if (const char* opencvEnv = getenv("OPENCV_PATH")) {
			opencvPath = opencvEnv;
		}

		if (const char* outEnv = getenv("OUT_PATH")) {
			outFileP = outEnv;
		}

		if (const char* algoEnv = getenv("USE_ALGO")) {
			useAlgoP = algoEnv;
		}

		if (const char* gpuEnv = getenv("USE_GPU")) {
			// Case-sensitive comparison: any value other than "true" switches the GPU off.
			useGpuP = (strcmp(gpuEnv, "true") == 0);
		}
	}

	// To run this you need OpenCV compiled with CUDA support (and a machine with a CUDA compliant / NVIDIA GPU card)
	// Based on the sample program from OpenCV - \opencv\samples\gpu\cascadeclassifier.cpp and other samples in net
	//
	// Flow: read the settings from the environment, open the input video and the output
	// writer, create the detector selected by USE_ALGO / USE_GPU, then loop: run detection
	// on one frame, write it out, and skip input frames for a short delay before the next one.
	// Returns 0 on success and -1 when the input, the output or a detector cannot be set up.
	int main(int argc, char* argv[])
	{
		(void)argc; // configuration comes from environment variables, not the command line
		(void)argv;

		cout << "A Simple Object detection test from Video" <<endl;
		cout << "Set VIDEO_PATH, OPENCV_PATH, USE_GPU=<true/false> USE_ALGO=haar/hog OUT_PATH <output file *avi full path> for configuring" << endl;
		///assert((win_stride_.width % block_stride_.width == 0 && win_stride_.height % block_stride_.height == 0));
		getEnvSetting(videoFile, opencv_path, outFile, useAlgo, useGpu);
		cout << "videoFile = " << videoFile << endl;
		cout << "opencvpath = " << opencv_path << endl;
		cout << "Algorithm Used = " << useAlgo << endl;
		cout << "run_on_gpu = " << useGpu << endl;
		cout << "outFile = " << outFile << endl;

		// Validate the algorithm once. Previously an unknown value set up HOG on the CPU
		// path but then ran the Haar cascades, which had never been loaded.
		const bool useHog = (useAlgo == "hog");
		const bool useHaar = (useAlgo == "haar");
		if (!useHog && !useHaar) {
			cout << "ERROR: Unknown USE_ALGO value " << useAlgo << " (expected hog or haar)" << endl;
			return -1;
		}

		/**
		Initialize the algorithm settings; the speed as well as false positives depend on these.
		Unfortunately there is no one setting that is good for all.
		**/

		VideoCapture cap(videoFile); // open the video file for reading
		if (!cap.isOpened()) // if not success, exit program
		{
			cout << " Cannot open the video file" << videoFile << endl;
			return -1;
		}
		cout << " Opened the video file" << videoFile << endl;

		double dWidth = cap.get(CV_CAP_PROP_FRAME_WIDTH); //get the width of frames of the video
		double dHeight = cap.get(CV_CAP_PROP_FRAME_HEIGHT); //get the height of frames of the video
		double totalfps = cap.get(CV_CAP_PROP_FRAME_COUNT); // total number of frames (despite the name)
		Size frameSize(static_cast<int>(dWidth), static_cast<int>(dHeight));
		// Frames are processed at their original size. Comment this line out to process at the
		// smaller default downFrameSize instead, which needs less CPU/GPU.
		downFrameSize = frameSize;
		cout << " Orginal Frame Size = " << dWidth << "x" << dHeight << endl;
		cout << " Reduced Frame Size = " << downFrameSize << endl;

		double fps = cap.get(CV_CAP_PROP_FPS); //get the frames per seconds of the video
		cout << "Frame per seconds : " << fps << endl;

		// The output is written at a fixed 3 frames per second.
		VideoWriter oVideoWriter(outFile, CV_FOURCC('D', 'I', 'V', 'X'), 3, downFrameSize, true);
		if (!oVideoWriter.isOpened()) //if the VideoWriter could not be initialized, exit the program
		{
			cout << "ERROR: Failed to write the video" << endl;
			return -1;
		}

		if (useGpu) {

			if (cv::cuda::getCudaEnabledDeviceCount() == 0) {

				cout << "No GPU found or the library is compiled without CUDA support" << endl;
				return -1;
			}
			cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());

			if (useHog) {
				// If you need to detect other objects you need to train it
				// https://github.com/DaHoC/trainHOG
				gpu_hog = cv::cuda::HOG::create(win_size, block_size, block_stride, cell_size, nbins);
				Mat detector = gpu_hog->getDefaultPeopleDetector(); //this will select 48*96 or 64*128 based on window size
				gpu_hog->setSVMDetector(detector);
				cout << "Created the CUDA HOG Classifuer" << endl;
				//cout << gpu_hog->getScaleFactor() << "---" << gpu_hog->getGroupThreshold() << endl;
			}
			else //use haar
			{
				//The below are the paths to the HAAR trained cascades
				//The below taken from http://alereimondo.no-ip.org/OpenCV/34.version?id=60 ; not for commercial use
				String upperbody_cascade_name = opencv_path + "/data/HS22x20/HS.xml"; //head and shoulders
				//Author's note: the CUDA classifier does not work with older format cascade xmls; the below are from OpenCV source
				String cuda_lowerbody_cascade_name = opencv_path + "/data/haarcascades_cuda/haarcascade_lowerbody.xml";
				String cuda_fullbody_cascade_name = opencv_path + "/data/haarcascades_cuda/haarcascade_fullbody.xml";

				cout << "head and Shoulder Cascade Name" << upperbody_cascade_name << "Colored GREEN Rectangle" << endl;
				cout << "lowerbody_cascade_name" << cuda_lowerbody_cascade_name << "Colored BLUE Rectangle" << endl;
				cout << "fullbody_cascade_name" << cuda_fullbody_cascade_name << "Colored RED Rectangle" << endl;

				//Load the GPU/CUDA compliant cascades
				cascade_gpu_upperbody = cuda::CascadeClassifier::create(upperbody_cascade_name);
				cascade_gpu_lowerbody = cuda::CascadeClassifier::create(cuda_lowerbody_cascade_name);
				cascade_gpu_fullbody = cuda::CascadeClassifier::create(cuda_fullbody_cascade_name);
				// Fail early instead of dereferencing an empty pointer in the frame loop.
				if (cascade_gpu_upperbody.empty() || cascade_gpu_lowerbody.empty() || cascade_gpu_fullbody.empty()) {
					cout << "ERROR: Failed to create the CUDA HAAR Classifiers" << endl;
					return -1;
				}
				cout << "Created the CUDA HAAR Classifiers" << endl;
			}

		}
		else //use CPU
		{

			if (useHaar) {

				//The below are the paths to the HAAR trained cascades
				//The below taken from http://alereimondo.no-ip.org/OpenCV/34.version?id=60 ; not for commercial use
				String upperbody_cascade_name = opencv_path + "/data/HS22x20/HS.xml"; //head and shoulders
				String lowerbody_cascade_name = opencv_path + "/data/haarcascades/haarcascade_lowerbody.xml";
				String fullbody_cascade_name = opencv_path + "/data/haarcascades/haarcascade_fullbody.xml";

				cout << "head and Shoulder Cascade Name" << upperbody_cascade_name << "Colored GREEN Rectangle" << endl;
				cout << "lowerbody_cascade_name" << lowerbody_cascade_name << "Colored BLUE Rectangle" << endl;
				cout << "fullbody_cascade_name" << fullbody_cascade_name << "Colored RED Rectangle" << endl;

				//-- 1. Load the cascades
				if (!upperbody_cascade.load(upperbody_cascade_name)) {
					printf("--(!)Error loading UpperBody\n");
					return -1;
				}
				if (!lowerbody_cascade.load(lowerbody_cascade_name)) {
					printf("--(!)Error loading lowerbody \n");
					return -1;
				}
				if (!fullbody_cascade.load(fullbody_cascade_name)) {
					printf("--(!)Error loading fullbody\n");
					return -1;
				}

				cout << "Created the HAAR Classifiers" << endl;
			}
			else //use hog
			{
				cpu_hog.setSVMDetector(cv::HOGDescriptor::getDaimlerPeopleDetector());
				cout << "Set the HOG Classifiers" << endl;
			}

		}

		// num_frames / fps was added directly to clock(), i.e. it was used as a number of
		// clock ticks. Where CLOCKS_PER_SEC is 1000 (the case the original "/ 1000 seconds"
		// arithmetic assumes) that makes it a duration in milliseconds, so it is converted
		// to ticks explicitly here to behave the same on every platform.
		// A missing or zero fps disables skipping instead of dividing by zero.
		double delay = (fps > 0) ? (num_frames / fps) : 0.0;
		const clock_t delayTicks = static_cast<clock_t>(delay * CLOCKS_PER_SEC / 1000.0);
		cout << "Delay is " << delay << endl;
		clock_t startTimeG = clock();
		while (true)
		{
			Mat frame, resized;

			// read a new frame from the video; failure normally means the end of the file
			if (!cap.read(frame) || frame.empty())
			{
				cout << "Cannot read the frame from video file" << endl;
				break;
			}
			counter_frames_processed += 1;

			cv::resize(frame, resized, downFrameSize);// resize the frame to something smaller - makes computation faster
			if (useHog) {
				detectAndDisplayHOG(resized, oVideoWriter,useGpu);
			}
			else //haar
			{
				detectAndDisplayHAAR(resized, oVideoWriter,useGpu);
			}

			// Skip (read and discard) input frames until the delay has elapsed.
			clock_t endTime = clock() + delayTicks;
			while (clock() < endTime) {
				if (cap.read(frame)) {
					counter_frames_skipped += 1;
					// The frame count may be unknown (0) for some sources; avoid dividing by it.
					int percentDone = 0;
					if (totalfps > 0) {
						percentDone = int(((counter_frames_skipped + counter_frames_processed) / totalfps) * 100);
					}
					cout << "Frames processed = " << counter_frames_processed << " Frames found = "
						<< counter_frames_detected << " Frames skipped = " << counter_frames_skipped
						<< " Percentage processed = " << percentDone
						<< " % Time taken =" << (clock() - startTimeG) / CLOCKS_PER_SEC << " seconds"
						<< " \r";
					//cv::resize(frame, resized, Size(640, 480));// resize the frame to something smaller- makes computatin faster
					//cv::putText(resized, "Skipping Frame", cvPoint(30, 30),
					//FONT_HERSHEY_COMPLEX_SMALL, 1, cvScalar(0, 0, 0), 1, CV_AA);
					//oVideoWriter.write(resized);
				}
				waitKey(1);
			}
		}
		oVideoWriter.release();
		cout << "Total time taken = " << (clock() - startTimeG) / CLOCKS_PER_SEC << " seconds" << endl;
		cout << "counter_frames_processed = " << counter_frames_processed << endl;
		cout << "counter_frames_skipped = " << counter_frames_skipped << endl;
		cout << "counter_frames_detected = " << counter_frames_detected << endl;
		return 0;
	}