Skip to content

Instantly share code, notes, and snippets.

@tlaitinen
Created January 18, 2017 10:08
Show Gist options
  • Save tlaitinen/71796ea690d6220030f14e0f39c097b4 to your computer and use it in GitHub Desktop.
Save tlaitinen/71796ea690d6220030f14e0f39c097b4 to your computer and use it in GitHub Desktop.
OpenCV test to detect and follow faces in a video
#include "opencv2/objdetect.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/videoio.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/face.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/videoio/videoio_c.h"
#include "opencv2/highgui/highgui_c.h"
#include <opencv2/video/background_segm.hpp>
#include <opencv2/video/tracking.hpp>
#include <opencv2/core/ocl.hpp>
#include <cctype>
#include <iostream>
#include <iterator>
#include <stdio.h>
#include <deque>
#include <set>
// Side length (pixels) of the square face crops fed to the recognizer
// (see Detector::cropAndResizeFaceImage); thumbnails are drawn at half this size.
#define FACE_SIZE 128
// Minimum object size passed to the face cascade's detectMultiScale.
#define FACE_DETECT_MIN_SIZE 90
// Minimum object size passed to the nested (eyes / nose) cascades.
#define EYE_DETECT_MIN_SIZE 10
// Threshold argument given to createLBPHFaceRecognizer.
#define FACE_THRESHOLD 80.0
// Maximum number of face crops kept per tracked face; the oldest is dropped first.
#define MAX_FACE_IMAGES 15
// Optical-flow features whose reported error exceeds this value are ignored.
#define MAX_FEATURE_ERROR 10
// A face older than FACE_CONFIRMATION_TIME frames is removed unless its
// accumulated nested-feature count has reached FACE_CONFIRMATION_COUNT.
#define FACE_CONFIRMATION_TIME 30
#define FACE_CONFIRMATION_COUNT 2
// A face whose lastFrame is more than this many frames old is removed.
#define FACE_EXPIRY_TIME 100
// A tracked corner not refreshed for more than this many frames is discarded.
#define CORNER_EXPIRY_TIME 5
// Maximum distance between a detection centre and a tracked face centre for
// the detection to be considered for a forced match (Detector::closestFace).
#define OVERLAP_FORCE_MATCH_DIST 100
// Maximum frame distance to the face's lastMatch for a forced match to happen.
#define OVERLAP_FORCE_MATCH_TIME 20
// When non-zero, main() swaps the frame dimensions and transposes + flips every frame.
#define TRANSPOSE 0
using namespace std;
using namespace cv;
// Haar cascade files loaded by the Detector constructor (relative paths).
string cascadeName = "data/haarcascades/haarcascade_frontalface_alt.xml";
string eyesName = "data/haarcascades/haarcascade_eye_tree_eyeglasses.xml";
string noseName = "data/haarcascades/haarcascade_mcs_nose.xml";
string mouthName = "data/haarcascades/haarcascade_mcs_mouth.xml";
struct Face {
int id;
Rect rect;
Mat img;
deque<Mat> faceImgs;
Rect imgRect;
vector<Point2f> corners;
vector<int> cornerFrames;
vector<Point2f> trail;
int lastMatch;
int firstFrame;
int lastFrame;
int nestedCount;
public:
void pushTrail(const Point2f& p) {
trail.push_back(p);
}
void pushTrail(const Rect& r) {
pushTrail(Point2f(r.x + r.width / 2, r.y + r.height / 2));
}
Face(int id_, const Rect& r, const Mat& i, const Mat& fi, const Rect& ir, const vector<Point2f>& cs, int num, int nestedCount_)
: id(id_), rect(r), img(i), imgRect(ir), corners(cs), firstFrame(num), lastFrame(num), nestedCount(nestedCount_) {
pushTrail(r);
faceImgs.push_back(fi);
lastMatch = num;
cornerFrames.resize(corners.size());
for (vector<int>::iterator cf = cornerFrames.begin(); cf != cornerFrames.end(); cf++)
*cf = num;
}
};
// Detects faces in video frames, follows them between detections with sparse
// optical flow, and re-identifies them with an LBPH face recognizer.
//
// Usage: construct, call setDetectArea() once, then call detectAndDraw() for
// every frame with an increasing frame number.
class Detector {
    vector<Face> faces;                        // faces currently being tracked
    Mat gray;                                  // equalized grayscale copy of the current frame
    int nextId;                                // id given to the next newly created face
    Ptr<cv::face::FaceRecognizer> recognizer;  // recognizer trained on the stored face crops
    bool trained;                              // true once train() has been called
    Rect detectArea;                           // frame region searched by the face cascade

    // Returns true if `r` intersects the rectangle of any tracked face.
    bool overlapsFace(const Rect& r) {
        for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) {
            Rect i = r & f->rect;
            if (i.width > 0)
                return true;
        }
        return false;
    }

    // Among the tracked faces whose rectangle contains `p`, returns the one
    // whose centre is nearest to `p` and closer than OVERLAP_FORCE_MATCH_DIST;
    // returns faces.end() when there is none.
    vector<Face>::iterator closestFace(const Point& p) {
        double d = 100000;
        vector<Face>::iterator res = faces.end();
        for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) {
            if (f->rect.contains(p)) {
                Point c(f->rect.x + f->rect.width / 2, f->rect.y + f->rect.height / 2);
                double dist = cv::norm(p-c);
                if (dist < d && dist < OVERLAP_FORCE_MATCH_DIST) {
                    d = dist;
                    res = f;
                }
            }
        }
        return res;
    }

    // Returns `r` enlarged to twice its size around its centre, with the
    // origin clamped to >= 0 and the far edges clipped to the size of `gray`.
    Rect makeImgRect(Rect r) {
        Rect imgRect = r;
        imgRect.x -= imgRect.width/2;
        imgRect.y -= imgRect.height/2;
        imgRect.width *= 2;
        imgRect.height *= 2;
        if (imgRect.x < 0)
            imgRect.x = 0;
        if (imgRect.y < 0)
            imgRect.y = 0;
        if (imgRect.x + imgRect.width > gray.cols)
            imgRect.width = gray.cols - imgRect.x;
        if (imgRect.y + imgRect.height > gray.rows)
            imgRect.height = gray.rows - imgRect.y;
        return imgRect;
    }

    // Draws half-size thumbnails of every stored face crop onto `screen`:
    // one row per tracked face, one column per stored crop.
    void drawFaces(Mat& screen) {
        const int tile = FACE_SIZE / 2;
        int row = 0;
        for (vector<Face>::const_iterator f = faces.begin(); f != faces.end(); f++, row++) {
            int col = 0;
            for (deque<Mat>::const_iterator fi = f->faceImgs.begin(); fi != f->faceImgs.end(); fi++, col++) {
                const Rect dst(col * tile, row * tile, tile, tile);
                // Taking a ROI outside the image makes OpenCV throw, so
                // thumbnails that do not fit into the frame are skipped.
                if (dst.x + dst.width > screen.cols || dst.y + dst.height > screen.rows)
                    continue;
                Mat roi = screen(dst);
                Mat src;
                cvtColor(*fi, src, CV_GRAY2RGB);
                resize(src, src, Size(), 0.5, 0.5);
                src.copyTo(roi);
            }
        }
    }

    // Retrains the recognizer from scratch on every stored crop of every
    // tracked face, using the face id as label. Does nothing when there are
    // no crops.
    void updateFaces() {
        vector<Mat> images;
        vector<int> labels;
        for (vector<Face>::const_iterator f = faces.begin(); f != faces.end(); f++) {
            for (deque<Mat>::const_iterator fi = f->faceImgs.begin(); fi != f->faceImgs.end(); fi++) {
                images.push_back(*fi);
                labels.push_back(f->id);
            }
        }
        if (images.empty())
            return;
        // A full train() is used even when already trained (rather than
        // recognizer->update(images, labels)).
        recognizer->train(images, labels);
        trained = true;
    }

    // Replaces `face` by its centred square crop resized to FACE_SIZE x FACE_SIZE.
    void cropAndResizeFaceImage(Mat& face) {
        Rect r;
        r.width = min(face.cols, face.rows);
        r.height = min(face.cols, face.rows);
        r.x = (face.cols - r.width) / 2;
        r.y = (face.rows - r.height) / 2;
        Mat tmp = face(r);
        cv::resize(tmp, face, cv::Size(FACE_SIZE, FACE_SIZE));
    }

    // Updates the tracked face `f` with a new detection made in frame `num`:
    // saves the colour crop under faces/, replaces rectangle, tracking patch
    // and corners, appends the new training crop (keeping at most
    // MAX_FACE_IMAGES) and accumulates the nested-feature count.
    void forceMatchFace(vector<Face>::iterator f, const Rect& r, const Mat& roiColor, const Mat& roi, const Mat& faceImg, const Rect& imgRect,
                        const vector<Point2f>& corners, int num, int nestedCount) {
        char buf[32];
        snprintf(buf, 32, "faces/%02d-%04d.jpg", f->id, num);
        imwrite(buf, roiColor);
        f->rect = r;
        f->img = roi;
        f->faceImgs.push_back(faceImg);
        if (f->faceImgs.size() > MAX_FACE_IMAGES) {
            f->faceImgs.pop_front();
        }
        f->imgRect = imgRect;
        f->corners = corners;
        // All corners are fresh as of this frame.
        f->cornerFrames.assign(corners.size(), num);
        f->nestedCount += nestedCount;
        f->pushTrail(r);
        f->lastFrame = num;
        // Record the match so the OVERLAP_FORCE_MATCH_TIME window is measured
        // from the most recent match rather than from the face's creation.
        f->lastMatch = num;
    }

    // Asks the recognizer which known face `faceImg` belongs to. When the
    // predicted label is the id of a tracked face, that face is updated via
    // forceMatchFace() and true is returned; otherwise returns false.
    bool matchFace(const Rect& r, const Mat& roiColor, const Mat& roi, const Mat& faceImg, const Rect& imgRect,
                   const vector<Point2f>& corners, int num, int foundNested) {
        int label;
        double confidence;
        recognizer->predict(faceImg, label, confidence);
        cerr << "match label = " << label << endl;
        cerr << "condidence = " << confidence << endl;
        cerr << "nested count = " << foundNested << endl;
        for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) {
            if (f->id == label) {
                forceMatchFace(f, r, roiColor, roi, faceImg, imgRect, corners, num, foundNested);
                return true;
            }
        }
        return false;
    }

    // Loads a cascade file and reports a failure instead of ignoring it.
    static void loadCascade(CascadeClassifier& cc, const string& path) {
        if (!cc.load(path))
            cerr << "could not load cascade " << path << endl;
    }

    CascadeClassifier cascade;  // frontal face cascade
    CascadeClassifier eyes;     // nested cascade: eyes
    CascadeClassifier mouth;    // nested cascade: mouth (loaded, currently not used for detection)
    CascadeClassifier nose;     // nested cascade: nose
public:
    // Creates the LBPH recognizer and loads the four cascade files.
    Detector() : nextId(1),
        recognizer(cv::face::createLBPHFaceRecognizer(1,8,8,8, FACE_THRESHOLD)), trained(false) {
        loadCascade(cascade, cascadeName);
        loadCascade(eyes, eyesName);
        loadCascade(mouth, mouthName);
        loadCascade(nose, noseName);
    }

    // Sets the frame region in which faces are searched.
    void setDetectArea(const Rect& detectArea_) {
        detectArea = detectArea_;
    }

    // Moves one corner of the detection area to (x, y): the top-left corner
    // (keeping the bottom-right corner in place) when `topLeft` is true,
    // otherwise the bottom-right corner.
    void adjustDetectArea(double x, double y, bool topLeft) {
        if (topLeft) {
            detectArea.width -= x - detectArea.x;
            detectArea.height -= y - detectArea.y;
            detectArea.x = x;
            detectArea.y = y;
        } else {
            detectArea.width = x - detectArea.x;
            detectArea.height = y - detectArea.y;
        }
    }

    // Runs cascade `cc` on the face crop `face`, draws every hit on `img`
    // (offset by the face rectangle `r`) and returns the number of hits.
    // `name` is only used for log output.
    int nestedDetect(const Rect& r, Mat& img, const Mat& face, CascadeClassifier& cc, const string& name) {
        vector<Rect> bonusRects;
        cout << "detecting " << name << endl;
        cc.detectMultiScale(face, bonusRects,
                            1.1, 2 , 0
                            //|CASCADE_FIND_BIGGEST_OBJECT
                            //|CASCADE_DO_ROUGH_SEARCH
                            |CASCADE_SCALE_IMAGE
                            ,
                            Size(EYE_DETECT_MIN_SIZE, EYE_DETECT_MIN_SIZE) );
        cout << "Found " << bonusRects.size() << " " << name << endl;
        for (vector<Rect>::iterator er = bonusRects.begin(); er != bonusRects.end(); er++) {
            // Hits are relative to the face crop; move them into frame coordinates.
            er->x += r.x;
            er->y += r.y;
            rectangle( img, cvPoint(cvRound(er->x), cvRound(er->y)),
                       cvPoint(cvRound((er->x + er->width-1)), cvRound((er->y + er->height-1))),
                       CV_RGB(255,255,0), 2, 8, 0);
        }
        return static_cast<int>(bonusRects.size());
    }

    // Processes frame number `num`.
    //
    //  1. Every fifth frame: detect faces inside the detection area, filter
    //     them by skin colour, and either match them to a tracked face
    //     (recognizer or forced overlap match) or create a new face.
    //  2. Remove unconfirmed and expired faces.
    //  3. Draw the trail of every confirmed face.
    //  4. Track every face's corners with pyramidal Lucas-Kanade optical flow
    //     and move the face rectangle by the mean displacement.
    //  5. Retrain the recognizer if the set of faces changed, draw the
    //     thumbnails, write the annotated frame to `writer` and show it at
    //     half size in the "result" window.
    //
    // `img` is the BGR frame (annotated and finally resized in place) and
    // `imgHsv` the same frame converted to HSV.
    void detectAndDraw( int num, Mat& img, Mat& imgHsv, VideoWriter& writer)
    {
        double t = 0;
        vector<Rect> rects;
        Mat orig = img.clone();   // unannotated copy, used for the saved face crops
        bool facesChanged = false;
        const static Scalar colors[] = { CV_RGB(0,0,255),
                                         CV_RGB(0,128,255),
                                         CV_RGB(0,255,255),
                                         CV_RGB(0,255,0),
                                         CV_RGB(255,128,0),
                                         CV_RGB(255,255,0),
                                         CV_RGB(255,0,0),
                                         CV_RGB(255,0,255)} ;
        cvtColor( img, gray, COLOR_BGR2GRAY );
        equalizeHist( gray, gray );
        int maxCorners = 64;
        double qualityLevel = 0.01;
        double minDistance = 5;
        int blockSize = 3;
        vector<cv::Point2f> corners;
        int win_size = 15;
        t = (double)cvGetTickCount();

        // The detection area can be changed with the mouse and may end up
        // inverted or partly outside the frame; clamp it so that taking the
        // ROI cannot throw, and skip detection when nothing is left.
        const Rect area = detectArea & Rect(0, 0, gray.cols, gray.rows);
        if (num%5==0 && area.area() > 0) {
            rectangle(img, Point(area.x, area.y), Point(area.x+area.width, area.y+area.height), colors[4], 1, 8, 0);
            cout << "detecting faces" << endl;
            cascade.detectMultiScale( gray(area), rects,
                                      1.05, 3, 0
                                      //|CASCADE_FIND_BIGGEST_OBJECT
                                      //|CASCADE_DO_ROUGH_SEARCH
                                      |CASCADE_SCALE_IMAGE
                                      ,
                                      Size(FACE_DETECT_MIN_SIZE, FACE_DETECT_MIN_SIZE) );
            t = (double)cvGetTickCount() - t;
            printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) );
            for( vector<Rect>::iterator r = rects.begin(); r != rects.end(); r++ )
            {
                cout << "face of size " << r->width << "x" << r->height << endl;
                // Detections are relative to the detection area.
                r->x += area.x;
                r->y += area.y;

                // Count pixels whose hue and Cb/Cr chroma fall into the skin range.
                // NOTE(review): 8-bit OpenCV hue is normally below 180, so the
                // "> 245" branch presumably never fires - confirm intent.
                float skinSum=0;
                for (int y=r->y; y<r->y+r->height; y++)
                {
                    for (int x=r->x; x<r->x+r->width; x++)
                    {
                        Vec3b bgr = img.at<Vec3b>(y, x);
                        Vec3b hsv = imgHsv.at<Vec3b>(y, x);
                        float cb = 0.148*bgr.val[2] - 0.291*bgr.val[1] + 0.439*bgr.val[0] + 128;
                        float cr = 0.439*bgr.val[2] - 0.368*bgr.val[1] - 0.071*bgr.val[0] + 128;
                        if ( ( hsv.val[0]>245 || hsv.val[0]<25.5) && 140<=cr && cr<=165 && 140<=cb && cb<=195)
                        {
                            skinSum++;
                        }
                    }
                }
                //if less than 30% skin, this face doesnt count
                if (skinSum / (r->width*r->height) < 0.3)
                {
                    continue;
                }

                Mat faceImg = gray(*r).clone();
                int eyesCount = nestedDetect(*r, img, faceImg, eyes, "eyes");
                // nestedDetect(*r, img, faceImg ,mouth, "mouth");
                int noseCount = nestedDetect(*r, img, faceImg, nose, "nose");
                int nestedCount = eyesCount + noseCount;
                cropAndResizeFaceImage(faceImg);

                Rect imgRect = makeImgRect(*r);
                Mat roi = gray(imgRect).clone();
                Mat roiColor = orig(*r).clone();

                // Only look for corners inside the face rectangle itself,
                // expressed relative to the enlarged tracking patch.
                Rect maskRect = *r;
                maskRect.x -= imgRect.x;
                maskRect.y -= imgRect.y;
                Mat mask = cv::Mat::zeros(imgRect.height, imgRect.width, CV_8U);
                mask(maskRect) = 1;
                cv::goodFeaturesToTrack(roi, corners, maxCorners, qualityLevel, minDistance, mask, blockSize, false, 0);
                if (corners.empty())
                    continue;
                cornerSubPix( roi, corners, Size( win_size, win_size ), Size( -1, -1 ),
                              TermCriteria( CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03 ) );
                for (size_t k = 0; k < corners.size(); k++) {
                    Point center = corners[k];
                    center.x += imgRect.x;
                    center.y += imgRect.y;
                    circle(img, center, 2, colors[2], 1, 8, 0);
                }

                // 1st choice: the recognizer identifies a tracked face.
                if (trained && matchFace(*r, roiColor, roi, faceImg, imgRect, corners, num, nestedCount)) {
                    facesChanged = true;
                    continue;
                } else {
                    if (!trained) {
                        cout << "not trained" << endl;
                    }
                }

                // 2nd choice: the detection centre lies inside a recently
                // matched face; treat it as that face.
                Point middle(r->x + r->width / 2, r->y + r->height / 2);
                vector<Face>::iterator o = closestFace(middle);
                if (o != faces.end()) {
                    if (num - o->lastMatch < OVERLAP_FORCE_MATCH_TIME) {
                        cout << "forcing match!" << endl;
                        forceMatchFace(o, *r, roiColor, roi, faceImg, imgRect, corners, num, nestedCount);
                        facesChanged = true;
                    }
                    continue;
                }
                if (overlapsFace(*r)) {
                    cout << "overlaps with a face. not doing anything" << endl;
                    continue;
                }

                // Otherwise this is a new face.
                faces.push_back(Face(nextId++, *r, roi, faceImg, imgRect, corners, num, nestedCount));
                facesChanged = true;
                rectangle( img, cvPoint(cvRound(r->x), cvRound(r->y)),
                           cvPoint(cvRound((r->x + r->width-1)), cvRound((r->y + r->height-1))),
                           colors[0], 3, 8, 0);
            }
        }

        // Drop faces that were never confirmed by nested detections and faces
        // that have not been updated for too long (swap-with-last removal).
        for (size_t i = 0; i < faces.size(); ) {
            if ((num - faces[i].firstFrame > FACE_CONFIRMATION_TIME
                    && faces[i].nestedCount < FACE_CONFIRMATION_COUNT)
                    ||
                    (num - faces[i].lastFrame > FACE_EXPIRY_TIME)) {
                cout << "removing face " << faces[i].id << " nestedCount=" << faces[i].nestedCount << endl;
                faces[i] = faces.back();
                faces.pop_back();
                facesChanged = true;
            } else
                i++;
        }

        // Draw the trail of every confirmed face.
        for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) {
            Point2f prev(0,0);
            // Test the face being iterated (not an unrelated index).
            if (f->nestedCount < FACE_CONFIRMATION_COUNT)
                continue;
            for (vector<Point2f>::iterator p = f->trail.begin(); p != f->trail.end(); p++) {
                if (prev.x != 0) {
                    line( img, prev, *p, CV_RGB(255,0,255), 1 );
                }
                prev = *p;
            }
        }

        // Track every face with optical flow.
        for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) {
            corners.clear();
            Mat roi = gray(f->imgRect);

            // Discard stale corners (swap-with-last removal). The index only
            // advances when nothing was removed, so the element swapped into
            // position i is examined as well.
            for (size_t i = 0; i < f->corners.size(); ) {
                if (num - f->cornerFrames[i] > CORNER_EXPIRY_TIME) {
                    f->cornerFrames[i] = f->cornerFrames.back();
                    f->corners[i] = f->corners.back();
                    f->cornerFrames.pop_back();
                    f->corners.pop_back();
                } else
                    i++;
            }
            if (f->corners.empty())
                continue;

            std::vector<uchar> featuresFound;
            featuresFound.reserve(maxCorners);
            std::vector<float> featureErrors;
            featureErrors.reserve(maxCorners);
            calcOpticalFlowPyrLK( f->img, roi, f->corners, corners, featuresFound, featureErrors ,
                                  Size( win_size, win_size ), 5,
                                  cvTermCriteria( CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.3 ), 0 );
            f->lastFrame = num;

            // Sum the displacement of all reliably tracked corners.
            Point diff(0, 0);
            int numDiffs = 0;
            for( size_t i=0; i < featuresFound.size(); i++ ){
                if (!featuresFound[i]) {
                    continue;
                }
                if (featureErrors[i] > MAX_FEATURE_ERROR)
                    continue;
                Point p0( f->imgRect.x + f->corners[i].x , f->imgRect.y + f->corners[i].y );
                Point p1( f->imgRect.x + corners[i].x , f->imgRect.y + corners[i].y );
                line( img, p0, p1, CV_RGB(255,255,255), 2 );
                f->corners[i] = corners[i];
                f->cornerFrames[i] = num;
                diff += p1 - p0;
                numDiffs++;
            }
            if (numDiffs) {
                // Move the face by the mean displacement and refresh the
                // tracking patch for the next frame.
                diff.x /= numDiffs;
                diff.y /= numDiffs;
                f->rect += diff;
                f->imgRect = makeImgRect(f->rect);
                f->img = gray(f->imgRect).clone();
            }

            Rect r = f->rect;
            f->pushTrail(r);

            // Draw the face rectangle and its id centred inside it.
            int fontFace = FONT_HERSHEY_SCRIPT_SIMPLEX;
            double fontScale = 2;
            int thickness = 3;
            int baseline=0;
            rectangle(img, Point(r.x, r.y), Point(r.x+r.width, r.y+r.height), colors[3], 1, 8, 0);
            char buf[32];
            snprintf(buf, 32, "%d", f->id);
            Size textSize = getTextSize(buf, fontFace,
                                        fontScale, thickness, &baseline);
            // putText's origin is the bottom-left corner of the text, so the
            // vertical offset uses the text height (not its width).
            Point textOrg(r.x + r.width / 2 - textSize.width / 2, r.y + r.height / 2 + textSize.height / 2);
            putText(img, buf, textOrg, fontFace, fontScale,
                    Scalar::all(255), thickness, 8);
        }

        if (facesChanged)
            updateFaces();
        drawFaces(img);
        writer.write(img);
        cv::resize(img, img, cv::Size(), 0.5, 0.5);
        cv::imshow( "result", img );
    }
};
void mouseCallback(int event, int x, int y, int flags, void* userdata)
{
Detector* detector = (Detector*) userdata;
if ( event == EVENT_LBUTTONDOWN )
{
cout << "Left button of the mouse is clicked - position (" << x << ", " << y << ")" << endl;
detector->adjustDetectArea(x*2, y*2, true);
}
else if ( event == EVENT_RBUTTONDOWN )
{
cout << "Right button of the mouse is clicked - position (" << x << ", " << y << ")" << endl;
detector->adjustDetectArea(x*2, y*2, false);
}
else if ( event == EVENT_MBUTTONDOWN )
{
cout << "Middle button of the mouse is clicked - position (" << x << ", " << y << ")" << endl;
}
else if ( event == EVENT_MOUSEMOVE )
{
cout << "Mouse move over the window - position (" << x << ", " << y << ")" << endl;
}
}
int main( int argc, const char** argv )
{
cv::ocl::setUseOpenCL(false);
Mat frame, frameCopy, image, frameHsv;
string inputName;
bool tryflip = false;
inputName.assign(argv[1]);
Detector detector;
cvNamedWindow( "result", 1 );
setMouseCallback("result",mouseCallback, &detector);
VideoCapture capture(inputName.c_str());
VideoWriter writer;
Size S = Size((int) capture.get(CV_CAP_PROP_FRAME_WIDTH),
(int) capture.get(CV_CAP_PROP_FRAME_HEIGHT));
if (TRANSPOSE) {
int tmp = S.width;
S.width = S.height;
S.height = tmp;
}
detector.setDetectArea(Rect(0, 0, S.width, S.height));
int ex = static_cast<int>(capture.get(CV_CAP_PROP_FOURCC));
char EXT[] = {(char)(ex & 0XFF) , (char)((ex & 0XFF00) >> 8),(char)((ex & 0XFF0000) >> 16),(char)((ex & 0XFF000000) >> 24), 0};
cout << "Input codec type: " << EXT << endl;
string out = "out-" + inputName.substr(0, inputName.size() -4) + ".mov";
//writer.open(("out" + inputName).c_str(), ex, capture.get(CV_CAP_PROP_FPS), S, true);
writer.open(out.c_str(), cv::VideoWriter::fourcc('m','p','4','v'), capture.get(CV_CAP_PROP_FPS), S, true);
if (!writer.isOpened()) {
cerr << "could not open output video" << endl;
return -1;
}
int num;
for(;;)
{
bool done = false;
for (int i = 0; i < 1; i++) {
if (!capture.read(frame)) {
done = true;
break;
}
}
if (done)
break;
if (TRANSPOSE) {
frame = frame.t();
flip(frame, frame,1);
}
cvtColor(frame, frameHsv, CV_BGR2HSV);
detector.detectAndDraw( num, frame, frameHsv, writer);
num++;
if( waitKey( 9 ) >= 0 )
goto _cleanup_;
}
writer.release();
_cleanup_:
cvDestroyWindow("result");
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment