Created
January 18, 2017 10:08
-
-
Save tlaitinen/71796ea690d6220030f14e0f39c097b4 to your computer and use it in GitHub Desktop.
OpenCV test to detect and follow faces in a video
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "opencv2/objdetect.hpp" | |
#include "opencv2/imgcodecs.hpp" | |
#include "opencv2/videoio.hpp" | |
#include "opencv2/highgui.hpp" | |
#include "opencv2/face.hpp" | |
#include "opencv2/imgproc.hpp" | |
#include "opencv2/core/utility.hpp" | |
#include "opencv2/core/core_c.h" | |
#include "opencv2/videoio/videoio_c.h" | |
#include "opencv2/highgui/highgui_c.h" | |
#include <opencv2/video/background_segm.hpp> | |
#include <opencv2/video/tracking.hpp> | |
#include <opencv2/core/ocl.hpp> | |
#include <cctype> | |
#include <iostream> | |
#include <iterator> | |
#include <stdio.h> | |
#include <deque> | |
#include <set> | |
#define FACE_SIZE 128 | |
#define FACE_DETECT_MIN_SIZE 90 | |
#define EYE_DETECT_MIN_SIZE 10 | |
#define FACE_THRESHOLD 80.0 | |
#define MAX_FACE_IMAGES 15 | |
#define MAX_FEATURE_ERROR 10 | |
#define FACE_CONFIRMATION_TIME 30 | |
#define FACE_CONFIRMATION_COUNT 2 | |
#define FACE_EXPIRY_TIME 100 | |
#define CORNER_EXPIRY_TIME 5 | |
#define OVERLAP_FORCE_MATCH_DIST 100 | |
#define OVERLAP_FORCE_MATCH_TIME 20 | |
#define TRANSPOSE 0 | |
// NOTE(review): file-scope using-directives are generally discouraged but are
// relied on throughout this translation unit, so they are kept.
using namespace std;
using namespace cv;
// Haar cascade model files, resolved relative to the working directory.
string cascadeName = "data/haarcascades/haarcascade_frontalface_alt.xml";
string eyesName = "data/haarcascades/haarcascade_eye_tree_eyeglasses.xml";
string noseName = "data/haarcascades/haarcascade_mcs_nose.xml";
string mouthName = "data/haarcascades/haarcascade_mcs_mouth.xml";
struct Face { | |
int id; | |
Rect rect; | |
Mat img; | |
deque<Mat> faceImgs; | |
Rect imgRect; | |
vector<Point2f> corners; | |
vector<int> cornerFrames; | |
vector<Point2f> trail; | |
int lastMatch; | |
int firstFrame; | |
int lastFrame; | |
int nestedCount; | |
public: | |
void pushTrail(const Point2f& p) { | |
trail.push_back(p); | |
} | |
void pushTrail(const Rect& r) { | |
pushTrail(Point2f(r.x + r.width / 2, r.y + r.height / 2)); | |
} | |
Face(int id_, const Rect& r, const Mat& i, const Mat& fi, const Rect& ir, const vector<Point2f>& cs, int num, int nestedCount_) | |
: id(id_), rect(r), img(i), imgRect(ir), corners(cs), firstFrame(num), lastFrame(num), nestedCount(nestedCount_) { | |
pushTrail(r); | |
faceImgs.push_back(fi); | |
lastMatch = num; | |
cornerFrames.resize(corners.size()); | |
for (vector<int>::iterator cf = cornerFrames.begin(); cf != cornerFrames.end(); cf++) | |
*cf = num; | |
} | |
}; | |
class Detector { | |
vector<Face> faces; | |
Mat gray; | |
int nextId; | |
Ptr<cv::face::FaceRecognizer> recognizer; | |
bool trained; | |
Rect detectArea; | |
bool overlapsFace(const Rect& r) { | |
for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) { | |
Rect i = r & f->rect; | |
if (i.width > 0) | |
return true; | |
} | |
return false; | |
} | |
vector<Face>::iterator closestFace(const Point& p) { | |
double d = 100000; | |
vector<Face>::iterator res = faces.end(); | |
for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) { | |
if (f->rect.contains(p)) { | |
Point c(f->rect.x + f->rect.width / 2, f->rect.y + f->rect.height / 2); | |
double dist = cv::norm(p-c); | |
if (dist < d && dist < OVERLAP_FORCE_MATCH_DIST) { | |
d = dist; | |
res = f; | |
} | |
} | |
} | |
return res; | |
} | |
Rect makeImgRect(Rect r) { | |
Rect imgRect = r; | |
imgRect.x -= imgRect.width/2; | |
imgRect.y -= imgRect.height/2; | |
imgRect.width *= 2; | |
imgRect.height *= 2; | |
if (imgRect.x < 0) | |
imgRect.x = 0; | |
if (imgRect.y < 0) | |
imgRect.y = 0; | |
if (imgRect.x + imgRect.width > gray.cols) | |
imgRect.width = gray.cols - imgRect.x; | |
if (imgRect.y + imgRect.height > gray.rows) | |
imgRect.height = gray.rows - imgRect.y; | |
return imgRect; | |
} | |
void drawFaces(Mat& screen) { | |
int y = 0,x; | |
vector<pair<Mat, Rect> > copies; | |
for (vector<Face>::const_iterator f = faces.begin(); f != faces.end(); f++) { | |
x = 0; | |
for (deque<Mat>::const_iterator fi = f->faceImgs.begin(); fi != f->faceImgs.end(); fi++) { | |
Rect dst(x*FACE_SIZE/2, y*FACE_SIZE/2, FACE_SIZE/2, FACE_SIZE/2); | |
copies.push_back(make_pair(*fi, dst)); | |
x++; | |
} | |
y++; | |
} | |
for (size_t i = 0; i < copies.size(); i++) { | |
Mat roi = screen(copies[i].second); | |
Mat src; | |
cvtColor(copies[i].first, src, CV_GRAY2RGB); | |
resize(src,src, Size(), 0.5, 0.5); | |
src.copyTo(roi); | |
} | |
} | |
void updateFaces() { | |
vector<Mat> images; | |
vector<int> labels; | |
for (vector<Face>::const_iterator f = faces.begin(); f != faces.end(); f++) { | |
for (deque<Mat>::const_iterator fi = f->faceImgs.begin(); fi != f->faceImgs.end(); fi++) { | |
images.push_back(*fi); | |
labels.push_back(f->id); | |
} | |
} | |
if (images.empty()) | |
return; | |
if (!trained) { | |
recognizer->train(images, labels); | |
trained = true; | |
} else { | |
// recognizer->update(images, labels); | |
recognizer->train(images, labels); | |
} | |
} | |
void cropAndResizeFaceImage(Mat& face) { | |
Rect r; | |
r.width = min(face.cols, face.rows); | |
r.height = min(face.cols, face.rows); | |
r.x = (face.cols - r.width) / 2; | |
r.y = (face.rows - r.height) / 2; | |
Mat tmp = face(r); | |
cv::resize(tmp, face, cv::Size(FACE_SIZE, FACE_SIZE)); | |
} | |
void forceMatchFace(vector<Face>::iterator f, const Rect& r, const Mat& roiColor, const Mat& roi, const Mat& faceImg, const Rect& imgRect, | |
const vector<Point2f>& corners, int num, int nestedCount) { | |
char buf[32]; | |
snprintf(buf, 32, "faces/%02d-%04d.jpg", f->id, num); | |
imwrite(buf, roiColor); | |
f->rect = r; | |
f->img = roi; | |
f->faceImgs.push_back(faceImg); | |
if (f->faceImgs.size() > MAX_FACE_IMAGES) { | |
f->faceImgs.pop_front(); | |
} | |
f->imgRect = imgRect; | |
f->corners = corners; | |
f->cornerFrames.resize(corners.size()); | |
f->nestedCount += nestedCount; | |
f->pushTrail(r); | |
for (vector<int>::iterator cf = f->cornerFrames.begin(); cf != f->cornerFrames.end(); cf++) | |
*cf = num; | |
f->lastFrame = num; | |
} | |
bool matchFace(const Rect& r, const Mat& roiColor, const Mat& roi, const Mat& faceImg, const Rect& imgRect, | |
const vector<Point2f>& corners, int num, int foundNested) { | |
int label; | |
double confidence; | |
recognizer->predict(faceImg, label, confidence); | |
cerr << "match label = " << label << endl; | |
cerr << "condidence = " << confidence << endl; | |
cerr << "nested count = " << foundNested << endl; | |
for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) { | |
if (f->id == label) { | |
forceMatchFace(f, r, roiColor, roi, faceImg, imgRect, corners, num, foundNested); | |
return true; | |
} | |
} | |
return false; | |
} | |
CascadeClassifier cascade; | |
CascadeClassifier eyes; | |
CascadeClassifier mouth; | |
CascadeClassifier nose; | |
public: | |
Detector() : nextId(1), | |
recognizer(cv::face::createLBPHFaceRecognizer(1,8,8,8, FACE_THRESHOLD)), trained(false) { | |
cascade.load( cascadeName ); | |
eyes.load(eyesName); | |
mouth.load(mouthName); | |
nose.load(noseName); | |
} | |
void setDetectArea(const Rect& detectArea_) { | |
detectArea = detectArea_; | |
} | |
void adjustDetectArea(double x, double y, bool topLeft) { | |
if (topLeft) { | |
detectArea.width -= x - detectArea.x; | |
detectArea.height -= y - detectArea.y; | |
detectArea.x = x; | |
detectArea.y = y; | |
} else { | |
detectArea.width = x - detectArea.x; | |
detectArea.height = y - detectArea.y; | |
} | |
} | |
int nestedDetect(const Rect& r, Mat& img, const Mat& face, CascadeClassifier& cc, const string& name) { | |
vector<Rect> bonusRects; | |
cout << "detecting " << name << endl; | |
cc.detectMultiScale(face, bonusRects, | |
1.1, 2 , 0 | |
//|CASCADE_FIND_BIGGEST_OBJECT | |
//|CASCADE_DO_ROUGH_SEARCH | |
|CASCADE_SCALE_IMAGE | |
, | |
Size(EYE_DETECT_MIN_SIZE, EYE_DETECT_MIN_SIZE) ); | |
cout << "Found " << bonusRects.size() << " " << name << endl; | |
for (vector<Rect>::iterator er = bonusRects.begin(); er != bonusRects.end(); er++) { | |
er->x += r.x; | |
er->y += r.y; | |
rectangle( img, cvPoint(cvRound(er->x), cvRound(er->y)), | |
cvPoint(cvRound((er->x + er->width-1)), cvRound((er->y + er->height-1))), | |
CV_RGB(255,255,0), 2, 8, 0); | |
} | |
return bonusRects.size(); | |
} | |
void detectAndDraw( int num, Mat& img, Mat& imgHsv, VideoWriter& writer) | |
{ | |
int i = 0; | |
double t = 0; | |
vector<Rect> rects; | |
Mat orig = img.clone(); | |
bool facesChanged = false; | |
const static Scalar colors[] = { CV_RGB(0,0,255), | |
CV_RGB(0,128,255), | |
CV_RGB(0,255,255), | |
CV_RGB(0,255,0), | |
CV_RGB(255,128,0), | |
CV_RGB(255,255,0), | |
CV_RGB(255,0,0), | |
CV_RGB(255,0,255)} ; | |
cvtColor( img, gray, COLOR_BGR2GRAY ); | |
equalizeHist( gray, gray ); | |
int maxCorners = 64; | |
double qualityLevel = 0.01; | |
double minDistance = 5; | |
int blockSize = 3; | |
vector<cv::Point2f> corners; | |
int win_size = 15; | |
t = (double)cvGetTickCount(); | |
if (num%5==0) { | |
Rect r = detectArea; | |
rectangle(img, Point(r.x, r.y), Point(r.x+r.width, r.y+r.height), colors[4], 1, 8, 0); | |
cout << "detecting faces" << endl; | |
cascade.detectMultiScale( gray(detectArea), rects, | |
1.05, 3, 0 | |
//|CASCADE_FIND_BIGGEST_OBJECT | |
//|CASCADE_DO_ROUGH_SEARCH | |
|CASCADE_SCALE_IMAGE | |
, | |
Size(FACE_DETECT_MIN_SIZE, FACE_DETECT_MIN_SIZE) ); | |
t = (double)cvGetTickCount() - t; | |
printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) ); | |
for( vector<Rect>::iterator r = rects.begin(); r != rects.end(); r++, i++ ) | |
{ | |
cout << "face of size " << r->width << "x" << r->height << endl; | |
r->x += detectArea.x; | |
r->y += detectArea.y; | |
Point center; | |
Scalar color = colors[i%8]; | |
float skinSum=0; | |
for (int y=r->y; y<r->y+r->height; y++) | |
{ | |
for (int x=r->x; x<r->x+r->width; x++) | |
{ | |
Vec3b bgr = img.at<Vec3b>(y, x); | |
Vec3b hsv = imgHsv.at<Vec3b>(y, x); | |
float cb = 0.148*bgr.val[2] - 0.291*bgr.val[1] + 0.439*bgr.val[0] + 128; | |
float cr = 0.439*bgr.val[2] - 0.368*bgr.val[1] - 0.071*bgr.val[0] + 128; | |
if ( ( hsv.val[0]>245 || hsv.val[0]<25.5) && 140<=cr && cr<=165 && 140<=cb && cb<=195) | |
{ | |
skinSum++; | |
} | |
} | |
} | |
//if less than 30% skin, this face doesnt count | |
if (skinSum / (r->width*r->height) < 0.3) | |
{ | |
continue; | |
//break; | |
} | |
Mat faceImg = gray(*r).clone(); | |
int eyesCount = nestedDetect(*r, img, faceImg, eyes, "eyes"); | |
// nestedDetect(*r, img, faceImg ,mouth, "mouth"); | |
int noseCount = nestedDetect(*r, img, faceImg, nose, "nose"); | |
int nestedCount = eyesCount + noseCount; | |
cropAndResizeFaceImage(faceImg); | |
Rect imgRect = makeImgRect(*r); | |
Mat roi = gray(imgRect).clone(); | |
Mat roiColor = orig(*r).clone(); | |
Rect maskRect = *r; | |
maskRect.x -= imgRect.x; | |
maskRect.y -= imgRect.y; | |
Mat mask = cv::Mat::zeros(imgRect.height, imgRect.width, CV_8U); | |
mask(maskRect) = 1; | |
cv::goodFeaturesToTrack(roi, corners, maxCorners, qualityLevel, minDistance, mask, blockSize, false, 0); | |
if (corners.empty()) | |
continue; | |
cornerSubPix( roi, corners, Size( win_size, win_size ), Size( -1, -1 ), | |
TermCriteria( CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.03 ) ); | |
for (size_t i = 0; i < corners.size(); i++) { | |
Point center = corners[i]; | |
center.x += imgRect.x; | |
center.y += imgRect.y; | |
circle(img, center, 2, colors[2], 1, 8, 0); | |
} | |
if (trained && matchFace(*r, roiColor, roi, faceImg, imgRect, corners, num, nestedCount)) { | |
facesChanged = true; | |
continue; | |
} else { | |
if (!trained) { | |
cout << "not trained" << endl; | |
} | |
} | |
Point middle(r->x + r->width / 2, r->y + r->height / 2); | |
vector<Face>::iterator o = closestFace(middle); | |
if (o != faces.end()) { | |
if (num - o->lastMatch < OVERLAP_FORCE_MATCH_TIME) { | |
cout << "forcing match!" << endl; | |
forceMatchFace(o, *r, roiColor, roi, faceImg, imgRect, corners, num, nestedCount); | |
facesChanged = true; | |
} | |
continue; | |
} | |
if (overlapsFace(*r)) { | |
cout << "overlaps with a face. not doing anything" << endl; | |
continue; | |
} | |
faces.push_back(Face(nextId++, *r, roi, faceImg, imgRect, corners, num, nestedCount)); | |
facesChanged = true; | |
rectangle( img, cvPoint(cvRound(r->x), cvRound(r->y)), | |
cvPoint(cvRound((r->x + r->width-1)), cvRound((r->y + r->height-1))), | |
colors[0], 3, 8, 0); | |
} | |
} | |
for (size_t i = 0; i < faces.size(); ) { | |
if ((num - faces[i].firstFrame > FACE_CONFIRMATION_TIME | |
&& faces[i].nestedCount < FACE_CONFIRMATION_COUNT) | |
|| | |
(num - faces[i].lastFrame > FACE_EXPIRY_TIME)) { | |
cout << "removing face " << faces[i].id << " nestedCount=" << faces[i].nestedCount << endl; | |
faces[i] = faces.back(); | |
faces.pop_back(); | |
facesChanged = true; | |
} else | |
i++; | |
} | |
for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) { | |
Point2f prev(0,0); | |
if (faces[i].nestedCount < FACE_CONFIRMATION_COUNT) | |
continue; | |
for (vector<Point2f>::iterator p = f->trail.begin(); p != f->trail.end(); p++) { | |
if (prev.x != 0) { | |
line( img, prev, *p, CV_RGB(255,0,255), 1 ); | |
} | |
prev = *p; | |
} | |
} | |
for (vector<Face>::iterator f = faces.begin(); f != faces.end(); f++) { | |
corners.clear(); | |
Mat roi = gray(f->imgRect); | |
for (size_t i = 0; i < f->corners.size(); i++) { | |
if (num - f->cornerFrames[i] > CORNER_EXPIRY_TIME) { | |
f->cornerFrames[i] = f->cornerFrames.back(); | |
f->corners[i] = f->corners.back(); | |
f->cornerFrames.pop_back(); | |
f->corners.pop_back(); | |
} else | |
i++; | |
} | |
if (f->corners.empty()) | |
continue; | |
std::vector<uchar> featuresFound; | |
featuresFound.reserve(maxCorners); | |
std::vector<float> featureErrors; | |
featureErrors.reserve(maxCorners); | |
calcOpticalFlowPyrLK( f->img, roi, f->corners, corners, featuresFound, featureErrors , | |
Size( win_size, win_size ), 5, | |
cvTermCriteria( CV_TERMCRIT_ITER | CV_TERMCRIT_EPS, 20, 0.3 ), 0 ); | |
f->lastFrame = num; | |
Point minP, maxP; | |
minP.x = 1000; | |
minP.y = 1000; | |
maxP.x = 0; | |
maxP.y = 0; | |
Point diff; | |
diff.x = 0; | |
diff.y = 0; | |
float numDiffs = 0; | |
for( size_t i=0; i < featuresFound.size(); i++ ){ | |
Point p0( f->imgRect.x + f->corners[i].x , f->imgRect.y + f->corners[i].y ); | |
Point p1( f->imgRect.x + corners[i].x , f->imgRect.y + corners[i].y ); | |
if (!featuresFound[i]) { | |
continue; | |
} | |
if (featureErrors[i] > MAX_FEATURE_ERROR) | |
continue; | |
line( img, p0, p1, CV_RGB(255,255,255), 2 ); | |
f->corners[i] = corners[i]; | |
f->cornerFrames[i] = num; | |
diff += p1 - p0; | |
if (p1.x < minP.x) | |
minP.x = p1.x; | |
if (p1.y < minP.y) | |
minP.y = p1.y; | |
if (p1.x > maxP.x) | |
maxP.x = p1.x; | |
if (p1.y > maxP.y) | |
maxP.y = p1.y; | |
numDiffs++; | |
} | |
if (numDiffs) { | |
diff.x /= numDiffs; | |
diff.y /= numDiffs; | |
f->rect += diff; | |
f->imgRect = makeImgRect(f->rect); | |
f->img = gray(f->imgRect).clone(); | |
} | |
Rect r = f->rect; | |
f->pushTrail(r); | |
int fontFace = FONT_HERSHEY_SCRIPT_SIMPLEX; | |
double fontScale = 2; | |
int thickness = 3; | |
int baseline=0; | |
rectangle(img, Point(r.x, r.y), Point(r.x+r.width, r.y+r.height), colors[3], 1, 8, 0); | |
char buf[32]; | |
snprintf(buf, 32, "%d", f->id); | |
Size textSize = getTextSize(buf, fontFace, | |
fontScale, thickness, &baseline); | |
Point textOrg(r.x + r.width / 2 - textSize.width / 2, r.y + r.height / 2 - textSize.width/2); | |
putText(img, buf, textOrg, fontFace, fontScale, | |
Scalar::all(255), thickness, 8); | |
} | |
if (facesChanged) | |
updateFaces(); | |
drawFaces(img); | |
writer.write(img); | |
cv::resize(img, img, cv::Size(), 0.5, 0.5); | |
cv::imshow( "result", img ); | |
} | |
}; | |
void mouseCallback(int event, int x, int y, int flags, void* userdata) | |
{ | |
Detector* detector = (Detector*) userdata; | |
if ( event == EVENT_LBUTTONDOWN ) | |
{ | |
cout << "Left button of the mouse is clicked - position (" << x << ", " << y << ")" << endl; | |
detector->adjustDetectArea(x*2, y*2, true); | |
} | |
else if ( event == EVENT_RBUTTONDOWN ) | |
{ | |
cout << "Right button of the mouse is clicked - position (" << x << ", " << y << ")" << endl; | |
detector->adjustDetectArea(x*2, y*2, false); | |
} | |
else if ( event == EVENT_MBUTTONDOWN ) | |
{ | |
cout << "Middle button of the mouse is clicked - position (" << x << ", " << y << ")" << endl; | |
} | |
else if ( event == EVENT_MOUSEMOVE ) | |
{ | |
cout << "Mouse move over the window - position (" << x << ", " << y << ")" << endl; | |
} | |
} | |
int main( int argc, const char** argv ) | |
{ | |
cv::ocl::setUseOpenCL(false); | |
Mat frame, frameCopy, image, frameHsv; | |
string inputName; | |
bool tryflip = false; | |
inputName.assign(argv[1]); | |
Detector detector; | |
cvNamedWindow( "result", 1 ); | |
setMouseCallback("result",mouseCallback, &detector); | |
VideoCapture capture(inputName.c_str()); | |
VideoWriter writer; | |
Size S = Size((int) capture.get(CV_CAP_PROP_FRAME_WIDTH), | |
(int) capture.get(CV_CAP_PROP_FRAME_HEIGHT)); | |
if (TRANSPOSE) { | |
int tmp = S.width; | |
S.width = S.height; | |
S.height = tmp; | |
} | |
detector.setDetectArea(Rect(0, 0, S.width, S.height)); | |
int ex = static_cast<int>(capture.get(CV_CAP_PROP_FOURCC)); | |
char EXT[] = {(char)(ex & 0XFF) , (char)((ex & 0XFF00) >> 8),(char)((ex & 0XFF0000) >> 16),(char)((ex & 0XFF000000) >> 24), 0}; | |
cout << "Input codec type: " << EXT << endl; | |
string out = "out-" + inputName.substr(0, inputName.size() -4) + ".mov"; | |
//writer.open(("out" + inputName).c_str(), ex, capture.get(CV_CAP_PROP_FPS), S, true); | |
writer.open(out.c_str(), cv::VideoWriter::fourcc('m','p','4','v'), capture.get(CV_CAP_PROP_FPS), S, true); | |
if (!writer.isOpened()) { | |
cerr << "could not open output video" << endl; | |
return -1; | |
} | |
int num; | |
for(;;) | |
{ | |
bool done = false; | |
for (int i = 0; i < 1; i++) { | |
if (!capture.read(frame)) { | |
done = true; | |
break; | |
} | |
} | |
if (done) | |
break; | |
if (TRANSPOSE) { | |
frame = frame.t(); | |
flip(frame, frame,1); | |
} | |
cvtColor(frame, frameHsv, CV_BGR2HSV); | |
detector.detectAndDraw( num, frame, frameHsv, writer); | |
num++; | |
if( waitKey( 9 ) >= 0 ) | |
goto _cleanup_; | |
} | |
writer.release(); | |
_cleanup_: | |
cvDestroyWindow("result"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment