Gist by @prostoiChelovek, created July 20, 2019
https://gist.github.com/prostoiChelovek/aa3def0175c800e2e3e31ad2f71d6d57

# ----------------------------------------------------------------------------------------
# CMakeLists.txt
# ----------------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.11)
project(dlibFaces)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
add_subdirectory($ENV{HOME}/dlib dlib_build)
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})
set(SOURCE_FILES main.cpp faces/FaceRecognizer.cpp faces/FaceRecognizer.h faces/utils.hpp)
add_executable(dlibFaces ${SOURCE_FILES})
target_link_libraries(dlibFaces dlib ${OpenCV_LIBS})
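# A minimal configure-and-build sketch (assuming dlib is cloned to $ENV{HOME}/dlib,
# as the add_subdirectory call above expects, and OpenCV is installed system-wide):
#   mkdir build && cd build
#   cmake .. && make
# At runtime the executable also expects shape_predictor_5_face_landmarks.dat,
# dlib_face_recognition_resnet_model_v1.dat and the faces/models/ files in its
# working directory.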
// ----------------------------------------------------------------------------------------
// main.cpp
// ----------------------------------------------------------------------------------------
#include <iostream>
#include <vector>
#include <string>
#include <ctime>
#include <fstream>
#include <sstream>
#include <sys/stat.h>
#include <dirent.h>
#include <algorithm> // std::find; previously pulled in via the non-portable <bits/stdc++.h>
#include <cstring>   // strerror
#include <cerrno>    // errno
#include <dlib/dnn.h>
#include <dlib/gui_widgets.h>
#include <dlib/clustering.h>
#include <dlib/image_io.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/opencv.h>
#include <dlib/svm_threaded.h>
#include <opencv2/opencv.hpp>
// https://github.com/prostoiChelovek/faceDetector
#include "faces/FaceRecognizer.h"
#include "faces/utils.hpp"
using namespace dlib;
using namespace std;
using namespace cv;
const string configFile = "faces/models/deploy.prototxt";
const string weightFile = "faces/models/res10_300x300_ssd_iter_140000_fp16.caffemodel";
typedef matrix<double, 0, 1> sample_type;
// The main object in this example program is the one_vs_one_trainer. It is essentially
// a container class for regular binary classifier trainer objects. In particular, it
// uses the any_trainer object to store any kind of trainer object that implements a
// .train(samples,labels) function which returns some kind of learned decision function.
// It uses these binary classifiers to construct a voting multiclass classifier. If
// there are N classes then it trains N*(N-1)/2 binary classifiers, one for each pair of
// labels, which then vote on the label of a sample.
//
// In this example program we will work with a one_vs_one_trainer object which stores any
// kind of trainer that uses our sample_type samples.
typedef one_vs_one_trainer<any_trainer<sample_type> > ovo_trainer;
typedef polynomial_kernel<sample_type> poly_kernel;
typedef radial_basis_kernel<sample_type> rbf_kernel;
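// For example, with 4 distinct labels the trainer builds 4*(4-1)/2 = 6 binary
// classifiers: (0,1), (0,2), (0,3), (1,2), (1,3) and (2,3).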
// ----------------------------------------------------------------------------------------
// The next bit of code defines a ResNet network. It's basically copied
// and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss
// layer with loss_metric and made the network somewhat smaller. Go read the introductory
// dlib DNN examples to learn what all this stuff means.
//
// Also, the dnn_metric_learning_on_images_ex.cpp example shows how to train this network.
// The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
// essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
// mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
// were set to 10000, and the training dataset consisted of about 3 million images instead of
// 55. Also, the input layer was locked to images of size 150.
template<template<int, template<typename> class, int, typename> class block, int N,
template<typename> class BN, typename SUBNET>
using residual = add_prev1<block<N, BN, 1, tag1<SUBNET>>>;
template<template<int, template<typename> class, int, typename> class block, int N,
template<typename> class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2, 2, 2, 2, skip1<tag2<block<N, BN, 2, tag1<SUBNET>>>>>>;
template<int N, template<typename> class BN, int stride, typename SUBNET>
using block = BN<con<N, 3, 3, 1, 1, relu<BN<con<N, 3, 3, stride, stride, SUBNET>>>>>;
template<int N, typename SUBNET> using ares = relu<residual<block, N, affine, SUBNET>>;
template<int N, typename SUBNET> using ares_down = relu<residual_down<block, N, affine, SUBNET>>;
template<typename SUBNET> using alevel0 = ares_down<256, SUBNET>;
template<typename SUBNET> using alevel1 = ares<256, ares<256, ares_down<256, SUBNET>>>;
template<typename SUBNET> using alevel2 = ares<128, ares<128, ares_down<128, SUBNET>>>;
template<typename SUBNET> using alevel3 = ares<64, ares<64, ares<64, ares_down<64, SUBNET>>>>;
template<typename SUBNET> using alevel4 = ares<32, ares<32, ares<32, SUBNET>>>;
using anet_type = loss_metric<fc_no_bias<128, avg_pool_everything<
alevel0<
alevel1<
alevel2<
alevel3<
alevel4<
max_pool<3, 3, 2, 2,
relu<affine<con<
32, 7, 7, 2, 2,
input_rgb_image_sized<150>
>>>>>>>>>>>>;
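// The resulting anet_type maps a 150x150 face chip to a 128-D embedding
// (the fc_no_bias<128> layer above); those embeddings are what get saved and
// classified below.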
// ----------------------------------------------------------------------------------------
std::vector<matrix<rgb_pixel>> jitter_image(
const matrix<rgb_pixel> &img
);
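// Convert an OpenCV rectangle into a dlib one. dlib rectangles include their
// bottom-right corner, hence the -1 on br().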
static dlib::rectangle openCVRectToDlib(const cv::Rect &r) {
return dlib::rectangle((long) r.tl().x, (long) r.tl().y, (long) r.br().x - 1, (long) r.br().y - 1);
}
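// Convert a dlib RGB matrix back into a BGR cv::Mat so it can be shown with imshow().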
template<typename T>
Mat dlib2cv(matrix<T> matr) {
Mat mat = toMat(matr);
Mat bgr;
cvtColor(mat, bgr, COLOR_RGB2BGR);
return bgr;
}
std::vector<string> list_directory(const std::string &name, const string &ext = "") {
std::vector<string> v;
DIR *dirp = opendir(name.c_str());
if (dirp == nullptr) // the directory may not exist yet
return v;
struct dirent *dp;
while ((dp = readdir(dirp)) != nullptr) {
string n = dp->d_name;
if (!ext.empty() && n.find("." + ext) == string::npos)
continue;
v.push_back(n);
}
closedir(dirp);
return v;
}
bool createDirNotExists(const string &name) {
struct stat st{};
if (stat(name.c_str(), &st) == 0) {
if (S_ISDIR(st.st_mode)) // already exists as a directory
return false;
}
if (mkdir(name.c_str(), 0777) == -1) {
log(ERROR, "Cannot create directory", name, ":", strerror(errno));
return false;
}
return true;
}
int main(int argc, char **argv) {
// The first thing we are going to do is load all our models. First, since we need to
// find faces in the image we will need a face detector:
frontal_face_detector detector = get_frontal_face_detector();
// We will also use a face landmarking model to align faces to a standard pose: (see face_landmark_detection_ex.cpp for an introduction)
shape_predictor sp;
deserialize("shape_predictor_5_face_landmarks.dat") >> sp;
// And finally we load the DNN responsible for face recognition.
anet_type net;
deserialize("dlib_face_recognition_resnet_model_v1.dat") >> net;
std::vector<string> names;
fstream namesFs("names.txt", ios::in | ios::out | ios::app);
string str;
while (getline(namesFs, str)) {
if (!str.empty())
names.push_back(str);
}
string faceDescsPath = "faceDescriptors";
createDirNotExists(faceDescsPath);
FaceRecognizer::FaceRecognizer recognizer;
if (!recognizer.readNet(configFile, weightFile)) {
return EXIT_FAILURE;
}
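// Keyboard controls for the preview window:
//   s   - save descriptors of the currently visible faces (prompts for names)
//   t   - train the one-vs-one classifier on all saved descriptors
//   r   - classify the currently visible faces with the trained model
//   ESC - quit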
VideoCapture cap(0);
std::vector<matrix<rgb_pixel>> faces;
while (cap.isOpened()) {
Mat cvImg;
cap >> cvImg;
if (cvImg.empty()) // the camera can return an empty frame, e.g. when it disconnects
break;
resize(cvImg, cvImg, Size(640, 480));
// cv_image wraps the Mat's pixel data without copying it
cv_image<bgr_pixel> img = cvImg;
faces.clear();
recognizer.detectFaces(cvImg);
std::vector<dlib::rectangle> dFaces;
for (const auto &f : recognizer.faces) {
dFaces.emplace_back(openCVRectToDlib(f.rect));
}
// For each detected face, extract a copy that has been normalized to
// 150x150 pixels in size and appropriately rotated and centered.
int i = 0;
for (dlib::rectangle face : dFaces) {
full_object_detection shape = sp(img, face);
matrix<rgb_pixel> face_chip;
extract_image_chip(img, get_face_chip_details(shape, 150, 0.25), face_chip);
for (unsigned long j = 0; j < shape.num_parts(); j++) {
dlib::point pt = shape.part(j);
circle(cvImg, Point(pt.x(), pt.y()), 4, Scalar(0, 255, 0), FILLED);
}
imshow(to_string(i), dlib2cv(face_chip));
i++;
waitKey(1);
faces.push_back(move(face_chip));
}
recognizer.draw(cvImg);
imshow("result", cvImg);
char key = waitKey(6);
if (key == 27)
break;
if (key == 's' && !faces.empty()) {
clock_t begin = clock();
// This call asks the DNN to convert each face image in faces into a 128D vector.
// In this 128D vector space, images from the same person will be close to each other
// but vectors from different people will be far apart. So we can use these vectors to
// identify if a pair of images are from the same person or from different people.
std::vector<matrix<float, 0, 1>> face_descriptors = net(faces);
clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << elapsed_secs << endl;
for (size_t i = 0; i < face_descriptors.size(); i++) {
cout << "Whose face is #" << i << "?" << endl;
string name;
getline(cin, name);
if (std::find(names.begin(), names.end(), name) == names.end()) {
names.emplace_back(name);
ofstream namesFs("names.txt", ios::app);
namesFs << name << endl;
namesFs.flush();
}
int index = std::distance(names.begin(), find(names.begin(), names.end(), name));
string path = faceDescsPath + "/" + to_string(index) + ".csv";
ofstream descFs(path, ios::app);
stringstream descSS;
for (float d : face_descriptors[i]) {
descSS << d << " ";
}
string descStr = descSS.str();
descStr.pop_back();
descFs << descStr << endl;
descFs.flush();
cout << "Face descriptor for " << name << " written to " << path << endl;
}
}
if (key == 't') {
// Now we make objects to contain our samples and their respective labels.
std::vector<sample_type> samples;
std::vector<double> labels;
std::vector<string> files = list_directory(faceDescsPath, "csv");
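// Each faceDescriptors/<label>.csv holds the descriptors saved for
// names[<label>], one space-separated descriptor per line.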
for (const string &file : files) {
string labelStr = file.substr(0, file.find(".csv"));
double label = stod(labelStr);
ifstream descFS(faceDescsPath + "/" + file);
std::vector<string> lines;
while (getline(descFS, str)) {
if (!str.empty())
lines.push_back(str);
}
std::vector<std::vector<double>> descs;
for (string &line : lines) {
descs.emplace_back(std::vector<double>{});
std::vector<string> nums = split(line, " ");
for (string &num : nums) {
descs[descs.size() - 1].emplace_back(stod(num));
}
}
for (std::vector<double> &desc : descs) {
labels.emplace_back(label);
samples.emplace_back(mat(desc));
}
}
ovo_trainer trainer;
// make the binary trainers and set some parameters
krr_trainer<rbf_kernel> rbf_trainer;
svm_nu_trainer<poly_kernel> poly_trainer;
poly_trainer.set_kernel(poly_kernel(0.1, 1, 2));
rbf_trainer.set_kernel(rbf_kernel(0.1));
// Now tell the one_vs_one_trainer that, by default, it should use the rbf_trainer
// to solve the individual binary classification subproblems.
trainer.set_trainer(rbf_trainer);
// We can also get more specific. Here we tell the one_vs_one_trainer to use the
// poly_trainer to solve the class 1 vs class 2 subproblem. All the others will
// still be solved with the rbf_trainer.
trainer.set_trainer(poly_trainer, 1, 2);
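// Note: the 5-fold cross validation below needs at least 5 saved descriptors
// per person, so save a few samples for everyone before pressing 't'.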
cout << "cross validation: \n" << cross_validate_multiclass_trainer(trainer, samples, labels, 5) << endl;
one_vs_one_decision_function<ovo_trainer> df = trainer.train(samples, labels);
cout << "predicted label: " << df(samples[0]) << ", true label: " << labels[0] << endl;
one_vs_one_decision_function<ovo_trainer,
decision_function<poly_kernel>, // This is the output of the poly_trainer
decision_function<rbf_kernel> // This is the output of the rbf_trainer
> df2;
df2 = df;
serialize("df.dat") << df2;
}
if (key == 'r' && !faces.empty()) {
one_vs_one_decision_function<ovo_trainer,
decision_function<poly_kernel>, // This is the output of the poly_trainer
decision_function<rbf_kernel> // This is the output of the rbf_trainer
> df;
deserialize("df.dat") >> df;
std::vector<matrix<float, 0, 1>> face_descriptorsF = net(faces);
std::vector<sample_type> face_descriptors;
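// The classifier was trained on matrix<double> samples (sample_type), so copy
// each float descriptor from the net into a double column vector first.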
for(auto &desc : face_descriptorsF) {
std::vector<double> descVec;
for (long r = 0; r < desc.nr(); r++) {
descVec.emplace_back(desc(r,0));
}
face_descriptors.emplace_back(mat(descVec));
}
int i = 0;
for(sample_type &desc : face_descriptors) {
log(INFO, i, ":", df(desc));
i++;
auto &dfs = df.get_binary_decision_functions();
// run all the classifiers over the sample
for(auto &d : dfs) {
const auto score = d.second(desc);
log(INFO, score);
}
}
waitKey(0);
}
}
}
// ----------------------------------------------------------------------------------------
std::vector<matrix<rgb_pixel>> jitter_image(
const matrix<rgb_pixel> &img
) {
// All this function does is make 100 copies of img, all slightly jittered by being
// zoomed, rotated, and translated a little bit differently. They are also randomly
// mirrored left to right.
thread_local dlib::rand rnd;
std::vector<matrix<rgb_pixel>> crops;
for (int i = 0; i < 100; ++i)
crops.push_back(jitter_image(img, rnd));
return crops;
}