Created July 20, 2019 13:12
CMakeLists.txt
cmake_minimum_required(VERSION 3.11)
project(dlibFaces)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_subdirectory($ENV{HOME}/dlib dlib_build)

find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})

set(SOURCE_FILES main.cpp faces/FaceRecognizer.cpp faces/FaceRecognizer.h faces/utils.hpp)
add_executable(dlibFaces ${SOURCE_FILES})
target_link_libraries(dlibFaces dlib ${OpenCV_LIBS})
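
# NOTE: a minimal sketch of how this project might be configured and built, assuming
# dlib is cloned to $HOME/dlib (as add_subdirectory above expects) and the faces/
# sources from https://github.com/prostoiChelovek/faceDetector sit next to main.cpp:
#   mkdir build && cd build
#   cmake ..
#   cmake --build .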
main.cpp
#include <iostream>
#include <vector>
#include <ctime>
#include <fstream>
#include <sstream>

#include <sys/stat.h>
#include <dirent.h>

#include <bits/stdc++.h>

#include <dlib/dnn.h>
#include <dlib/gui_widgets.h>
#include <dlib/clustering.h>
#include <dlib/image_io.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <dlib/opencv.h>
#include <dlib/svm_threaded.h>

#include <opencv2/opencv.hpp>

// https://github.com/prostoiChelovek/faceDetector
#include "faces/FaceRecognizer.h"
#include "faces/utils.hpp"

using namespace dlib;
using namespace std;
using namespace cv;

const string configFile = "faces/models/deploy.prototxt";
const string weightFile = "faces/models/res10_300x300_ssd_iter_140000_fp16.caffemodel";

typedef matrix<double, 0, 1> sample_type;

// The main object in this example program is the one_vs_one_trainer. It is essentially
// a container class for regular binary classifier trainer objects. In particular, it
// uses the any_trainer object to store any kind of trainer object that implements a
// .train(samples,labels) function which returns some kind of learned decision function.
// It uses these binary classifiers to construct a voting multiclass classifier. If
// there are N classes then it trains N*(N-1)/2 binary classifiers, one for each pair of
// labels, which then vote on the label of a sample.
//
// In this example program we will work with a one_vs_one_trainer object which stores any
// kind of trainer that uses our sample_type samples.
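//
// For instance, with 4 enrolled people (labels 0..3) the one_vs_one_trainer below ends
// up fitting 4*3/2 = 6 binary classifiers, and a new descriptor is assigned the label
// that wins the most of those 6 pairwise votes.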
typedef one_vs_one_trainer<any_trainer<sample_type> > ovo_trainer;
typedef polynomial_kernel<sample_type> poly_kernel;
typedef radial_basis_kernel<sample_type> rbf_kernel;

// ----------------------------------------------------------------------------------------

// The next bit of code defines a ResNet network. It's basically copied
// and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss
// layer with loss_metric and made the network somewhat smaller. Go read the introductory
// dlib DNN examples to learn what all this stuff means.
//
// Also, the dnn_metric_learning_on_images_ex.cpp example shows how to train this network.
// The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
// essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
// mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
// was set to 10000, and the training dataset consisted of about 3 million images instead of
// 55. Also, the input layer was locked to images of size 150.
template<template<int, template<typename> class, int, typename> class block, int N,
        template<typename> class BN, typename SUBNET>
using residual = add_prev1<block<N, BN, 1, tag1<SUBNET>>>;

template<template<int, template<typename> class, int, typename> class block, int N,
        template<typename> class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2, 2, 2, 2, skip1<tag2<block<N, BN, 2, tag1<SUBNET>>>>>>;

template<int N, template<typename> class BN, int stride, typename SUBNET>
using block = BN<con<N, 3, 3, 1, 1, relu<BN<con<N, 3, 3, stride, stride, SUBNET>>>>>;

template<int N, typename SUBNET> using ares = relu<residual<block, N, affine, SUBNET>>;
template<int N, typename SUBNET> using ares_down = relu<residual_down<block, N, affine, SUBNET>>;

template<typename SUBNET> using alevel0 = ares_down<256, SUBNET>;
template<typename SUBNET> using alevel1 = ares<256, ares<256, ares_down<256, SUBNET>>>;
template<typename SUBNET> using alevel2 = ares<128, ares<128, ares_down<128, SUBNET>>>;
template<typename SUBNET> using alevel3 = ares<64, ares<64, ares<64, ares_down<64, SUBNET>>>>;
template<typename SUBNET> using alevel4 = ares<32, ares<32, ares<32, SUBNET>>>;

using anet_type = loss_metric<fc_no_bias<128, avg_pool_everything<
        alevel0<
        alevel1<
        alevel2<
        alevel3<
        alevel4<
        max_pool<3, 3, 2, 2,
        relu<affine<con<
        32, 7, 7, 2, 2,
        input_rgb_image_sized<150>
        >>>>>>>>>>>>;
// ----------------------------------------------------------------------------------------

std::vector<matrix<rgb_pixel>> jitter_image(
        const matrix<rgb_pixel> &img
);

static dlib::rectangle openCVRectToDlib(const cv::Rect &r) {
    return dlib::rectangle((long) r.tl().x, (long) r.tl().y, (long) r.br().x - 1, (long) r.br().y - 1);
}

template<typename T>
Mat dlib2cv(matrix<T> matr) {
    Mat mat = toMat(matr);
    Mat bgr;
    cvtColor(mat, bgr, COLOR_RGB2BGR);
    return bgr;
}
std::vector<string> list_directory(const std::string &name, const string &ext = "") {
    std::vector<string> v;
    DIR *dirp = opendir(name.c_str());
    if (dirp == nullptr) // the directory may not exist yet
        return v;
    struct dirent *dp;
    while ((dp = readdir(dirp)) != nullptr) {
        string n = dp->d_name;
        if (!ext.empty() && n.find("." + ext) == string::npos)
            continue;
        else
            v.push_back(n);
    }
    closedir(dirp);
    return v;
}
bool createDirNotExists(const string &name) {
    struct stat st{};
    if (stat(name.c_str(), &st) == 0) {
        if ((st.st_mode & S_IFDIR) != 0) // note: the & must be parenthesized, != binds tighter
            return false;
    }
    if (mkdir(name.c_str(), 0777) == -1) {
        log(ERROR, "Cannot create directory", name, ":", strerror(errno));
        return false;
    }
    return true;
}
int main(int argc, char **argv) {
    // The first thing we are going to do is load all our models. First, since we need to
    // find faces in the image we will need a face detector:
    frontal_face_detector detector = get_frontal_face_detector();
    // We will also use a face landmarking model to align faces to a standard pose:
    // (see face_landmark_detection_ex.cpp for an introduction)
    shape_predictor sp;
    deserialize("shape_predictor_5_face_landmarks.dat") >> sp;
    // And finally we load the DNN responsible for face recognition.
    anet_type net;
    deserialize("dlib_face_recognition_resnet_model_v1.dat") >> net;
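    // NOTE: both .dat files above are pretrained models distributed with dlib's examples;
    // they can be downloaded (bzip2-compressed) from http://dlib.net/files/ as
    // shape_predictor_5_face_landmarks.dat.bz2 and dlib_face_recognition_resnet_model_v1.dat.bz2,
    // and are expected here in the working directory.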
    std::vector<string> names;
    fstream namesFs("names.txt", ios::in | ios::out | ios::app);
    string str;
    while (getline(namesFs, str)) {
        if (!str.empty())
            names.push_back(str);
    }

    string faceDescsPath = "faceDescriptors";
    createDirNotExists(faceDescsPath);

    FaceRecognizer::FaceRecognizer recognizer;
    if (!recognizer.readNet(configFile, weightFile)) {
        return EXIT_FAILURE;
    }
    VideoCapture cap(0);
    std::vector<matrix<rgb_pixel>> faces;
    while (cap.isOpened()) {
        Mat cvImg;
        cap >> cvImg;
        if (cvImg.empty()) // camera read failed or stream ended
            break;
        resize(cvImg, cvImg, Size(640, 480));
        cv_image<bgr_pixel> img = cvImg;

        faces.clear();
        recognizer.detectFaces(cvImg);
        std::vector<dlib::rectangle> dFaces;
        for (const auto &f : recognizer.faces) {
            dFaces.emplace_back(openCVRectToDlib(f.rect));
        }
        // For each face found by the recognizer above, extract a copy that has been
        // normalized to 150x150 pixels in size and appropriately rotated and centered.
        int i = 0;
        for (dlib::rectangle face : dFaces) {
            full_object_detection shape = sp(img, face);
            matrix<rgb_pixel> face_chip;
            extract_image_chip(img, get_face_chip_details(shape, 150, 0.25), face_chip);
            for (int j = 0; j < shape.num_parts(); j++) {
                dlib::point pt = shape.part(j);
                circle(cvImg, Point(pt.x(), pt.y()), 4, Scalar(0, 255, 0), FILLED);
            }
            imshow(to_string(i), dlib2cv(face_chip));
            i++;
            waitKey(1);
            faces.push_back(move(face_chip));
        }

        recognizer.draw(cvImg);
        imshow("result", cvImg);

        char key = waitKey(6);
        if (key == 27)
            break;

        if (key == 's' && !faces.empty()) {
            clock_t begin = clock();
            // This call asks the DNN to convert each face image in faces into a 128D vector.
            // In this 128D vector space, images from the same person will be close to each other
            // but vectors from different people will be far apart. So we can use these vectors to
            // identify if a pair of images are from the same person or from different people.
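            //
            // For example (not done in this program), two such descriptors can be compared
            // directly: with dlib's pretrained model a Euclidean distance below roughly 0.6
            // usually means the two chips show the same person:
            //   if (length(face_descriptors[a] - face_descriptors[b]) < 0.6) { /* same person */ }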
            std::vector<matrix<float, 0, 1>> face_descriptors = net(faces);
            clock_t end = clock();
            double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
            cout << elapsed_secs << endl;

            for (size_t i = 0; i < face_descriptors.size(); i++) {
                cout << "Whose is the " << i << "th face?" << endl;
                string name;
                getline(cin, name);
                if (std::find(names.begin(), names.end(), name) == names.end()) {
                    names.emplace_back(name);
                    ofstream namesFs("names.txt", ios::app);
                    namesFs << name << endl;
                    namesFs.flush();
                }
                int index = std::distance(names.begin(), find(names.begin(), names.end(), name));
                string path = faceDescsPath + "/" + to_string(index) + ".csv";
                ofstream descFs(path, ios::app);
                stringstream descSS;
                for (float d : face_descriptors[i]) {
                    descSS << d << " ";
                }
                string descStr = descSS.str();
                descStr.pop_back(); // drop the trailing space
                descFs << descStr << endl;
                descFs.flush();
                cout << "Face descriptor for " << name << " written to " << path << endl;
            }
        }
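
        // Each person's samples therefore live in faceDescriptors/<label index>.csv, one
        // space-separated 128-value descriptor per line; the training branch below relies
        // on exactly that layout.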
        if (key == 't') {
            // Now we make objects to contain our samples and their respective labels.
            std::vector<sample_type> samples;
            std::vector<double> labels;

            std::vector<string> files = list_directory(faceDescsPath, "csv");
            for (const string &file : files) {
                string labelStr = file.substr(0, file.find(".csv"));
                double label = stod(labelStr);
                ifstream descFS(faceDescsPath + "/" + file);
                std::vector<string> lines;
                while (getline(descFS, str)) {
                    if (!str.empty())
                        lines.push_back(str);
                }
                std::vector<std::vector<double>> descs;
                for (string &line : lines) {
                    descs.emplace_back(std::vector<double>{});
                    std::vector<string> nums = split(line, " ");
                    for (string &num : nums) {
                        descs[descs.size() - 1].emplace_back(stod(num));
                    }
                }
                for (std::vector<double> &desc : descs) {
                    labels.emplace_back(label);
                    samples.emplace_back(mat(desc));
                }
            }

            ovo_trainer trainer;
            // make the binary trainers and set some parameters
            krr_trainer<rbf_kernel> rbf_trainer;
            svm_nu_trainer<poly_kernel> poly_trainer;
            poly_trainer.set_kernel(poly_kernel(0.1, 1, 2));
            rbf_trainer.set_kernel(rbf_kernel(0.1));
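            // NOTE: these kernel parameters appear to be taken as-is from dlib's
            // multiclass_classification_ex.cpp; for 128-D face descriptors they may well
            // need cross-validated tuning rather than these defaults.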
            // Now tell the one_vs_one_trainer that, by default, it should use the rbf_trainer
            // to solve the individual binary classification subproblems.
            trainer.set_trainer(rbf_trainer);
            // We can also get more specific. Here we tell the one_vs_one_trainer to use the
            // poly_trainer to solve the class 1 vs class 2 subproblem. All the others will
            // still be solved with the rbf_trainer.
            trainer.set_trainer(poly_trainer, 1, 2);

            cout << "cross validation: \n" << cross_validate_multiclass_trainer(trainer, samples, labels, 5) << endl;

            one_vs_one_decision_function<ovo_trainer> df = trainer.train(samples, labels);
            cout << "predicted label: " << df(samples[0]) << ", true label: " << labels[0] << endl;

            one_vs_one_decision_function<ovo_trainer,
                    decision_function<poly_kernel>, // This is the output of the poly_trainer
                    decision_function<rbf_kernel>   // This is the output of the rbf_trainer
            > df2;
            df2 = df;
            serialize("df.dat") << df2;
        }
        if (key == 'r' && !faces.empty()) {
            one_vs_one_decision_function<ovo_trainer,
                    decision_function<poly_kernel>, // This is the output of the poly_trainer
                    decision_function<rbf_kernel>   // This is the output of the rbf_trainer
            > df;
            deserialize("df.dat") >> df;

            std::vector<matrix<float, 0, 1>> face_descriptorsF = net(faces);
            std::vector<sample_type> face_descriptors;
            for (auto &desc : face_descriptorsF) {
                std::vector<double> descVec;
                for (unsigned int r = 0; r < desc.nr(); r += 1) {
                    descVec.emplace_back(desc(r, 0));
                }
                face_descriptors.emplace_back(mat(descVec));
            }

            int i = 0;
            for (sample_type &desc : face_descriptors) {
                log(INFO, i, ":", df(desc));
                i++;
                auto &dfs = df.get_binary_decision_functions();
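                // (Assumption: get_binary_decision_functions() exposes the table of pairwise
                // classifiers keyed by the unordered pair of labels each one separates, so
                // d.second below is the binary decision function for that pair of people.)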
                // run all the classifiers over the sample
                for (auto &d : dfs) {
                    const auto score = d.second(desc);
                    log(INFO, score);
                }
            }

            waitKey(0);
        }
    }
}
// ----------------------------------------------------------------------------------------

std::vector<matrix<rgb_pixel>> jitter_image(
        const matrix<rgb_pixel> &img
) {
    // All this function does is make 100 copies of img, all slightly jittered by being
    // zoomed, rotated, and translated a little bit differently. They are also randomly
    // mirrored left to right.
    thread_local dlib::rand rnd;

    std::vector<matrix<rgb_pixel>> crops;
    for (int i = 0; i < 100; ++i)
        crops.push_back(jitter_image(img, rnd));

    return crops;
}
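
// In dlib's dnn_face_recognition_ex.cpp this helper is used to compute a more robust
// embedding by averaging the descriptors of the jittered copies, e.g. (a sketch, not
// wired into main() above):
//   matrix<float, 0, 1> face_descriptor = mean(mat(net(jitter_image(faces[0]))));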