Created
October 5, 2019 19:11
-
-
Save berak/8de8b5f59bf814495e3314376096c80b to your computer and use it in GitHub Desktop.
resnet-34_kinetics action recognition
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <opencv2/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

using namespace cv;
using namespace dnn;
// Minimum spacing between two frames sampled into a clip, in seconds
// (it is multiplied by getTickFrequency() to obtain a tick count).
const double SAMPLE_DIST = 0.2;
// Builds the 5D input blob for the resnet-34_kinetics action recognition
// network (adapted from OpenCV's test_onnx_importer.cpp).
//
// images: the frames of one clip. The output shape is hard-coded to
//         {1, 3, 16, 112, 112}, so exactly 16 3-channel frames are required;
//         any other count makes the final reshape fail.
// returns: a freshly allocated blob of shape {1, 3, 16, 112, 112}
//          (presumably batch, channels, frames, height, width -- confirm
//          against the model's expected input layout).
Mat blob5D(const std::vector<Mat> &images) {
    // 4D blob with one entry per frame: resized/cropped to 112x112, mean
    // subtracted, with the R and B channels swapped.
    Mat blob0 = blobFromImages(images, 1.0, Size(112, 112), Scalar(114.7748, 107.7354, 99.4750), true, true);

    // Throw-away single-layer network whose only job is to swap the first
    // two axes of blob0 (frames <-> channels).
    Net permute;
    LayerParams lp;
    int order[] = {1, 0, 2, 3};
    lp.set("order", DictValue::arrayInt<int*>(&order[0], 4));
    permute.addLayerToPrev("perm", "Permute", lp);

    permute.setInput(blob0);
    // clone(): detach the result from the buffers owned by the temporary net.
    Mat input0 = permute.forward().clone();

    // Prepend a batch axis of size 1 to obtain the final 5D blob.
    int dims[] = {1, 3, 16, 112, 112};
    return input0.reshape(0, 5, &dims[0]);
}
int main(int argc, char** argv) { | |
const std::string modelName = "c:/data/dnn/resnet-34_kinetics.onnx"; | |
const std::string className = "c:/data/dnn/action_recongnition_kinetics.txt"; | |
std::vector<std::string> classes; | |
std::ifstream ifs(className.c_str()); | |
if (ifs.is_open()) { | |
std::string line; | |
while (std::getline(ifs, line)) { | |
classes.push_back(line); | |
} | |
} | |
Net net = readNet(modelName); | |
VideoCapture cap(0); | |
// Process frames. | |
std::vector<Mat> frames; | |
Mat frame; | |
int64 t0 = getTickCount(); | |
int64 dt = SAMPLE_DIST * getTickFrequency(); | |
while (waitKey(10) < 0) { | |
cap >> frame; | |
if (frame.empty()) { | |
return 0; | |
} | |
int64 t1 = getTickCount(); | |
if ((t1 - t0 > dt) && (frames.size() < 16)) { | |
frames.push_back(frame); | |
std::cout << frames.size() << "\r"; | |
t0 = t1; | |
} | |
if (frames.size() == 16) { | |
//! [Create a 5D(!) blob from a batch of 16 frames] | |
Mat blob = blob5D(frames); | |
net.setInput(blob); | |
Mat prob = net.forward(); | |
//! [Get the class with the highest score] | |
Point classIdPoint; | |
double confidence; | |
minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint); | |
int classId = classIdPoint.x; | |
std::string cls = ""; | |
if (classes.size()) { | |
cls = classes[classId]; | |
} | |
std::cout << format("%4d %3.3f %s", classId, confidence, cls.c_str()) << std::endl; | |
frames.clear(); | |
} | |
imshow("AR", frame); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment