Skip to content

Instantly share code, notes, and snippets.

@Dobiasd
Last active March 24, 2016 20:27
Show Gist options
  • Save Dobiasd/9234a8fe7ba958f79227 to your computer and use it in GitHub Desktop.
Save Dobiasd/9234a8fe7ba958f79227 to your computer and use it in GitHub Desktop.
#include <cassert>
#include <cfloat>
#include <cmath>
#include <functional>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
bool GetVideoProperties( const string& filePath, double& fps, Size& frameSize, int& frameCount )
{
VideoCapture inputVideo;
string fileName( filePath );
inputVideo.open( fileName );
if ( !inputVideo.isOpened() )
return false;
frameCount = 0;
for (;;)
{
Mat frame;
if ( !inputVideo.read( frame ) )
break;
if ( frame.empty() )
return false;
if ( frame.channels() != 3 )
return false;
frameSize = frame.size();
++frameCount;
}
fps = inputVideo.get( CV_CAP_PROP_FPS );
return true;
}
// One-hot encoding of a number.
//
// Returns a 1 x maxValue row of CV_32F values that is all zeros except for a
// 1.0 at column num.
//
// Throws std::out_of_range unless 0 <= num < maxValue, because any other
// combination would write outside of the row (this includes maxValue == 0,
// which would yield an empty matrix).
Mat NumToNNValuesSimple( int num, int maxValue )
{
    if ( maxValue <= 0 || num < 0 || num >= maxValue )
        throw std::out_of_range( "NumToNNValuesSimple: num must be in [0, maxValue)" );

    Mat result( Size( maxValue, 1 ), CV_32F, Scalar( 0.0f ) );
    result.at<float>( Point( num, 0 ) ) = 1.0f;
    return result;
}
// Encodes a number as the single value num / maxValue.
//
// Returns a 1 x 1 CV_32F matrix holding that quotient, computed in float.
Mat NumToNNValuesFraction( int num, int maxValue )
{
    Mat encoded( 1, 1, CV_32F );
    encoded.at<float>( 0, 0 ) = static_cast<float>( num ) / static_cast<float>( maxValue );
    return encoded;
}
// Binary encoding of a number.
//
// Returns a 1 x N row of CV_32F values, where N is the number of bits needed
// to represent maxValue (at least 1). Column i holds bit i of num as 0.0 or
// 1.0, i.e. the least significant bit comes first.
//
// Throws std::out_of_range unless 0 <= num <= maxValue.
Mat NumToNNValuesBinary( int num, int maxValue )
{
    // Validate before doing anything else and independently of NDEBUG:
    // a negative num would produce negative "bits", and a non-positive
    // maxValue has no meaningful logarithm.
    if ( maxValue < 0 || num < 0 || num > maxValue )
        throw std::out_of_range( "NumToNNValuesBinary: num must be in [0, maxValue]" );

    // Integer bit width of maxValue; equals floor(log2(maxValue)) + 1 for
    // maxValue >= 1 without going through floating point.
    int neededValues = 1;
    for ( int rest = maxValue >> 1; rest > 0; rest >>= 1 )
        ++neededValues;

    Mat result( Size( neededValues, 1 ), CV_32F );
    for ( int i = 0; i < neededValues; ++i )
    {
        float nnValue = static_cast<float>( (num >> i) & 1 );
        result.at<float>( Point( i, 0 ) ) = nnValue;
    }
    return result;
}
// Flattens an image into one row of neural network values.
//
// The image is reshaped to a single-channel, single-row matrix, converted to
// CV_32F and divided by 256. NNValuesToImage applies the inverse scaling.
//
// The result never shares data with img: reshape() only creates a new header
// over the same pixels, so the scaling is done as part of the conversion into
// a separate matrix instead of in place on that header.
Mat ImageToNNValues( const Mat& img )
{
    Mat result;
    img.reshape( 1, 1 ).convertTo( result, CV_32F, 1.0 / 256.0 );
    return result;
}
// Turns a row of neural network values back into a 3-channel 8-bit image.
//
// values is reshaped to 3 channels and frameSize.height rows, multiplied by
// 256 and converted to 8-bit. This is the inverse of ImageToNNValues.
//
// values itself is left untouched: reshape() only creates a new header over
// the same data, so scaling that header in place would also rescale the
// caller's matrix. The scaling is therefore folded into convertTo, which
// writes into a separate result matrix.
Mat NNValuesToImage( const Mat& values, const Size& frameSize )
{
    Mat result;
    values.reshape( 3, frameSize.height ).convertTo( result, CV_8UC3, 256.0 );
    return result;
}
// Signature shared by the NumToNNValues* encoders: ( num, maxValue ) -> row of
// neural network input values.
using NumToNNValuesFunc = function<Mat( int, int )>;
// Lets the network predict the image belonging to one frame number.
//
// frameNum and frameCount are encoded with numToNNValues, the encoded row is
// fed to nnPtr->predict, and the prediction is converted to an image with
// frameSize.height rows by NNValuesToImage.
Mat ReconstructFrame( Ptr<ml::ANN_MLP> nnPtr, const Size& frameSize, int frameNum, int frameCount, const NumToNNValuesFunc& numToNNValues )
{
    const Mat encodedFrameNum = numToNNValues( frameNum, frameCount );
    Mat predicted;
    nnPtr->predict( encodedFrameNum, predicted );
    return NNValuesToImage( predicted, frameSize );
}
// Loads a trained network from nnFilePath and writes the video it predicts.
//
// For every frame number in [0, frameCount) the frame is reconstructed with
// ReconstructFrame and appended to an H264 video at filePath, written with
// the given fps and frameSize. Progress is printed for the first frame, the
// last frame and every 100th frame.
//
// Returns false (after printing a message) if the network cannot be loaded or
// the output video cannot be opened for writing, true otherwise.
bool ReconstructMovie( const string& nnFilePath, const Size& frameSize, int frameCount, double fps, const string& filePath, const NumToNNValuesFunc& numToNNValues )
{
    cout << "Loading neural network" << endl;
    // load is a static member, so it is called through the class name; going
    // through the pointer that is being initialized would read it before it
    // has a value.
    Ptr<ml::ANN_MLP> nnPtr = ml::ANN_MLP::load<ml::ANN_MLP>( nnFilePath );
    if ( !nnPtr )
    {
        cout << "Could not load the neural network: " << nnFilePath << endl;
        return false;
    }

    VideoWriter outputVideo;
    int fourcc = CV_FOURCC('H','2','6','4');
    outputVideo.open( filePath, fourcc, fps, frameSize );
    if ( !outputVideo.isOpened() )
    {
        cout << "Could not open the output video for write: " << filePath << endl;
        return false;
    }

    for ( int frameNum = 0; frameNum < frameCount; ++frameNum )
    {
        if ( frameNum == 0 || frameNum == frameCount - 1 || frameNum % 100 == 0 )
            cout << "Reconstructing frame " << frameNum + 1 << " of " << frameCount << endl;
        auto frame = ReconstructFrame( nnPtr, frameSize, frameNum, frameCount, numToNNValues );
        outputVideo << frame;
    }
    cout << "Saved " << filePath << endl;
    return true;
}
int main( int argc, char* argv[] )
{
if ( argc < 2 )
{
cout << "Please provide the input video filepath." << endl;
cout << "[filepath].nnvc and [filepath].nn.mp4 will then be written." << endl;
return 1;
}
string inFilePath = argv[1];
string nnFilePath = inFilePath + ".nncv";
string outFilePath = inFilePath + ".nn.mp4";
int maxIters = 1000;
double epsilon = 0.00000000001;
//NumToNNValuesFunc numToNNValues = &NumToNNValuesBinary;
//NumToNNValuesFunc numToNNValues = &NumToNNValuesFraction;
NumToNNValuesFunc numToNNValues = &NumToNNValuesSimple;
double fps = 0.0;
Size frameSize;
int frameCount = 0;
GetVideoProperties( inFilePath, fps, frameSize, frameCount );
cout << inFilePath << " - fps: " << fps << " - frameSize: " << frameSize << " - frameCount: " << frameCount << endl;
vector<int> layerSizes;
int inputLayerSize = numToNNValues( 0, frameCount ).cols;
int outputLayerSize = frameSize.area() * 3;
int hiddenLayerSize = static_cast<int>(sqrt( frameCount )) + 1;
layerSizes.push_back( inputLayerSize );
layerSizes.push_back( hiddenLayerSize );
layerSizes.push_back( hiddenLayerSize );
layerSizes.push_back( outputLayerSize );
Ptr<ml::ANN_MLP> nnPtr = ml::ANN_MLP::create();
nnPtr->setLayerSizes( layerSizes );
nnPtr->setActivationFunction( ml::ANN_MLP::SIGMOID_SYM );
nnPtr->setTrainMethod( ml::ANN_MLP::RPROP, 0.1, FLT_EPSILON );
nnPtr->setTermCriteria( TermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, maxIters, epsilon ) );
Mat samples( Size( inputLayerSize, frameCount ), CV_32F );
Mat responses( Size( outputLayerSize, frameCount ), CV_32F );
VideoCapture inputVideo;
string fileName( inFilePath );
inputVideo.open( inFilePath );
if ( !inputVideo.isOpened() )
return false;
for ( int frameNum = 0; frameNum < frameCount; ++frameNum )
{
if ( frameNum == 0 || frameNum == frameCount - 1 || frameNum % 100 == 0 )
cout << "Loading frame " << frameNum + 1 << " of " << frameCount << endl;
Mat frame;
if ( !inputVideo.read( frame ) )
break;
if ( frame.empty() )
return 1;
auto imageNNValues = ImageToNNValues( frame );
auto frameNumNNValues = numToNNValues( frameNum, frameCount );
frameNumNNValues.copyTo( samples.row( frameNum ) );
imageNNValues.copyTo( responses.row( frameNum ) );
}
cout << "Training neural network" << endl;
nnPtr->train( samples, ml::ROW_SAMPLE, responses );
cout << "Saving neural network" << endl;
nnPtr->save( nnFilePath );
ReconstructMovie( nnFilePath, frameSize, frameCount, fps, outFilePath, numToNNValues );
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment