Dobiasd/VideoCompression_with_NeuralNetworks.cpp Secret

## VideoCompression_with_NeuralNetworks.cpp
#include <opencv2/opencv.hpp>
#include <functional>
#include <iostream>
#include <map>
#include <vector>
#include <string>

using namespace std;
using namespace cv;

bool GetVideoProperties( const string& filePath, double& fps, Size& frameSize, int& frameCount )
{
    VideoCapture inputVideo;
    string fileName( filePath );
    inputVideo.open( fileName );
    if ( !inputVideo.isOpened() )
        return false;
    frameCount = 0;
    for (;;)
    {
        Mat frame;
        if ( !inputVideo.read( frame ) )
            break;
        if ( frame.empty() )
            return false;
        if ( frame.channels() != 3 )
            return false;
        frameSize = frame.size();
        ++frameCount;
    }
    fps = inputVideo.get( CV_CAP_PROP_FPS );
    return true;
}

// Encodes num as a single-row float matrix with maxValue columns:
// every entry is 0 except column num, which is set to 1.
// num is not range-checked here; it has to be a valid column index.
Mat NumToNNValuesSimple( int num, int maxValue )
{
    Mat oneHot( 1, maxValue, CV_32F, Scalar( 0.0f ) );
    oneHot.at<float>( 0, num ) = 1.0f;
    return oneHot;
}

// Encodes num as a 1x1 float matrix holding the quotient num / maxValue.
// The division is carried out in float; maxValue is not checked against zero.
Mat NumToNNValuesFraction( int num, int maxValue )
{
    const float numerator = static_cast<float>( num );
    const float denominator = static_cast<float>( maxValue );

    Mat single( 1, 1, CV_32F );
    single.at<float>( 0, 0 ) = numerator / denominator;
    return single;
}

// Encodes num as a single-row float matrix of binary digits, least significant
// bit in column 0. The number of columns is the number of bits needed to
// represent maxValue (one column when maxValue is 0 or negative).
// num must lie in [0, maxValue]; this is checked with assert only.
Mat NumToNNValuesBinary( int num, int maxValue )
{
    assert( num >= 0 );
    assert( num <= maxValue );

    // Count the bits with integer arithmetic. The previous floating-point
    // log2() is not defined for maxValue <= 0, so converting its result to
    // int was undefined behaviour for an empty range.
    int neededValues = 1;
    for ( int rest = maxValue; rest > 1; rest /= 2 )
        ++neededValues;

    Mat result( Size( neededValues, 1 ), CV_32F );
    for ( int i = 0; i < neededValues; ++i )
    {
        const float nnValue = static_cast<float>( ( num >> i ) & 1 );
        result.at<float>( Point( i, 0 ) ) = nnValue;
    }
    return result;
}

// Flattens img into a single-row, single-channel float matrix and divides
// every element by 256.
// The result always owns its own buffer; img is never modified.
Mat ImageToNNValues( const Mat& img )
{
    // reshape() to a different row count needs continuous data, so take a
    // private copy when img is e.g. a region of interest of a larger matrix.
    const Mat contiguous = img.isContinuous() ? img : img.clone();

    // Convert and scale in one step into a fresh matrix. Scaling the reshaped
    // header in place would write into the buffer it shares with img whenever
    // img is already CV_32F.
    Mat result;
    contiguous.reshape( 1, 1 ).convertTo( result, CV_32F, 1.0 / 256.0 );
    return result;
}

// Turns a flat matrix of network outputs back into an 8-bit 3-channel image
// with frameSize.height rows. Every value is multiplied by 256 and converted
// to CV_8UC3.
// values is left untouched.
Mat NNValuesToImage( const Mat& values, const Size& frameSize )
{
    // reshape() only creates a new header over the same data. Scaling has to
    // happen while converting into a separate matrix, otherwise the caller's
    // (const) values would be multiplied in place.
    Mat result;
    values.reshape( 3, frameSize.height ).convertTo( result, CV_8UC3, 256.0 );
    return result;
}

// Signature shared by the NumToNNValues* encoders: ( num, maxValue ) -> Mat.
using NumToNNValuesFunc = function<Mat( int, int )>;

// Produces the image for frame frameNum: the frame number is encoded with
// numToNNValues( frameNum, frameCount ), fed through the network, and the
// prediction is converted into an image via NNValuesToImage.
Mat ReconstructFrame( Ptr<ml::ANN_MLP> nnPtr, const Size& frameSize, int frameNum, int frameCount, const NumToNNValuesFunc& numToNNValues )
{
    Mat prediction;
    nnPtr->predict( numToNNValues( frameNum, frameCount ), prediction );
    return NNValuesToImage( prediction, frameSize );
}

bool ReconstructMovie( const string& nnFilePath, const Size& frameSize, int frameCount, double fps, const string& filePath, const NumToNNValuesFunc& numToNNValues )
{
    cout << "Loading neural network" << endl;
    Ptr<ml::ANN_MLP> nnPtr = nnPtr->load<ml::ANN_MLP>( nnFilePath );
    if ( !nnPtr )
        return false;

    VideoWriter outputVideo;
    int fourcc = CV_FOURCC('H','2','6','4');
    outputVideo.open( filePath, fourcc, fps, frameSize );

    if ( !outputVideo.isOpened() )
    {
        cout << "Could not open the output video for write: " << filePath << endl;
        return false;
    }

    for ( int frameNum = 0; frameNum < frameCount; ++frameNum )
    {
        if ( frameNum == 0 || frameNum == frameCount - 1 || frameNum % 100 == 0 )
            cout << "Reconstructing frame " << frameNum + 1 << " of " << frameCount << endl;
        auto frame = ReconstructFrame( nnPtr, frameSize, frameNum, frameCount, numToNNValues );
        outputVideo << frame;
    }

    cout << "Saved " << filePath << endl;
    return true;
}

int main( int argc, char* argv[] )
{
    if ( argc < 2 )
    {
        cout << "Please provide the input video filepath." << endl;
        cout << "[filepath].nnvc and [filepath].nn.mp4 will then be written." << endl;
        return 1;
    }

    string inFilePath = argv[1];

    string nnFilePath = inFilePath + ".nncv";
    string outFilePath = inFilePath + ".nn.mp4";
    int maxIters = 1000;
    double epsilon = 0.00000000001;

    //NumToNNValuesFunc numToNNValues = &NumToNNValuesBinary;
    //NumToNNValuesFunc numToNNValues = &NumToNNValuesFraction;
    NumToNNValuesFunc numToNNValues = &NumToNNValuesSimple;

    double fps = 0.0;
    Size frameSize;
    int frameCount = 0;
    GetVideoProperties( inFilePath, fps, frameSize, frameCount );

    cout << inFilePath << " - fps: " << fps << " - frameSize: " << frameSize << " - frameCount: " << frameCount << endl;

    vector<int> layerSizes;

    int inputLayerSize = numToNNValues( 0, frameCount ).cols;
    int outputLayerSize = frameSize.area() * 3;
    int hiddenLayerSize = static_cast<int>(sqrt( frameCount )) + 1;
    layerSizes.push_back( inputLayerSize );
    layerSizes.push_back( hiddenLayerSize );
    layerSizes.push_back( hiddenLayerSize );
    layerSizes.push_back( outputLayerSize );

    Ptr<ml::ANN_MLP> nnPtr = ml::ANN_MLP::create();
    nnPtr->setLayerSizes( layerSizes );
    nnPtr->setActivationFunction( ml::ANN_MLP::SIGMOID_SYM );
    nnPtr->setTrainMethod( ml::ANN_MLP::RPROP, 0.1, FLT_EPSILON );
    nnPtr->setTermCriteria( TermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, maxIters, epsilon ) );

    Mat samples( Size( inputLayerSize, frameCount ), CV_32F );
    Mat responses( Size( outputLayerSize, frameCount ), CV_32F );

    VideoCapture inputVideo;
    string fileName( inFilePath );
    inputVideo.open( inFilePath );
    if ( !inputVideo.isOpened() )
        return false;

    for ( int frameNum = 0; frameNum < frameCount; ++frameNum )
    {
        if ( frameNum == 0 || frameNum == frameCount - 1 || frameNum % 100 == 0 )
            cout << "Loading frame " << frameNum + 1 << " of " << frameCount << endl;
        Mat frame;
        if ( !inputVideo.read( frame ) )
            break;
        if ( frame.empty() )
            return 1;
        auto imageNNValues = ImageToNNValues( frame );
        auto frameNumNNValues = numToNNValues( frameNum, frameCount );
        frameNumNNValues.copyTo( samples.row( frameNum ) );
        imageNNValues.copyTo( responses.row( frameNum ) );
    }

    cout << "Training neural network" << endl;
    nnPtr->train( samples, ml::ROW_SAMPLE, responses );

    cout << "Saving neural network" << endl;
    nnPtr->save( nnFilePath );

    ReconstructMovie( nnFilePath, frameSize, frameCount, fps, outFilePath, numToNNValues );
}
	#include <opencv2/opencv.hpp>
	#include <functional>
	#include <iostream>
	#include <map>
	#include <vector>
	#include <string>

	using namespace std;
	using namespace cv;

	bool GetVideoProperties( const string& filePath, double& fps, Size& frameSize, int& frameCount )
	{
	VideoCapture inputVideo;
	string fileName( filePath );
	inputVideo.open( fileName );
	if ( !inputVideo.isOpened() )
	return false;
	frameCount = 0;
	for (;;)
	{
	Mat frame;
	if ( !inputVideo.read( frame ) )
	break;
	if ( frame.empty() )
	return false;
	if ( frame.channels() != 3 )
	return false;
	frameSize = frame.size();
	++frameCount;
	}
	fps = inputVideo.get( CV_CAP_PROP_FPS );
	return true;
	}

	// Encodes num as a single-row float matrix with maxValue columns:
	// every entry is 0 except column num, which is set to 1.
	// num is not range-checked here; it has to be a valid column index.
	Mat NumToNNValuesSimple( int num, int maxValue )
	{
	    Mat oneHot( 1, maxValue, CV_32F, Scalar( 0.0f ) );
	    oneHot.at<float>( 0, num ) = 1.0f;
	    return oneHot;
	}

	// Encodes num as a 1x1 float matrix holding the quotient num / maxValue.
	// The division is carried out in float; maxValue is not checked against zero.
	Mat NumToNNValuesFraction( int num, int maxValue )
	{
	    const float numerator = static_cast<float>( num );
	    const float denominator = static_cast<float>( maxValue );

	    Mat single( 1, 1, CV_32F );
	    single.at<float>( 0, 0 ) = numerator / denominator;
	    return single;
	}

	// Encodes num as a single-row float matrix of binary digits, least significant
	// bit in column 0. The number of columns is the number of bits needed to
	// represent maxValue (one column when maxValue is 0 or negative).
	// num must lie in [0, maxValue]; this is checked with assert only.
	Mat NumToNNValuesBinary( int num, int maxValue )
	{
	    assert( num >= 0 );
	    assert( num <= maxValue );

	    // Count the bits with integer arithmetic. The previous floating-point
	    // log2() is not defined for maxValue <= 0, so converting its result to
	    // int was undefined behaviour for an empty range.
	    int neededValues = 1;
	    for ( int rest = maxValue; rest > 1; rest /= 2 )
	        ++neededValues;

	    Mat result( Size( neededValues, 1 ), CV_32F );
	    for ( int i = 0; i < neededValues; ++i )
	    {
	        const float nnValue = static_cast<float>( ( num >> i ) & 1 );
	        result.at<float>( Point( i, 0 ) ) = nnValue;
	    }
	    return result;
	}

	// Flattens img into a single-row, single-channel float matrix and divides
	// every element by 256.
	// The result always owns its own buffer; img is never modified.
	Mat ImageToNNValues( const Mat& img )
	{
	    // reshape() to a different row count needs continuous data, so take a
	    // private copy when img is e.g. a region of interest of a larger matrix.
	    const Mat contiguous = img.isContinuous() ? img : img.clone();

	    // Convert and scale in one step into a fresh matrix. Scaling the reshaped
	    // header in place would write into the buffer it shares with img whenever
	    // img is already CV_32F.
	    Mat result;
	    contiguous.reshape( 1, 1 ).convertTo( result, CV_32F, 1.0 / 256.0 );
	    return result;
	}

	// Turns a flat matrix of network outputs back into an 8-bit 3-channel image
	// with frameSize.height rows. Every value is multiplied by 256 and converted
	// to CV_8UC3.
	// values is left untouched.
	Mat NNValuesToImage( const Mat& values, const Size& frameSize )
	{
	    // reshape() only creates a new header over the same data. Scaling has to
	    // happen while converting into a separate matrix, otherwise the caller's
	    // (const) values would be multiplied in place.
	    Mat result;
	    values.reshape( 3, frameSize.height ).convertTo( result, CV_8UC3, 256.0 );
	    return result;
	}

	// Signature shared by the NumToNNValues* encoders: ( num, maxValue ) -> Mat.
	using NumToNNValuesFunc = function<Mat( int, int )>;

	// Produces the image for frame frameNum: the frame number is encoded with
	// numToNNValues( frameNum, frameCount ), fed through the network, and the
	// prediction is converted into an image via NNValuesToImage.
	Mat ReconstructFrame( Ptr<ml::ANN_MLP> nnPtr, const Size& frameSize, int frameNum, int frameCount, const NumToNNValuesFunc& numToNNValues )
	{
	    Mat prediction;
	    nnPtr->predict( numToNNValues( frameNum, frameCount ), prediction );
	    return NNValuesToImage( prediction, frameSize );
	}

	bool ReconstructMovie( const string& nnFilePath, const Size& frameSize, int frameCount, double fps, const string& filePath, const NumToNNValuesFunc& numToNNValues )
	{
	cout << "Loading neural network" << endl;
	Ptr<ml::ANN_MLP> nnPtr = nnPtr->load<ml::ANN_MLP>( nnFilePath );
	if ( !nnPtr )
	return false;

	VideoWriter outputVideo;
	int fourcc = CV_FOURCC('H','2','6','4');
	outputVideo.open( filePath, fourcc, fps, frameSize );

	if ( !outputVideo.isOpened() )
	{
	cout << "Could not open the output video for write: " << filePath << endl;
	return false;
	}

	for ( int frameNum = 0; frameNum < frameCount; ++frameNum )
	{
	if ( frameNum == 0 \|\| frameNum == frameCount - 1 \|\| frameNum % 100 == 0 )
	cout << "Reconstructing frame " << frameNum + 1 << " of " << frameCount << endl;
	auto frame = ReconstructFrame( nnPtr, frameSize, frameNum, frameCount, numToNNValues );
	outputVideo << frame;
	}

	cout << "Saved " << filePath << endl;
	return true;
	}

	int main( int argc, char* argv[] )
	{
	if ( argc < 2 )
	{
	cout << "Please provide the input video filepath." << endl;
	cout << "[filepath].nnvc and [filepath].nn.mp4 will then be written." << endl;
	return 1;
	}

	string inFilePath = argv[1];

	string nnFilePath = inFilePath + ".nncv";
	string outFilePath = inFilePath + ".nn.mp4";
	int maxIters = 1000;
	double epsilon = 0.00000000001;

	//NumToNNValuesFunc numToNNValues = &NumToNNValuesBinary;
	//NumToNNValuesFunc numToNNValues = &NumToNNValuesFraction;
	NumToNNValuesFunc numToNNValues = &NumToNNValuesSimple;

	double fps = 0.0;
	Size frameSize;
	int frameCount = 0;
	GetVideoProperties( inFilePath, fps, frameSize, frameCount );

	cout << inFilePath << " - fps: " << fps << " - frameSize: " << frameSize << " - frameCount: " << frameCount << endl;

	vector<int> layerSizes;

	int inputLayerSize = numToNNValues( 0, frameCount ).cols;
	int outputLayerSize = frameSize.area() * 3;
	int hiddenLayerSize = static_cast<int>(sqrt( frameCount )) + 1;
	layerSizes.push_back( inputLayerSize );
	layerSizes.push_back( hiddenLayerSize );
	layerSizes.push_back( hiddenLayerSize );
	layerSizes.push_back( outputLayerSize );

	Ptr<ml::ANN_MLP> nnPtr = ml::ANN_MLP::create();
	nnPtr->setLayerSizes( layerSizes );
	nnPtr->setActivationFunction( ml::ANN_MLP::SIGMOID_SYM );
	nnPtr->setTrainMethod( ml::ANN_MLP::RPROP, 0.1, FLT_EPSILON );
	nnPtr->setTermCriteria( TermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, maxIters, epsilon ) );

	Mat samples( Size( inputLayerSize, frameCount ), CV_32F );
	Mat responses( Size( outputLayerSize, frameCount ), CV_32F );

	VideoCapture inputVideo;
	string fileName( inFilePath );
	inputVideo.open( inFilePath );
	if ( !inputVideo.isOpened() )
	return false;

	for ( int frameNum = 0; frameNum < frameCount; ++frameNum )
	{
	if ( frameNum == 0 \|\| frameNum == frameCount - 1 \|\| frameNum % 100 == 0 )
	cout << "Loading frame " << frameNum + 1 << " of " << frameCount << endl;
	Mat frame;
	if ( !inputVideo.read( frame ) )
	break;
	if ( frame.empty() )
	return 1;
	auto imageNNValues = ImageToNNValues( frame );
	auto frameNumNNValues = numToNNValues( frameNum, frameCount );
	frameNumNNValues.copyTo( samples.row( frameNum ) );
	imageNNValues.copyTo( responses.row( frameNum ) );
	}

	cout << "Training neural network" << endl;
	nnPtr->train( samples, ml::ROW_SAMPLE, responses );

	cout << "Saving neural network" << endl;
	nnPtr->save( nnFilePath );

	ReconstructMovie( nnFilePath, frameSize, frameCount, fps, outFilePath, numToNNValues );
	}