/**
* EXAMPLE FROM: https://iamtrask.github.io/2015/07/12/basic-python-network/
* I'm replicating the "3 Layer Neural Network" code using C++ and Eigen below.
* This network is trying to learn to recognize an XOR in the first two elements
* of a 3-value vector; the third value doesn't matter.
*/
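/**
 * Architecture, read off the weight shapes used in main() below:
 *   3 inputs -> 4 hidden sigmoid units (syn0 is 3x4) -> 1 sigmoid output (syn1 is 4x1).
 * All four XOR examples are pushed through together each epoch (full-batch training).
 */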
#include <iostream>
#include <fstream>
#include <cstdlib>   // srand(), rand(), atoi()
#include <ctime>
#include <cmath>
#include "Eigen/Core" // change to <Eigen/Core> if you fully installed Eigen.
using Eigen::MatrixXd;
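// Build sketch (not part of the original gist): assuming a C++11 compiler and the Eigen
// headers unpacked next to this source file (matching the quoted include above),
//   g++ -std=c++11 -O2 <this_file>.cpp -o xor_net
// If Eigen is installed system-wide, switch the include to <Eigen/Core> and add -I as needed.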
/**
* Returns a matrix of ones in the same shape as the parameter `m`.
*
* @param m example matrix
* @return a matrix of ones in the same shape as the parameter `m`.
*/
MatrixXd ones_like(const MatrixXd& m){
    return MatrixXd::Ones(m.rows(), m.cols());
}
/**
* The sigmoid function, and its derivative.
*
* @param x A matrix to apply sigmoid to.
* @param deriv flag indicating if the derivative should be computed
*
* @return sigmoid(x) if deriv == false; otherwise x * (1 - x), the sigmoid
*         derivative expressed in terms of an already-activated output x
*/
MatrixXd sigmoid(const MatrixXd& x, bool deriv=false){
    if (deriv)
        return x.cwiseProduct(ones_like(x) - x);      // x * (1 - x): assumes x already holds sigmoid outputs
    return (1. / (1. + (-x.array()).exp())).matrix(); // sigmoid(x) = 1 / (1 + e^(-x))
}
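// Quick worked check of the two branches above: sigmoid(0) = 1 / (1 + e^0) = 0.5, and
// feeding that *output* back through the deriv branch gives 0.5 * (1 - 0.5) = 0.25,
// which matches d/dx sigmoid(x) evaluated at x = 0.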
int main(int argc, char** argv) {
    srand(2018);      // seeds the PRNG with a fixed value, so same "random" on each run.
    //srand(time(0)); // un-comment this to get different results each run.
    double learning_rate = .5; // Adjust to change speed of learning. Valid range is (0,1]
    int n_epochs = 100000;     // Default # of epochs (user can specify on command line as well).
    // User can supply # of epochs on the command line: ./a.out 60000
    if(argc > 1){
        n_epochs = atoi(argv[1]);
    }
    MatrixXd X{4,3}; // 4 x 3 matrix of input values
    X << 0, 0, 1,    // Eigen uses << with a comma-separated
         0, 1, 1,    // list of values to initialize
         1, 0, 1,    // with known values. This is a bit odd,
         1, 1, 1;    // but pretty easy to work with.
    MatrixXd y{4,1}; // 4 x 1 matrix of truth values
    y << 0, 1, 1, 0;
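    // Each row of y is the XOR of the first two entries in the matching row of X
    // (0^0=0, 0^1=1, 1^0=1, 1^1=0); the constant third column carries no label
    // information, which is what the header comment means by "doesn't matter".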
    // Note: Eigen's MatrixXd::Random() already draws uniformly from [-1, 1] (unlike
    // NumPy's random(), which is [0, 1)), so no rescaling is needed for zero-centered weights.
    MatrixXd syn0 = MatrixXd::Random(3,4); // Weights for first fully-connected layer
    MatrixXd syn1 = MatrixXd::Random(4,1); // Weights for second fully-connected layer
std::cout << "X:\n" << X << "\n\n";
std::cout << "y:\n" << y << "\n\n";
std::cout << "syn0:\n" << syn0 << "\n\n";
std::cout << "syn1:\n" << syn1 << "\n\n";
    for(int j = 0; j < n_epochs; ++j){
        MatrixXd l0 = X;                  // forward pass -- input layer (layer 0)
        MatrixXd l1 = sigmoid(l0 * syn0); // forward pass -- layer 1
        MatrixXd l2 = sigmoid(l1 * syn1); // forward pass -- layer 2
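        // Shapes: l0 is 4x3 (samples x inputs), l1 is 4x4 (samples x hidden units),
        // l2 is 4x1 (samples x outputs) -- each row of l2 is the prediction for the
        // matching row of X.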
// std::cout << "l1\n" << l1 << "\n";
// std::cout << "l2\n" << l2 << '\n';
// How much did we miss the target?
MatrixXd l2_error = y - l2;
if (j % 10000 == 0 || n_epochs - j == 1){
// std::cout << "Predicted:\n" << l2 << "\n";
// std::cout << "Error:\n" << l2_error << '\n';
std::cout << "Ep: " << j << " Error: " << l2_error.array().abs().mean() << '\n';
}
        // Backprop the error:
        MatrixXd l2_delta = l2_error.cwiseProduct(sigmoid(l2, true)); // determine how much to adjust l2
        MatrixXd l1_error = l2_delta * syn1.transpose();              // how much error from l1?
        MatrixXd l1_delta = l1_error.cwiseProduct(sigmoid(l1, true)); // determine how much to adjust l1
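        // Chain rule, spelled out: l2_delta = (y - l2) .* sigmoid'(l2) scales the error
        // by how steep the output sigmoid is; l1_error = l2_delta * syn1^T spreads that
        // blame back over the hidden units in proportion to their outgoing weights; and
        // l1_delta rescales it by sigmoid'(l1) for the hidden layer's own adjustment.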
        // if (j % 10000 == 0){
        //     std::cout << "l1_delta\n" << l1_delta << "\n";
        //     std::cout << "l2_delta\n" << l2_delta << '\n';
        // }
        // Apply the backprop adjustments:
        syn1 += learning_rate * l1.transpose() * l2_delta;
        syn0 += learning_rate * l0.transpose() * l1_delta;
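        // Each update is (layer input)^T * (layer delta): a weight grows or shrinks in
        // proportion to both the activation feeding it and the error signal behind it,
        // with learning_rate controlling the step size.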
// std::cout << "syn0:\n" << syn0 << "\n";
// std::cout << "syn1:\n" << syn1 << "\n";
}
    // Save weights:
    std::ofstream fout{"weights.txt"};
    fout << syn0 << "\n\n" << syn1;
    fout.close();
    return 0;
}