Aakash-kaushik/main.cpp

## main.cpp
int main()
{
  constexpr double RATIO = 0.1; // ratio to divide the data in train and val set.
  constexpr int MAX_ITERATIONS = 0; // set to zero to allow infinite iterations.
  constexpr double STEP_SIZE = 1.2e-3;// step size for Adam optimizer.
  constexpr int BATCH_SIZE = 50;
  constexpr size_t EPOCH_I = 2;
  cout << "Reading data ..." << endl;
  mat tempDataset;
  data::Load("../data/mnist_train.csv", tempDataset, true);
  mat dataset =
      tempDataset.submat(0, 1, tempDataset.n_rows - 1, tempDataset.n_cols - 1);
  mat train, valid;
  data::Split(dataset, train, valid, RATIO);
  const mat trainX = train.submat(1, 0, train.n_rows - 1, train.n_cols - 1);
  const mat validX = valid.submat(1, 0, valid.n_rows - 1, valid.n_cols - 1);
  const mat trainY = train.row(0) + 1;
  const mat validY = valid.row(0) + 1;
  FFN<NegativeLogLikelihood<>, RandomInitialization> model;

  // Layers schema.
  // 28x28x1 --- conv (6 filters of size 5x5. stride = 1) ---> 24x24x6
  // 24x24x6 --------------- Leaky ReLU ---------------------> 24x24x6
  // 24x24x6 --- max pooling (over 2x2 fields. stride = 2) --> 12x12x6
  // 12x12x6 --- conv (16 filters of size 5x5. stride = 1) --> 8x8x16
  // 8x8x16  --------------- Leaky ReLU ---------------------> 8x8x16
  // 8x8x16  --- max pooling (over 2x2 fields. stride = 2) --> 4x4x16
  // 4x4x16  ------------------- Dense ----------------------> 10
  model.Add<Convolution<>>(1,  // Number of input activation maps.
                           6,  // Number of output activation maps.
                           5,  // Filter width.
                           5,  // Filter height.
                           1,  // Stride along width.
                           1,  // Stride along height.
                           0,  // Padding width.
                           0,  // Padding height.
                           28, // Input width.
                           28  // Input height.
  );
  model.Add<ReLULayer<>>();
  model.Add<MaxPooling<>>(2, // Width of field.
                          2, // Height of field.
                          2, // Stride along width.
                          2, // Stride along height.
                          true);
  model.Add<Convolution<>>(6,  // Number of input activation maps.
                           16, // Number of output activation maps.
                           5,  // Filter width.
                           5,  // Filter height.
                           1,  // Stride along width.
                           1,  // Stride along height.
                           0,  // Padding width.
                           0,  // Padding height.
                           12, // Input width.
                           12  // Input height.
  );

  model.Add<ReLULayer<>>();
  model.Add<MaxPooling<>>(2, 2, 2, 2, true);
  model.Add<Linear<>>(16 * 4 * 4, 10);
  model.Add<LogSoftMax<>>();

  cout << "Start training ..." << endl;

  ens::Adam optimizer(
      STEP_SIZE,  // Step size of the optimizer.
      BATCH_SIZE, // Batch size. Number of data points that are used in each
                  // iteration.
      0.9,        // Exponential decay rate for the first moment estimates.
      0.999, // Exponential decay rate for the weighted infinity norm estimates.
      1e-8,  // Value used to initialise the mean squared gradient parameter.
      MAX_ITERATIONS, // Max number of iterations.
      1e-8,           // Tolerance.
      true);

  model.Train(trainX,
              trainY,
              optimizer,
              ens::PrintLoss(),
              ens::ProgressBar(),
              ens::EarlyStopAtMinLoss(EPOCH_I),
              ens::EarlyStopAtMinLoss(
                  [&](const arma::mat& /* param */)
                  {
                    double validationLoss = model.Evaluate(validX, validY);
                    std::cout << "Validation loss: " << validationLoss
                        << "." << std::endl;
                    return validationLoss;
                  }));

  mat predOut;
  model.Predict(trainX, predOut);
  arma::Row<size_t> predLabels = getLabels(predOut);
  double trainAccuracy =
      arma::accu(predLabels == trainY) / ( double )trainY.n_elem * 100;
  model.Predict(validX, predOut);
  predLabels = getLabels(predOut);
  double validAccuracy =
      arma::accu(predLabels == validY) / ( double )validY.n_elem * 100;

  std::cout << "Accuracy: train = " << trainAccuracy << "%,"
            << "\t valid = " << validAccuracy << "%" << std::endl;

  mlpack::data::Save("model.bin", "model", model, false);

  std::cout << "Predicting ..." << std::endl;
  data::Load("../data/mnist_test.csv", tempDataset, true);
  mat testX =
      tempDataset.submat(0, 1, tempDataset.n_rows - 1, tempDataset.n_cols - 1);

  mat testPredOut;
  model.Predict(testX, testPredOut);
  Row<size_t> testPred = getLabels(testPredOut);
  std::cout << "Saving predicted labels to \"results.csv.\"..." << std::endl;
  testPred.save("results.csv", arma::csv_ascii);
  std::cout << "Neural network model is saved to \"model.bin\"" << std::endl;
  std::cout << "Finished" << std::endl;
}
	int main()
	{
	constexpr double RATIO = 0.1; // ratio to divide the data in train and val set.
	constexpr int MAX_ITERATIONS = 0; // set to zero to allow infinite iterations.
	constexpr double STEP_SIZE = 1.2e-3;// step size for Adam optimizer.
	constexpr int BATCH_SIZE = 50;
	constexpr size_t EPOCH_I = 2;
	cout << "Reading data ..." << endl;
	mat tempDataset;
	data::Load("../data/mnist_train.csv", tempDataset, true);
	mat dataset =
	tempDataset.submat(0, 1, tempDataset.n_rows - 1, tempDataset.n_cols - 1);
	mat train, valid;
	data::Split(dataset, train, valid, RATIO);
	const mat trainX = train.submat(1, 0, train.n_rows - 1, train.n_cols - 1);
	const mat validX = valid.submat(1, 0, valid.n_rows - 1, valid.n_cols - 1);
	const mat trainY = train.row(0) + 1;
	const mat validY = valid.row(0) + 1;
	FFN<NegativeLogLikelihood<>, RandomInitialization> model;

	// Layers schema.
	// 28x28x1 --- conv (6 filters of size 5x5. stride = 1) ---> 24x24x6
	// 24x24x6 --------------- Leaky ReLU ---------------------> 24x24x6
	// 24x24x6 --- max pooling (over 2x2 fields. stride = 2) --> 12x12x6
	// 12x12x6 --- conv (16 filters of size 5x5. stride = 1) --> 8x8x16
	// 8x8x16 --------------- Leaky ReLU ---------------------> 8x8x16
	// 8x8x16 --- max pooling (over 2x2 fields. stride = 2) --> 4x4x16
	// 4x4x16 ------------------- Dense ----------------------> 10
	model.Add<Convolution<>>(1, // Number of input activation maps.
	6, // Number of output activation maps.
	5, // Filter width.
	5, // Filter height.
	1, // Stride along width.
	1, // Stride along height.
	0, // Padding width.
	0, // Padding height.
	28, // Input width.
	28 // Input height.
	);
	model.Add<ReLULayer<>>();
	model.Add<MaxPooling<>>(2, // Width of field.
	2, // Height of field.
	2, // Stride along width.
	2, // Stride along height.
	true);
	model.Add<Convolution<>>(6, // Number of input activation maps.
	16, // Number of output activation maps.
	5, // Filter width.
	5, // Filter height.
	1, // Stride along width.
	1, // Stride along height.
	0, // Padding width.
	0, // Padding height.
	12, // Input width.
	12 // Input height.
	);

	model.Add<ReLULayer<>>();
	model.Add<MaxPooling<>>(2, 2, 2, 2, true);
	model.Add<Linear<>>(16 * 4 * 4, 10);
	model.Add<LogSoftMax<>>();

	cout << "Start training ..." << endl;

	ens::Adam optimizer(
	STEP_SIZE, // Step size of the optimizer.
	BATCH_SIZE, // Batch size. Number of data points that are used in each
	// iteration.
	0.9, // Exponential decay rate for the first moment estimates.
	0.999, // Exponential decay rate for the weighted infinity norm estimates.
	1e-8, // Value used to initialise the mean squared gradient parameter.
	MAX_ITERATIONS, // Max number of iterations.
	1e-8, // Tolerance.
	true);

	model.Train(trainX,
	trainY,
	optimizer,
	ens::PrintLoss(),
	ens::ProgressBar(),
	ens::EarlyStopAtMinLoss(EPOCH_I),
	ens::EarlyStopAtMinLoss(
	[&](const arma::mat& /* param */)
	{
	double validationLoss = model.Evaluate(validX, validY);
	std::cout << "Validation loss: " << validationLoss
	<< "." << std::endl;
	return validationLoss;
	}));

	mat predOut;
	model.Predict(trainX, predOut);
	arma::Row<size_t> predLabels = getLabels(predOut);
	double trainAccuracy =
	arma::accu(predLabels == trainY) / ( double )trainY.n_elem * 100;
	model.Predict(validX, predOut);
	predLabels = getLabels(predOut);
	double validAccuracy =
	arma::accu(predLabels == validY) / ( double )validY.n_elem * 100;

	std::cout << "Accuracy: train = " << trainAccuracy << "%,"
	<< "\t valid = " << validAccuracy << "%" << std::endl;

	mlpack::data::Save("model.bin", "model", model, false);

	std::cout << "Predicting ..." << std::endl;
	data::Load("../data/mnist_test.csv", tempDataset, true);
	mat testX =
	tempDataset.submat(0, 1, tempDataset.n_rows - 1, tempDataset.n_cols - 1);

	mat testPredOut;
	model.Predict(testX, testPredOut);
	Row<size_t> testPred = getLabels(testPredOut);
	std::cout << "Saving predicted labels to \"results.csv.\"..." << std::endl;
	testPred.save("results.csv", arma::csv_ascii);
	std::cout << "Neural network model is saved to \"model.bin\"" << std::endl;
	std::cout << "Finished" << std::endl;
	}