Created March 9, 2016 01:34

dropconnect_layer.hpp
/**
 * @file dropconnect_layer.hpp
 * @author Palash Ahuja
 *
 * Definition of the DropConnectLayer class, which implements a regularizer
 * that randomly sets connections to zero, preventing units from co-adapting.
 */
#ifndef __MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
#define __MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP

#include <mlpack/core.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {
/**
 * The DropConnect layer is a regularizer that randomly, with probability
 * ratio, sets the weights of the base layer to zero during training. If
 * rescale is true, the resulting activations are scaled by the factor
 * 1 / (1 - ratio) so that their expected value is unchanged. In deterministic
 * mode (during testing) the layer simply performs the forward pass of the
 * base layer with the full, unmasked weights.
 *
 * Note: During training you should set deterministic to false and during
 * testing you should set deterministic to true.
 *
 * For more information, see the following.
 *
 * @inproceedings{icml2013_wan13,
 *   Publisher = {JMLR Workshop and Conference Proceedings},
 *   Title = {Regularization of Neural Networks using DropConnect},
 *   Url = {http://jmlr.org/proceedings/papers/v28/wan13.pdf},
 *   Booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML-13)},
 *   Author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and Rob Fergus},
 *   Number = {3},
 *   Month = may,
 *   Volume = {28},
 *   Editor = {Sanjoy Dasgupta and David Mcallester},
 *   Year = {2013},
 *   Pages = {1058-1066},
 *   Abstract = {We introduce DropConnect, a generalization of Dropout, for
 *     regularizing large fully-connected layers within neural networks. When
 *     training with Dropout, a randomly selected subset of activations are
 *     set to zero within each layer. DropConnect instead sets a randomly
 *     selected subset of weights within the network to zero. Each unit thus
 *     receives input from a random subset of units in the previous layer. We
 *     derive a bound on the generalization performance of both Dropout and
 *     DropConnect. We then evaluate DropConnect on a range of datasets,
 *     comparing to Dropout, and show state-of-the-art results on several
 *     image recognition benchmarks can be obtained by aggregating multiple
 *     DropConnect-trained models.}
 * }
 */
template<
    typename InputLayer = LinearLayer<>,
    typename InputDataType = arma::mat,
    typename OutputDataType = arma::mat
>
class DropConnectLayer {
 public:
  /**
   * Create the DropConnectLayer object using the specified base layer, ratio
   * and rescale parameter.
   *
   * @param inputLayer The base layer whose weights are randomly set to zero.
   * @param ratio The probability of setting a connection to zero.
   * @param rescale If true the output is rescaled by 1 / (1 - ratio) when
   *        deterministic is false.
   */
  template<typename InputLayerType>
  DropConnectLayer(InputLayerType&& inputLayer,
                   const double ratio = 0.5,
                   const bool rescale = true) :
      baseLayer(std::forward<InputLayerType>(inputLayer)),
      ratio(ratio),
      scale(1.0 / (1.0 - ratio)),
      deterministic(false),
      rescale(rescale)
  {
  }
  /**
   * Ordinary feed forward pass of the DropConnect layer.
   *
   * @param input Input data used for evaluating the specified function.
   * @param output Resulting output activation.
   */
  template<typename eT>
  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output) {
    // The DropConnect mask is not applied in deterministic mode (during
    // testing); the base layer runs with its full set of weights.
    if (deterministic) {
      baseLayer.Forward(input, output);
    }
    else {
      // Set weights to zero with probability ratio.
      mask = arma::randu<arma::Mat<eT> >(baseLayer.Weights().n_rows,
          baseLayer.Weights().n_cols);
      mask.transform([&](double val) { return (val > ratio); });

      // Save the unmasked weights so they can be restored in Gradient().
      denoise = baseLayer.Weights();
      baseLayer.Weights() = baseLayer.Weights() % mask;
      baseLayer.Forward(input, output);

      // Rescale by 1 / (1 - ratio) so the expected activation is unchanged.
      if (rescale)
        output *= scale;
    }
  }
  /**
   * Ordinary feed backward pass of the DropConnect layer.
   *
   * @param input The propagated input activation.
   * @param gy The backpropagated error.
   * @param g The calculated gradient.
   */
  template<typename DataType>
  void Backward(const DataType& input,
                const DataType& gy,
                DataType& g)
  {
    baseLayer.Backward(input, gy, g);
  }

  /**
   * Calculate the gradient of the base layer, then restore the unmasked
   * weights.
   *
   * @param d The calculated error.
   * @param g The calculated gradient.
   */
  template<typename eT, typename GradientDataType>
  void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
  {
    baseLayer.Gradient(d, g);

    // Denoise the weights, i.e. restore the weights saved before masking.
    baseLayer.Weights() = denoise;
  }
  //! Get the weights.
  OutputDataType const& Weights() const { return baseLayer.Weights(); }
  //! Modify the weights.
  OutputDataType& Weights() { return baseLayer.Weights(); }

  //! Get the base layer.
  InputLayer const& Layer() const { return baseLayer; }
  //! Modify the base layer.
  InputLayer& Layer() { return baseLayer; }

  //! Get the input parameter.
  InputDataType const& InputParameter() const { return baseLayer.InputParameter(); }
  //! Modify the input parameter.
  InputDataType& InputParameter() { return baseLayer.InputParameter(); }

  //! Get the output parameter.
  OutputDataType const& OutputParameter() const { return baseLayer.OutputParameter(); }
  //! Modify the output parameter.
  OutputDataType& OutputParameter() { return baseLayer.OutputParameter(); }

  //! Get the delta.
  OutputDataType const& Delta() const { return baseLayer.Delta(); }
  //! Modify the delta.
  OutputDataType& Delta() { return baseLayer.Delta(); }

  //! Get the gradient.
  OutputDataType const& Gradient() const { return baseLayer.Gradient(); }
  //! Modify the gradient.
  OutputDataType& Gradient() { return baseLayer.Gradient(); }

  //! Get the value of the deterministic parameter.
  bool Deterministic() const { return deterministic; }
  //! Modify the value of the deterministic parameter.
  bool& Deterministic() { return deterministic; }

  //! Get the probability of setting a connection to zero.
  double Ratio() const { return ratio; }
  //! Modify the probability of setting a connection to zero.
  void Ratio(const double r) {
    ratio = r;
    scale = 1.0 / (1.0 - ratio);
  }

  //! Get the value of the rescale parameter.
  bool Rescale() const { return rescale; }
  //! Modify the value of the rescale parameter.
  bool& Rescale() { return rescale; }
 private:
  //! Locally-stored base layer object.
  InputLayer baseLayer;

  //! Locally-stored delta object.
  OutputDataType delta;

  //! Locally-stored input parameter object.
  InputDataType inputParameter;

  //! Locally-stored output parameter object.
  OutputDataType outputParameter;

  //! Locally-stored mask object.
  OutputDataType mask;

  //! The probability of setting a connection to zero.
  double ratio;

  //! The scale fraction 1 / (1 - ratio).
  double scale;

  //! If true dropout and scaling are disabled; see the notes above.
  bool deterministic;

  //! If true the output is rescaled when deterministic is false.
  bool rescale;

  //! Locally-stored copy of the unmasked weights, restored in Gradient().
  arma::mat denoise;
}; // class DropConnectLayer
//! Layer traits for the DropConnectLayer.
template<
    typename InputLayer,
    typename InputDataType,
    typename OutputDataType
>
class LayerTraits<DropConnectLayer<InputLayer, InputDataType, OutputDataType> >
{
 public:
  static const bool IsBinary = false;
  static const bool IsOutputLayer = false;
  static const bool IsBiasLayer = false;
  static const bool IsLSTMLayer = false;
  static const bool IsConnection = true;
};
/**
 * Standard DropConnect layer.
 */
// template<
//     typename InputLayer,
//     typename InputDataType = arma::cube,
//     typename OutputDataType = arma::cube
// >
// using DropConnectLayer2D = DropConnectLayer<InputLayer, InputDataType, OutputDataType>;
} // namespace ann
} // namespace mlpack

#endif
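For reference, the training-time branch of Forward() above amounts to the following standalone Armadillo sketch. This is an editorial addition, not part of the gist; the 5x10 weight matrix, the ratio of 0.3, and the single matrix-vector product are placeholders for whatever the wrapped base layer actually computes.

// Standalone sketch of the DropConnect masking used in Forward() above:
// zero each weight with probability ratio, then rescale by 1 / (1 - ratio)
// so the expected pre-activation matches the unmasked layer.
#include <armadillo>

int main()
{
  const double ratio = 0.3;                  // Probability of dropping a weight.
  const double scale = 1.0 / (1.0 - ratio);  // Inverted-dropout rescaling factor.

  arma::arma_rng::set_seed_random();
  arma::mat weights = arma::randu<arma::mat>(5, 10);  // Stand-in base layer weights.
  arma::mat input = arma::randu<arma::mat>(10, 1);

  // Bernoulli mask: an entry survives with probability (1 - ratio).
  arma::mat mask = arma::randu<arma::mat>(weights.n_rows, weights.n_cols);
  mask.transform([&](double val) { return (double) (val > ratio); });

  // Masked forward pass of a plain linear layer, followed by rescaling.
  arma::mat output = scale * ((weights % mask) * input);

  output.print("masked, rescaled output");
  return 0;
}

Averaged over many masks, the rescaled product has the same expected value as weights * input, which is why the deterministic branch can simply skip the masking.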
rmsprop_impl.hpp
/**
 * @file rmsprop_impl.hpp
 * @author Ryan Curtin
 * @author Marcus Edel
 *
 * Implementation of the RMSprop optimizer.
 */
#ifndef __MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_IMPL_HPP
#define __MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_IMPL_HPP

// In case it hasn't been included yet.
#include "rmsprop.hpp"

namespace mlpack {
namespace optimization {

template<typename DecomposableFunctionType>
RMSprop<DecomposableFunctionType>::RMSprop(DecomposableFunctionType& function,
                                           const double stepSize,
                                           const double alpha,
                                           const double eps,
                                           const size_t maxIterations,
                                           const double tolerance,
                                           const bool shuffle) :
    function(function),
    stepSize(stepSize),
    alpha(alpha),
    eps(eps),
    maxIterations(maxIterations),
    tolerance(tolerance),
    shuffle(shuffle)
{ /* Nothing to do. */ }
//! Optimize the function (minimize).
template<typename DecomposableFunctionType>
double RMSprop<DecomposableFunctionType>::Optimize(arma::mat& iterate)
{
  // Find the number of functions to use.
  const size_t numFunctions = function.NumFunctions();

  // This is used only if shuffle is true.
  arma::Col<size_t> visitationOrder;
  if (shuffle)
    visitationOrder = arma::shuffle(arma::linspace<arma::Col<size_t>>(0,
        (numFunctions - 1), numFunctions));

  // To keep track of where we are and how things are going.
  size_t currentFunction = 0;
  double overallObjective = 0;
  double lastObjective = DBL_MAX;

  // Calculate the first objective function.
  for (size_t i = 0; i < numFunctions; ++i)
    overallObjective += function.Evaluate(iterate, i, true);

  // Now iterate!
  arma::mat gradient(iterate.n_rows, iterate.n_cols);

  // Leaky sum of squares of parameter gradient.
  arma::mat meanSquaredGradient = arma::zeros<arma::mat>(iterate.n_rows,
      iterate.n_cols);
  for (size_t i = 1; i != maxIterations; ++i, ++currentFunction)
  {
    // Is this iteration the start of a sequence?
    if ((currentFunction % numFunctions) == 0)
    {
      // Output current objective function.
      Log::Info << "RMSprop: iteration " << i << ", objective "
          << overallObjective << "." << std::endl;

      if (std::isnan(overallObjective) || std::isinf(overallObjective))
      {
        Log::Warn << "RMSprop: converged to " << overallObjective
            << "; terminating with failure. Try a smaller step size?"
            << std::endl;
        return overallObjective;
      }

      if (std::abs(lastObjective - overallObjective) < tolerance)
      {
        Log::Info << "RMSprop: minimized within tolerance " << tolerance << "; "
            << "terminating optimization." << std::endl;
        return overallObjective;
      }

      // Reset the counter variables.
      lastObjective = overallObjective;
      overallObjective = 0;
      currentFunction = 0;

      if (shuffle) // Determine order of visitation.
        visitationOrder = arma::shuffle(visitationOrder);
    }

    // Evaluate the current function and add its value to the overall
    // objective.
    if (shuffle)
      overallObjective += function.Evaluate(iterate,
          visitationOrder[currentFunction], false);
    else
      overallObjective += function.Evaluate(iterate, currentFunction, false);

    // Evaluate the gradient for this iteration.
    if (shuffle)
      function.Gradient(iterate, visitationOrder[currentFunction], gradient);
    else
      function.Gradient(iterate, currentFunction, gradient);

    // Update the iterate: keep a leaky average of the squared gradient and
    // scale each parameter's step by its root mean square.
    meanSquaredGradient *= alpha;
    meanSquaredGradient += (1 - alpha) * (gradient % gradient);
    iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + eps);
  }
Log::Info << "RMSprop: maximum iterations (" << maxIterations << ") reached; " | |
<< "terminating optimization." << std::endl; | |
// Calculate final objective. | |
overallObjective = 0; | |
for (size_t i = 0; i < numFunctions; ++i) | |
overallObjective += function.Evaluate(iterate, i, true); | |
return overallObjective; | |
} | |
} // namespace optimization | |
} // namespace mlpack | |
#endif |
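The three lines at the end of the main loop are the whole of the RMSprop update. As a quick standalone check (again an editorial sketch, not part of the gist), the same update drives a toy quadratic toward its minimum; f(x) = sum(x^2), its gradient 2x, and the 3x3 parameter matrix are stand-ins for whatever function.Gradient() supplies.

// Standalone sketch of the RMSprop update used in Optimize() above, applied
// to f(x) = sum(x^2): a leaky average of squared gradients scales each
// parameter's step individually.
#include <armadillo>

int main()
{
  const double stepSize = 0.01, alpha = 0.99, eps = 1e-8;

  arma::mat iterate = arma::randu<arma::mat>(3, 3);              // Parameters.
  arma::mat meanSquaredGradient = arma::zeros<arma::mat>(3, 3);  // Optimizer state.

  for (size_t i = 0; i < 500; ++i)
  {
    // Stand-in gradient; Optimize() gets this from function.Gradient().
    arma::mat gradient = 2 * iterate;

    // The same three update lines as in the main loop above.
    meanSquaredGradient *= alpha;
    meanSquaredGradient += (1 - alpha) * (gradient % gradient);
    iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + eps);
  }

  // All entries should now be close to the minimizer at zero.
  iterate.print("iterate after 500 RMSprop steps");
  return 0;
}

Because each parameter is divided by the root of its own running mean of squared gradients, the effective step size adapts per coordinate, which is what makes RMSprop robust to poorly scaled gradients.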