@zoq
Created March 9, 2016 01:34
dropconnect_layer.hpp
/**
* @file dropconnect_layer.hpp
* @author Palash Ahuja
*
* Definition of the DropConnectLayer class, which implements a regularizer
* that randomly sets connection weights to zero, preventing units from
* co-adapting.
*/
#ifndef __MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
#define __MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
#include <mlpack/core.hpp>

#include "layer_traits.hpp"
#include "linear_layer.hpp"
namespace mlpack {
namespace ann /** Artificial Neural Network. */ {
/**
* The DropConnect layer is a regularizer that randomly, with probability
* ratio, sets individual connection weights of the underlying layer to zero
* during training. If rescale is true, the output is scaled by the factor
* 1 / (1 - ratio) while deterministic is false, so that its expectation
* matches that of the unmasked layer. In deterministic mode (during testing)
* the full weight matrix is used and no masking or scaling is applied.
*
* Note: During training you should set deterministic to false and during
* testing you should set deterministic to true.
*
* For more information, see the following.
* @inproceedings{icml2013_wan13,
* Publisher = {JMLR Workshop and Conference Proceedings},
* Title = {Regularization of Neural Networks using DropConnect},
* Url = {http://jmlr.org/proceedings/papers/v28/wan13.pdf},
* Booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML-13)},
* Author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and Rob Fergus},
* Number = {3},
* Month = may,
* Volume = {28},
* Editor = {Sanjoy Dasgupta and David Mcallester},
* Year = {2013},
* Pages = {1058-1066},
* Abstract = {We introduce DropConnect, a generalization of DropOut, for
* regularizing large fully-connected layers within neural networks. When
* training with Dropout, a randomly selected subset of activations are set
* to zero within each layer. DropConnect instead sets a randomly selected
* subset of weights within the network to zero. Each unit thus receives
* input from a random subset of units in the previous layer. We derive a
* bound on the generalization performance of both Dropout and DropConnect.
* We then evaluate DropConnect on a range of datasets, comparing to Dropout,
* and show state-of-the-art results on several image recognition benchmarks
* can be obtained by aggregating multiple DropConnect-trained models.}
* }
*/
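// A minimal usage sketch, illustrative only: the LinearLayer constructor
// signature (inSize, outSize) is an assumption based on the mlpack ann
// module of this era and is not defined in this file.
//
//   LinearLayer<> linear(10, 10);
//   DropConnectLayer<LinearLayer<> > dropConnect(linear, 0.3);
//
//   // Drop 30% of the weights and rescale the output during training...
//   dropConnect.Deterministic() = false;
//   // ...but use the full, unmasked weight matrix at test time.
//   dropConnect.Deterministic() = true;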
template<
typename InputLayer = LinearLayer<>,
typename InputDataType = arma::mat,
typename OutputDataType = arma::mat
>
class DropConnectLayer {
public:
/**
* Create the DropConnectLayer object using the specified base layer, ratio,
* and rescale parameter.
*
* @param inputLayer The layer whose connection weights are randomly dropped.
* @param ratio The probability of setting a connection weight to zero.
* @param rescale If true, the output is rescaled when deterministic is false.
*/
template<typename InputLayerType>
DropConnectLayer(InputLayerType &&inputLayer,
const double ratio = 0.5,
const bool rescale = true) :
baseLayer(std::forward<InputLayerType>(inputLayer)),
ratio(ratio),
scale(1.0 / (1 - ratio)),
rescale(rescale),
seq(0),
run(true)
{
}
/**
* Ordinary feed forward pass of the DropConnect layer.
*
* @param input Input data used for evaluating the specified function.
* @param output Resulting output activation.
*/
template<typename eT>
void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output)
{
// The DropConnect mask is not applied in deterministic mode (during
// testing); the full weight matrix is used.
if (deterministic)
{
baseLayer.Forward(input, output);
}
else
{
// Keep each weight with probability (1 - ratio); set the rest to zero.
mask = arma::randu<arma::Mat<eT> >(baseLayer.Weights().n_rows,
    baseLayer.Weights().n_cols);
mask.transform([&](double val) { return (val > ratio); });
// Save the unmasked weights so they can be restored in Gradient().
denoise = baseLayer.Weights();
baseLayer.Weights() = baseLayer.Weights() % mask;
baseLayer.Forward(input, output);
// Scale the output by 1 / (1 - ratio) so its expectation matches the
// deterministic pass.
if (rescale)
  output *= scale;
}
}
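// Note on the scaling above: each weight survives the mask with probability
// (1 - ratio), so the expected value of (Weights() % mask) is
// (1 - ratio) * Weights(). Multiplying the output by scale = 1 / (1 - ratio)
// keeps its expectation equal to that of the unmasked (deterministic) pass,
// which is why no extra scaling is needed at test time.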
// /**
// * Ordinary feed forward pass of the DropConnect layer for cube input
// * (currently disabled).
// *
// * @param input Input data used for evaluating the specified function.
// * @param output Resulting output activation.
// */
// template<typename eT>
// void Forward(const arma::Cube<eT>& input, arma::Mat<eT>& output)
// {
// // The DropConnect mask is not applied in deterministic mode (during
// // testing).
// if (deterministic)
// {
// baseLayer.Forward(input, output);
// }
// else
// {
// // Keep each weight with probability (1 - ratio); set the rest to zero.
// mask = arma::randu<arma::Mat<eT> >(baseLayer.Weights().n_rows,
//     baseLayer.Weights().n_cols);
// mask.transform([&](double val) { return (val > ratio); });
// // Save the unmasked weights so they can be restored in Gradient().
// denoise = baseLayer.Weights();
// baseLayer.Weights() = baseLayer.Weights() % mask;
// baseLayer.Forward(input, output);
// if (rescale)
//   output *= scale;
// }
// }
/**
* Ordinary feed backward pass of the DropConnect layer.
*
* @param input The propagated input activation.
* @param gy The backpropagated error.
* @param g The calculated gradient.
*/
template<typename DataType>
void Backward(const DataType & input,
const DataType &gy,
DataType &g)
{
baseLayer.Backward(input, gy, g);
}
/**
* Calculate the gradient using the delta and the input activation, then
* restore the unmasked weights.
*
* @param d The calculated error.
* @param g The resulting gradient.
*/
template<typename eT, typename GradientDataType>
void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
{
baseLayer.Gradient(d, g);
// Restore the original (unmasked) weights saved in Forward().
baseLayer.Weights() = denoise;
}
//! Get the weights.
OutputDataType const& Weights() const { return baseLayer.Weights(); }
//! Modify the weights.
OutputDataType& Weights() { return baseLayer.Weights(); }
//! Get the base layer.
InputLayer const& Layer() const { return baseLayer; }
//! Modify the base layer.
InputLayer& Layer() { return baseLayer; }
//! Get the input parameter.
InputDataType const& InputParameter() const { return baseLayer.InputParameter(); }
//! Modify the input parameter.
InputDataType& InputParameter() { return baseLayer.InputParameter(); }
//! Get the output parameter.
OutputDataType const& OutputParameter() const { return baseLayer.OutputParameter(); }
//! Modify the output parameter.
OutputDataType& OutputParameter() { return baseLayer.OutputParameter(); }
//! Get the delta.
OutputDataType const& Delta() const { return baseLayer.Delta(); }
//! Modify the delta.
OutputDataType& Delta() { return baseLayer.Delta(); }
//! Get the gradient.
OutputDataType const& Gradient() const { return baseLayer.Gradient(); }
//! Modify the gradient.
OutputDataType& Gradient() { return baseLayer.Gradient(); }
//! The value of the deterministic parameter.
bool Deterministic() const { return deterministic; }
//! Modify the value of the deterministic parameter.
bool &Deterministic() { return deterministic; }
//! The probability of setting a value to zero.
double Ratio() const { return ratio; }
//! Modify the probability of setting a value to zero.
void Ratio(const double r) {
ratio = r;
scale = 1.0 / (1.0 - ratio);
}
//! The value of the rescale parameter.
bool Rescale() const { return rescale; }
//! Modify the value of the rescale parameter.
bool &Rescale() { return rescale; }
private:
//! Locally-stored layer object.
InputLayer baseLayer;
//! Locally-stored delta object.
OutputDataType delta;
//! Locally-stored input parameter object.
InputDataType inputParameter;
//! Locally-stored output parameter object.
OutputDataType outputParameter;
//! Locally-stored mask object.
OutputDataType mask;
//! The probability of setting a value to zero.
double ratio;
//! The scale fraction.
double scale;
//! If true, the DropConnect mask and scaling are disabled (test mode); see
//! the notes above.
bool deterministic;
//! If true, the output is rescaled when deterministic is false.
bool rescale;
//! Locally-stored sequence counter (currently unused).
int seq;
//! Locally-stored run flag (currently unused).
bool run;
//! Locally-stored copy of the unmasked weights, restored in Gradient().
arma::mat denoise;
}; // class DropConnectLayer.
//! Layer traits for the DropConnectLayer, used by the network code to query
//! layer properties at compile time.
template <
typename InputLayer,
typename InputDataType,
typename OutputDataType
>
class LayerTraits<DropConnectLayer<InputLayer, InputDataType, OutputDataType> >
{
public:
static const bool IsBinary = false;
static const bool IsOutputLayer = false;
static const bool IsBiasLayer = false;
static const bool IsLSTMLayer = false;
static const bool IsConnection = true;
};
/**
* Standard 2D DropConnect layer, using cube input and output (currently
* disabled).
*/
// template <
// typename InputLayer,
// typename InputDataType = arma::cube,
// typename OutputDataType = arma::cube
// >
// using DropConnectLayer2D = DropConnectLayer<InputLayer, InputDataType, OutputDataType>;
} // namespace ann
} // namespace mlpack
#endif
/**
* @file rmsprop_impl.hpp
* @author Ryan Curtin
* @author Marcus Edel
*
* Implementation of the RMSprop optimizer.
*/
#ifndef __MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_IMPL_HPP
#define __MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_IMPL_HPP
// In case it hasn't been included yet.
#include "rmsprop.hpp"
namespace mlpack {
namespace optimization {
template<typename DecomposableFunctionType>
RMSprop<DecomposableFunctionType>::RMSprop(DecomposableFunctionType& function,
const double stepSize,
const double alpha,
const double eps,
const size_t maxIterations,
const double tolerance,
const bool shuffle) :
function(function),
stepSize(stepSize),
alpha(alpha),
eps(eps),
maxIterations(maxIterations),
tolerance(tolerance),
shuffle(shuffle)
{ /* Nothing to do. */ }
//! Optimize the function (minimize).
template<typename DecomposableFunctionType>
double RMSprop<DecomposableFunctionType>::Optimize(arma::mat& iterate)
{
// Find the number of functions to use.
const size_t numFunctions = function.NumFunctions();
// This is used only if shuffle is true.
arma::Col<size_t> visitationOrder;
if (shuffle)
visitationOrder = arma::shuffle(arma::linspace<arma::Col<size_t>>(0,
(numFunctions - 1), numFunctions));
// To keep track of where we are and how things are going.
size_t currentFunction = 0;
double overallObjective = 0;
double lastObjective = DBL_MAX;
// Calculate the first objective function.
for (size_t i = 0; i < numFunctions; ++i)
overallObjective += function.Evaluate(iterate, i, true);
// Now iterate!
arma::mat gradient(iterate.n_rows, iterate.n_cols);
// Leaky sum of squares of parameter gradient.
arma::mat meanSquaredGradient = arma::zeros<arma::mat>(iterate.n_rows,
iterate.n_cols);
for (size_t i = 1; i != maxIterations; ++i, ++currentFunction)
{
// Is this iteration the start of a sequence?
if ((currentFunction % numFunctions) == 0)
{
// Output current objective function.
Log::Info << "RMSprop: iteration " << i << ", objective "
<< overallObjective << "." << std::endl;
if (std::isnan(overallObjective) || std::isinf(overallObjective))
{
Log::Warn << "RMSprop: converged to " << overallObjective
<< "; terminating with failure. Try a smaller step size?"
<< std::endl;
return overallObjective;
}
if (std::abs(lastObjective - overallObjective) < tolerance)
{
Log::Info << "RMSprop: minimized within tolerance " << tolerance << "; "
<< "terminating optimization." << std::endl;
return overallObjective;
}
// Reset the counter variables.
lastObjective = overallObjective;
overallObjective = 0;
currentFunction = 0;
if (shuffle) // Determine order of visitation.
visitationOrder = arma::shuffle(visitationOrder);
}
// Now add that to the overall objective function.
if (shuffle)
overallObjective += function.Evaluate(iterate,
visitationOrder[currentFunction], false);
else
overallObjective += function.Evaluate(iterate, currentFunction, false);
// Evaluate the gradient for this iteration.
if (shuffle)
function.Gradient(iterate, visitationOrder[currentFunction], gradient);
else
function.Gradient(iterate, currentFunction, gradient);
// And update the iterate.
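// The update implemented below is the standard RMSprop rule:
//   meanSquaredGradient <- alpha * meanSquaredGradient
//                          + (1 - alpha) * gradient % gradient
//   iterate <- iterate - stepSize * gradient
//                        / (sqrt(meanSquaredGradient) + eps)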
meanSquaredGradient *= alpha;
meanSquaredGradient += (1 - alpha) * (gradient % gradient);
iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + eps);
}
Log::Info << "RMSprop: maximum iterations (" << maxIterations << ") reached; "
<< "terminating optimization." << std::endl;
// Calculate final objective.
overallObjective = 0;
for (size_t i = 0; i < numFunctions; ++i)
overallObjective += function.Evaluate(iterate, i, true);
return overallObjective;
}
} // namespace optimization
} // namespace mlpack
#endif