Created March 9, 2016 01:34

dropconnect_layer.hpp
/**
 * @file dropconnect_layer.hpp
 * @author Palash Ahuja
 *
 * Definition of the DropConnectLayer class, which implements a regularizer
 * that randomly sets connections to zero, preventing units from co-adapting.
 */
#ifndef __MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP
#define __MLPACK_METHODS_ANN_LAYER_DROPCONNECT_LAYER_HPP

#include <mlpack/core.hpp>

namespace mlpack {
namespace ann /** Artificial Neural Network. */ {
/**
 * The DropConnect layer is a regularizer that randomly, with probability
 * ratio, sets the weights of the base layer to zero during training. If
 * rescale is true, the resulting activations are scaled by the factor
 * 1 / (1 - ratio) so that their expected value is unchanged. In deterministic
 * mode (during testing) the layer simply performs the forward pass of the
 * base layer with the full, unmasked weights.
 *
 * Note: During training you should set deterministic to false and during
 * testing you should set deterministic to true.
 *
 * For more information, see the following.
 *
 * @inproceedings{icml2013_wan13,
 *   Publisher = {JMLR Workshop and Conference Proceedings},
 *   Title = {Regularization of Neural Networks using DropConnect},
 *   Url = {http://jmlr.org/proceedings/papers/v28/wan13.pdf},
 *   Booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML-13)},
 *   Author = {Li Wan and Matthew Zeiler and Sixin Zhang and Yann L. Cun and Rob Fergus},
 *   Number = {3},
 *   Month = may,
 *   Volume = {28},
 *   Editor = {Sanjoy Dasgupta and David Mcallester},
 *   Year = {2013},
 *   Pages = {1058-1066},
 *   Abstract = {We introduce DropConnect, a generalization of Dropout, for
 *     regularizing large fully-connected layers within neural networks. When
 *     training with Dropout, a randomly selected subset of activations are
 *     set to zero within each layer. DropConnect instead sets a randomly
 *     selected subset of weights within the network to zero. Each unit thus
 *     receives input from a random subset of units in the previous layer. We
 *     derive a bound on the generalization performance of both Dropout and
 *     DropConnect. We then evaluate DropConnect on a range of datasets,
 *     comparing to Dropout, and show state-of-the-art results on several
 *     image recognition benchmarks can be obtained by aggregating multiple
 *     DropConnect-trained models.}
 * }
 */
template<
    typename InputLayer = LinearLayer<>,
    typename InputDataType = arma::mat,
    typename OutputDataType = arma::mat
>
class DropConnectLayer {
 public:
  /**
   * Create the DropConnectLayer object using the specified base layer, ratio
   * and rescale parameter.
   *
   * @param inputLayer The base layer whose weights are randomly set to zero.
   * @param ratio The probability of setting a connection to zero.
   * @param rescale If true the output is rescaled by 1 / (1 - ratio) when
   *        deterministic is false.
   */
  template<typename InputLayerType>
  DropConnectLayer(InputLayerType&& inputLayer,
                   const double ratio = 0.5,
                   const bool rescale = true) :
      baseLayer(std::forward<InputLayerType>(inputLayer)),
      ratio(ratio),
      scale(1.0 / (1.0 - ratio)),
      deterministic(false),
      rescale(rescale)
  {
  }
  /**
   * Ordinary feed forward pass of the DropConnect layer.
   *
   * @param input Input data used for evaluating the specified function.
   * @param output Resulting output activation.
   */
  template<typename eT>
  void Forward(const arma::Mat<eT>& input, arma::Mat<eT>& output) {
    // The DropConnect mask is not applied in deterministic mode (during
    // testing); the base layer runs with its full set of weights.
    if (deterministic) {
      baseLayer.Forward(input, output);
    }
    else {
      // Set weights to zero with probability ratio.
      mask = arma::randu<arma::Mat<eT> >(baseLayer.Weights().n_rows,
          baseLayer.Weights().n_cols);
      mask.transform([&](double val) { return (val > ratio); });

      // Save the unmasked weights so they can be restored in Gradient().
      denoise = baseLayer.Weights();
      baseLayer.Weights() = baseLayer.Weights() % mask;
      baseLayer.Forward(input, output);

      // Rescale by 1 / (1 - ratio) so the expected activation is unchanged.
      if (rescale)
        output *= scale;
    }
  }
  /**
   * Ordinary feed backward pass of the DropConnect layer.
   *
   * @param input The propagated input activation.
   * @param gy The backpropagated error.
   * @param g The calculated gradient.
   */
  template<typename DataType>
  void Backward(const DataType& input,
                const DataType& gy,
                DataType& g)
  {
    baseLayer.Backward(input, gy, g);
  }

  /**
   * Calculate the gradient of the base layer, then restore the unmasked
   * weights.
   *
   * @param d The calculated error.
   * @param g The calculated gradient.
   */
  template<typename eT, typename GradientDataType>
  void Gradient(const arma::Mat<eT>& d, GradientDataType& g)
  {
    baseLayer.Gradient(d, g);

    // Denoise the weights, i.e. restore the weights saved before masking.
    baseLayer.Weights() = denoise;
  }
  //! Get the weights.
  OutputDataType const& Weights() const { return baseLayer.Weights(); }
  //! Modify the weights.
  OutputDataType& Weights() { return baseLayer.Weights(); }

  //! Get the base layer.
  InputLayer const& Layer() const { return baseLayer; }
  //! Modify the base layer.
  InputLayer& Layer() { return baseLayer; }

  //! Get the input parameter.
  InputDataType const& InputParameter() const { return baseLayer.InputParameter(); }
  //! Modify the input parameter.
  InputDataType& InputParameter() { return baseLayer.InputParameter(); }

  //! Get the output parameter.
  OutputDataType const& OutputParameter() const { return baseLayer.OutputParameter(); }
  //! Modify the output parameter.
  OutputDataType& OutputParameter() { return baseLayer.OutputParameter(); }

  //! Get the delta.
  OutputDataType const& Delta() const { return baseLayer.Delta(); }
  //! Modify the delta.
  OutputDataType& Delta() { return baseLayer.Delta(); }

  //! Get the gradient.
  OutputDataType const& Gradient() const { return baseLayer.Gradient(); }
  //! Modify the gradient.
  OutputDataType& Gradient() { return baseLayer.Gradient(); }

  //! Get the value of the deterministic parameter.
  bool Deterministic() const { return deterministic; }
  //! Modify the value of the deterministic parameter.
  bool& Deterministic() { return deterministic; }

  //! Get the probability of setting a connection to zero.
  double Ratio() const { return ratio; }
  //! Modify the probability of setting a connection to zero.
  void Ratio(const double r) {
    ratio = r;
    scale = 1.0 / (1.0 - ratio);
  }

  //! Get the value of the rescale parameter.
  bool Rescale() const { return rescale; }
  //! Modify the value of the rescale parameter.
  bool& Rescale() { return rescale; }
 private:
  //! Locally-stored base layer object.
  InputLayer baseLayer;

  //! Locally-stored delta object.
  OutputDataType delta;

  //! Locally-stored input parameter object.
  InputDataType inputParameter;

  //! Locally-stored output parameter object.
  OutputDataType outputParameter;

  //! Locally-stored mask object.
  OutputDataType mask;

  //! The probability of setting a connection to zero.
  double ratio;

  //! The scale fraction 1 / (1 - ratio).
  double scale;

  //! If true dropout and scaling are disabled; see the notes above.
  bool deterministic;

  //! If true the output is rescaled when deterministic is false.
  bool rescale;

  //! Locally-stored copy of the unmasked weights, restored in Gradient().
  arma::mat denoise;
}; // class DropConnectLayer
//! Layer traits for the DropConnectLayer.
template<
    typename InputLayer,
    typename InputDataType,
    typename OutputDataType
>
class LayerTraits<DropConnectLayer<InputLayer, InputDataType, OutputDataType> >
{
 public:
  static const bool IsBinary = false;
  static const bool IsOutputLayer = false;
  static const bool IsBiasLayer = false;
  static const bool IsLSTMLayer = false;
  static const bool IsConnection = true;
};
/**
 * Standard DropConnect layer.
 */
// template<
//     typename InputLayer,
//     typename InputDataType = arma::cube,
//     typename OutputDataType = arma::cube
// >
// using DropConnectLayer2D = DropConnectLayer<InputLayer, InputDataType, OutputDataType>;
} // namespace ann
} // namespace mlpack

#endif
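For reference, the training-time branch of Forward() above amounts to the following standalone Armadillo sketch. This is an editorial addition, not part of the gist; the 5x10 weight matrix, the ratio of 0.3, and the single matrix-vector product are placeholders for whatever the wrapped base layer actually computes.

// Standalone sketch of the DropConnect masking used in Forward() above:
// zero each weight with probability ratio, then rescale by 1 / (1 - ratio)
// so the expected pre-activation matches the unmasked layer.
#include <armadillo>

int main()
{
  const double ratio = 0.3;                  // Probability of dropping a weight.
  const double scale = 1.0 / (1.0 - ratio);  // Inverted-dropout rescaling factor.

  arma::arma_rng::set_seed_random();
  arma::mat weights = arma::randu<arma::mat>(5, 10);  // Stand-in base layer weights.
  arma::mat input = arma::randu<arma::mat>(10, 1);

  // Bernoulli mask: an entry survives with probability (1 - ratio).
  arma::mat mask = arma::randu<arma::mat>(weights.n_rows, weights.n_cols);
  mask.transform([&](double val) { return (double) (val > ratio); });

  // Masked forward pass of a plain linear layer, followed by rescaling.
  arma::mat output = scale * ((weights % mask) * input);

  output.print("masked, rescaled output");
  return 0;
}

Averaged over many masks, the rescaled product has the same expected value as weights * input, which is why the deterministic branch can simply skip the masking.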
rmsprop_impl.hpp
/**
 * @file rmsprop_impl.hpp
 * @author Ryan Curtin
 * @author Marcus Edel
 *
 * Implementation of the RMSprop optimizer.
 */
#ifndef __MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_IMPL_HPP
#define __MLPACK_CORE_OPTIMIZERS_RMSPROP_RMSPROP_IMPL_HPP

// In case it hasn't been included yet.
#include "rmsprop.hpp"

namespace mlpack {
namespace optimization {

template<typename DecomposableFunctionType>
RMSprop<DecomposableFunctionType>::RMSprop(DecomposableFunctionType& function,
                                           const double stepSize,
                                           const double alpha,
                                           const double eps,
                                           const size_t maxIterations,
                                           const double tolerance,
                                           const bool shuffle) :
    function(function),
    stepSize(stepSize),
    alpha(alpha),
    eps(eps),
    maxIterations(maxIterations),
    tolerance(tolerance),
    shuffle(shuffle)
{ /* Nothing to do. */ }
//! Optimize the function (minimize).
template<typename DecomposableFunctionType>
double RMSprop<DecomposableFunctionType>::Optimize(arma::mat& iterate)
{
  // Find the number of functions to use.
  const size_t numFunctions = function.NumFunctions();

  // This is used only if shuffle is true.
  arma::Col<size_t> visitationOrder;
  if (shuffle)
    visitationOrder = arma::shuffle(arma::linspace<arma::Col<size_t>>(0,
        (numFunctions - 1), numFunctions));

  // To keep track of where we are and how things are going.
  size_t currentFunction = 0;
  double overallObjective = 0;
  double lastObjective = DBL_MAX;

  // Calculate the first objective function.
  for (size_t i = 0; i < numFunctions; ++i)
    overallObjective += function.Evaluate(iterate, i, true);

  // Now iterate!
  arma::mat gradient(iterate.n_rows, iterate.n_cols);

  // Leaky sum of squares of parameter gradient.
  arma::mat meanSquaredGradient = arma::zeros<arma::mat>(iterate.n_rows,
      iterate.n_cols);
  for (size_t i = 1; i != maxIterations; ++i, ++currentFunction)
  {
    // Is this iteration the start of a sequence?
    if ((currentFunction % numFunctions) == 0)
    {
      // Output current objective function.
      Log::Info << "RMSprop: iteration " << i << ", objective "
          << overallObjective << "." << std::endl;

      if (std::isnan(overallObjective) || std::isinf(overallObjective))
      {
        Log::Warn << "RMSprop: converged to " << overallObjective
            << "; terminating with failure. Try a smaller step size?"
            << std::endl;
        return overallObjective;
      }

      if (std::abs(lastObjective - overallObjective) < tolerance)
      {
        Log::Info << "RMSprop: minimized within tolerance " << tolerance << "; "
            << "terminating optimization." << std::endl;
        return overallObjective;
      }

      // Reset the counter variables.
      lastObjective = overallObjective;
      overallObjective = 0;
      currentFunction = 0;

      if (shuffle) // Determine order of visitation.
        visitationOrder = arma::shuffle(visitationOrder);
    }

    // Evaluate the current function and add its value to the overall
    // objective.
    if (shuffle)
      overallObjective += function.Evaluate(iterate,
          visitationOrder[currentFunction], false);
    else
      overallObjective += function.Evaluate(iterate, currentFunction, false);

    // Evaluate the gradient for this iteration.
    if (shuffle)
      function.Gradient(iterate, visitationOrder[currentFunction], gradient);
    else
      function.Gradient(iterate, currentFunction, gradient);

    // Update the iterate: keep a leaky average of the squared gradient and
    // scale each parameter's step by its root mean square.
    meanSquaredGradient *= alpha;
    meanSquaredGradient += (1 - alpha) * (gradient % gradient);
    iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + eps);
  }
Log::Info << "RMSprop: maximum iterations (" << maxIterations << ") reached; " | |
<< "terminating optimization." << std::endl; | |
// Calculate final objective. | |
overallObjective = 0; | |
for (size_t i = 0; i < numFunctions; ++i) | |
overallObjective += function.Evaluate(iterate, i, true); | |
return overallObjective; | |
} | |
} // namespace optimization | |
} // namespace mlpack | |
#endif |
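The three lines at the end of the main loop are the whole of the RMSprop update. As a quick standalone check (again an editorial sketch, not part of the gist), the same update drives a toy quadratic toward its minimum; f(x) = sum(x^2), its gradient 2x, and the 3x3 parameter matrix are stand-ins for whatever function.Gradient() supplies.

// Standalone sketch of the RMSprop update used in Optimize() above, applied
// to f(x) = sum(x^2): a leaky average of squared gradients scales each
// parameter's step individually.
#include <armadillo>

int main()
{
  const double stepSize = 0.01, alpha = 0.99, eps = 1e-8;

  arma::mat iterate = arma::randu<arma::mat>(3, 3);              // Parameters.
  arma::mat meanSquaredGradient = arma::zeros<arma::mat>(3, 3);  // Optimizer state.

  for (size_t i = 0; i < 500; ++i)
  {
    // Stand-in gradient; Optimize() gets this from function.Gradient().
    arma::mat gradient = 2 * iterate;

    // The same three update lines as in the main loop above.
    meanSquaredGradient *= alpha;
    meanSquaredGradient += (1 - alpha) * (gradient % gradient);
    iterate -= stepSize * gradient / (arma::sqrt(meanSquaredGradient) + eps);
  }

  // All entries should now be close to the minimizer at zero.
  iterate.print("iterate after 500 RMSprop steps");
  return 0;
}

Because each parameter is divided by the root of its own running mean of squared gradients, the effective step size adapts per coordinate, which is what makes RMSprop robust to poorly scaled gradients.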