patch for AliasLayer proof-of-concept
From 93d8d0c6e62adaa4bce7b7bd6e5ed64d9bd2cff4 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Sun, 13 Aug 2017 12:30:48 -0400
Subject: [PATCH] Partial alias layer support.

Still needs:
 * Support for other methods that only some layers have.
 * "Fake" parameters matrix so gradient updates work correctly?
 * Fixing to pass the simple test I wrote.
---
 src/mlpack/methods/ann/ffn.hpp | 2 +-
 src/mlpack/methods/ann/layer/CMakeLists.txt | 2 +
 src/mlpack/methods/ann/layer/alias_layer.hpp | 87 +++++++++++++++
 src/mlpack/methods/ann/layer/alias_layer_impl.hpp | 122 ++++++++++++++++++++++
 src/mlpack/methods/ann/layer/layer_types.hpp | 6 ++
 src/mlpack/tests/feedforward_network_test.cpp | 31 ++++++
 6 files changed, 249 insertions(+), 1 deletion(-)
 create mode 100644 src/mlpack/methods/ann/layer/alias_layer.hpp
 create mode 100644 src/mlpack/methods/ann/layer/alias_layer_impl.hpp

diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp
index 793a397..4c6af59 100644
--- a/src/mlpack/methods/ann/ffn.hpp
+++ b/src/mlpack/methods/ann/ffn.hpp
@@ -200,7 +200,7 @@ class FFN
   arma::mat& Parameters() { return parameter; }
 
   /**
-   * Reset the module infomration (weights/parameters).
+   * Reset the module information (weights/parameters).
    */
   void ResetParameters();
 
diff --git a/src/mlpack/methods/ann/layer/CMakeLists.txt b/src/mlpack/methods/ann/layer/CMakeLists.txt
index caa1cf1..31c8f89 100644
--- a/src/mlpack/methods/ann/layer/CMakeLists.txt
+++ b/src/mlpack/methods/ann/layer/CMakeLists.txt
@@ -5,6 +5,8 @@ set(SOURCES
   add_impl.hpp
   add_merge.hpp
   add_merge_impl.hpp
+  alias_layer.hpp
+  alias_layer_impl.hpp
   base_layer.hpp
   concat.hpp
   concat_impl.hpp
diff --git a/src/mlpack/methods/ann/layer/alias_layer.hpp b/src/mlpack/methods/ann/layer/alias_layer.hpp
new file mode 100644
index 0000000..8f2d93b
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/alias_layer.hpp
@@ -0,0 +1,87 @@
+/**
+ * @file alias_layer.hpp
+ * @author Ryan Curtin
+ *
+ * This is an alias layer for another layer so that parameters can be shared
+ * between multiple networks. However, it is not threadsafe---so you cannot
+ * share parameters between networks in separate threads (although... it should
+ * be pretty easy to adapt this class, you just need to add locks).
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_ALIAS_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_ALIAS_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann {
+
+class AliasLayer
+{
+ public:
+  /**
+   * Construct the AliasLayer as an alias of the given layer. When destructed,
+   * this class will not destruct the aliased layer.
+   *
+   * @param layer Layer to be aliased.
+   */
+  template<typename LayerType>
+  AliasLayer(LayerType& layer);
+
+  /**
+   * Destroy the alias layer. This does not destroy the aliased layer.
+   */
+  ~AliasLayer();
+
+  /**
+   * Reset the parameters of the layer.
+   */
+  void Reset();
+
+  /**
+   * Perform a forward pass of the aliased layer.
+   */
+  template<typename eT>
+  void Forward(arma::Mat<eT>&& input, arma::Mat<eT>&& output);
+
+  /**
+   * Perform a backwards pass of the aliased layer.
+   */
+  template<typename eT>
+  void Backward(arma::Mat<eT>&& input,
+                arma::Mat<eT>&& gy,
+                arma::Mat<eT>&& g);
+
+  /**
+   * Calculate the gradient of the aliased layer using the delta and the input
+   * activation.
+   */
+  template<typename eT>
+  void Gradient(const arma::Mat<eT>&& input,
+                arma::Mat<eT>&& error,
+                arma::Mat<eT>&& gradient);
+
+  const arma::mat& OutputParameter() const { return weights; }
+  arma::mat& OutputParameter() { return weights; }
+
+  const arma::mat& Delta() const;
+  arma::mat& Delta();
+
+ private:
+  // The layer being aliased. We have to store it as a void pointer because
+  // we can't store it as a LayerTypes like we might like to.
+  void* layer;
+  // If true, then we own the LayerTypes (boost::variant) and need to delete it.
+  bool ownsVariant;
+  // Fake weights.
+  arma::mat weights;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+// Include implementation, but only if we are not in layer_types.hpp
+#ifndef MLPACK_IN_LAYER_TYPES_HPP
+#include "alias_layer_impl.hpp"
+#endif
+
+#endif
diff --git a/src/mlpack/methods/ann/layer/alias_layer_impl.hpp b/src/mlpack/methods/ann/layer/alias_layer_impl.hpp
new file mode 100644
index 0000000..e16fd3b
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/alias_layer_impl.hpp
@@ -0,0 +1,122 @@
+/**
+ * @file alias_layer_impl.hpp
+ * @author Ryan Curtin
+ *
+ * This is an alias layer for another layer so that parameters can be shared
+ * between multiple networks. However, it is not threadsafe---so you cannot
+ * share parameters between networks in separate threads (although... it should
+ * be pretty easy to adapt this class, you just need to add locks).
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_ALIAS_LAYER_IMPL_HPP
+#define MLPACK_METHODS_ANN_LAYER_ALIAS_LAYER_IMPL_HPP
+
+#include "alias_layer.hpp"
+
+#include <mlpack/methods/ann/layer/layer_types.hpp>
+#include <mlpack/methods/ann/visitor/reset_visitor.hpp>
+#include <mlpack/methods/ann/visitor/forward_visitor.hpp>
+#include <mlpack/methods/ann/visitor/backward_visitor.hpp>
+#include <mlpack/methods/ann/visitor/gradient_visitor.hpp>
+#include <mlpack/methods/ann/visitor/delta_visitor.hpp>
+
+namespace mlpack {
+namespace ann {
+
+/**
+ * Create the alias layer.
+ */
+template<typename LayerType>
+AliasLayer::AliasLayer(LayerType& layer) : ownsVariant(false)
+{
+  // Do we need to create our own LayerTypes holder for the layer?
+  if (std::is_same<LayerType, LayerTypes>::value)
+  {
+    this->layer = (void*) &layer;
+  }
+  else
+  {
+    // Hopefully the type is ok...
+    LayerTypes* l = new LayerTypes(&layer);
+    this->layer = (void*) l;
+    ownsVariant = true;
+  }
+}
+
+/**
+ * Destroy the alias layer.
+ */
+inline AliasLayer::~AliasLayer()
+{
+  if (ownsVariant)
+  {
+    // Set the LayerTypes to NULL so that its destructor doesn't delete the
+    // layer, then delete it.
+    LayerTypes* l = (LayerTypes*) layer;
+    // Use a type that's part of LayerTypes. We have to specify the template
+    // parameters, because linear.hpp may not have been included yet (and we may
+    // only have a forward declaration).
+    (*l) = (Linear<arma::mat, arma::mat>*) NULL;
+    delete l;
+  }
+}
+
+/**
+ * Reset the parameters of the layer.
+ */
+inline void AliasLayer::Reset()
+{
+  // Create a new visitor and call the layer's Reset() function.
+  boost::apply_visitor(ResetVisitor(), *((LayerTypes*) layer));
+}
+
+/**
+ * Perform a forward pass of the aliased layer.
+ */
+template<typename eT>
+void AliasLayer::Forward(arma::Mat<eT>&& input, arma::Mat<eT>&& output)
+{
+  // Create a new visitor and call the layer's Forward() function.
+  boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)),
+      *((LayerTypes*) layer));
+}
+
+/**
+ * Perform a backwards pass of the aliased layer.
+ */
+template<typename eT>
+void AliasLayer::Backward(arma::Mat<eT>&& input,
+                          arma::Mat<eT>&& gy,
+                          arma::Mat<eT>&& g)
+{
+  // Create a new visitor and call the layer's Backward() function.
+  boost::apply_visitor(BackwardVisitor(std::move(input), std::move(gy),
+      std::move(g)), *((LayerTypes*) layer));
+}
+
+/**
+ * Calculate the gradient of the aliased layer using the delta and the input
+ * activation.
+ */
+template<typename eT>
+void AliasLayer::Gradient(const arma::Mat<eT>&& input,
+                          arma::Mat<eT>&& error,
+                          arma::Mat<eT>&& gradient)
+{
+  boost::apply_visitor(GradientVisitor(std::move(input), std::move(error),
+      std::move(gradient)), *((LayerTypes*) layer));
+}
+
+inline const arma::mat& AliasLayer::Delta() const
+{
+  return boost::apply_visitor(DeltaVisitor(), *((LayerTypes*) layer));
+}
+
+inline arma::mat& AliasLayer::Delta()
+{
+  return boost::apply_visitor(DeltaVisitor(), *((LayerTypes*) layer));
+}
+
+} // namespace ann
+} // namespace mlpack
+
+#endif
diff --git a/src/mlpack/methods/ann/layer/layer_types.hpp b/src/mlpack/methods/ann/layer/layer_types.hpp
index 9a31e1c..0a78d30 100644
--- a/src/mlpack/methods/ann/layer/layer_types.hpp
+++ b/src/mlpack/methods/ann/layer/layer_types.hpp
@@ -16,6 +16,9 @@
 
 // Layer modules.
 #include <mlpack/methods/ann/layer/add.hpp>
+#define MLPACK_IN_LAYER_TYPES_HPP // alias_layer_impl.hpp can't be included yet.
+#include <mlpack/methods/ann/layer/alias_layer.hpp>
+#undef MLPACK_IN_LAYER_TYPES_HPP
 #include <mlpack/methods/ann/layer/base_layer.hpp>
 #include <mlpack/methods/ann/layer/constant.hpp>
 #include <mlpack/methods/ann/layer/cross_entropy_error.hpp>
@@ -79,6 +82,7 @@ class RecurrentAttention;
 using LayerTypes = boost::variant<
     Add<arma::mat, arma::mat>*,
     AddMerge<arma::mat, arma::mat>*,
+    AliasLayer*,
     BaseLayer<LogisticFunction, arma::mat, arma::mat>*,
     BaseLayer<IdentityFunction, arma::mat, arma::mat>*,
     BaseLayer<TanhFunction, arma::mat, arma::mat>*,
@@ -120,4 +124,6 @@ using LayerTypes = boost::variant<
 } // namespace ann
 } // namespace mlpack
 
+#include <mlpack/methods/ann/layer/alias_layer_impl.hpp>
+
 #endif
diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp
index ed76e03..d1afefa 100644
--- a/src/mlpack/tests/feedforward_network_test.cpp
+++ b/src/mlpack/tests/feedforward_network_test.cpp
@@ -499,4 +499,35 @@ BOOST_AUTO_TEST_CASE(FFNMiscTest)
   movedModel = std::move(copiedModel);
 }
 
+/**
+ * Test that we can successfully have an alias layer.
+ */
+BOOST_AUTO_TEST_CASE(AliasLayerTest)
+{
+  arma::mat dataset(10, 100, arma::fill::randu);
+  arma::mat responses(1, 100, arma::fill::randu);
+
+  FFN<MeanSquaredError<>> network;
+  Linear<> layer(10, 10);
+  network.Add<AliasLayer>(layer);
+  network.Add<Linear<>>(10, 1);
+
+  const arma::mat oldMatrix = layer.Parameters();
+  std::cout << oldMatrix.t();
+
+  const size_t maxEpochs = 1;
+  RMSProp opt(0.01, 0.88, 1e-8, maxEpochs * dataset.n_cols, -1);
+  network.Train(dataset, responses, opt);
+
+  std::cout << "-----\n";
+  std::cout << layer.Parameters().t();
+  std::cout << "-----\n";
+  std::cout << network.Parameters().t();
+
+  // Make sure the layer's parameters have changed.
+  BOOST_REQUIRE_EQUAL(oldMatrix.n_elem, layer.Parameters().n_elem);
+  for (size_t i = 0; i < oldMatrix.n_elem; ++i)
+    BOOST_REQUIRE_GE(std::abs(oldMatrix[i] - layer.Parameters()[i]), 1e-5);
+}
+
 BOOST_AUTO_TEST_SUITE_END();
-- 
2.5.4 (Apple Git-61)
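
A quick sketch of the intended use case, mirroring the AliasLayerTest above but sharing one layer between two networks. This is hypothetical code, not part of the patch (the names shared, netA, and netB are made up for illustration), and per the commit message the gradient updates through the alias do not work correctly yet:

    // Assumes the patch is applied and the same headers as
    // feedforward_network_test.cpp are included.
    arma::mat dataset(10, 100, arma::fill::randu);
    arma::mat responses(1, 100, arma::fill::randu);

    Linear<> shared(10, 10);       // The layer whose weights are shared.

    FFN<MeanSquaredError<>> netA;
    netA.Add<AliasLayer>(shared);  // Aliases `shared`; ownership stays outside.
    netA.Add<Linear<>>(10, 1);

    FFN<MeanSquaredError<>> netB;
    netB.Add<AliasLayer>(shared);  // Second network sees the very same weights.
    netB.Add<Linear<>>(10, 1);

    // Training either network should update shared.Parameters() in place.
    RMSProp opt(0.01, 0.88, 1e-8, dataset.n_cols, -1);
    netA.Train(dataset, responses, opt);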
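A note on the constructor in alias_layer_impl.hpp: the runtime check on std::is_same still instantiates both branches for every LayerType, and `new LayerTypes(&layer)` has no matching variant alternative when LayerType is LayerTypes itself. Under C++17, `if constexpr` discards the untaken branch without instantiating it (mlpack targeted C++11 at the time, where tag dispatch on std::true_type/std::false_type would be the equivalent). A sketch, not part of the patch:

    template<typename LayerType>
    AliasLayer::AliasLayer(LayerType& layer) : ownsVariant(false)
    {
      if constexpr (std::is_same<LayerType, LayerTypes>::value)
      {
        // Alias an existing variant directly; nothing to own or delete.
        this->layer = (void*) &layer;
      }
      else
      {
        // Wrap the concrete layer in a variant that this alias owns. Only
        // this branch is instantiated for concrete layer types, so it
        // always compiles.
        this->layer = (void*) new LayerTypes(&layer);
        ownsVariant = true;
      }
    }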
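The file comments note the class is not thread-safe but that "you just need to add locks". One minimal way to do that, shown for Forward() only; the aliasMutex member is hypothetical (not in the patch) and would also have to guard Backward(), Gradient(), and Reset():

    // Requires <mutex> and a `std::mutex aliasMutex;` member in AliasLayer.
    template<typename eT>
    void AliasLayer::Forward(arma::Mat<eT>&& input, arma::Mat<eT>&& output)
    {
      // Serialize dispatch so two networks on different threads cannot run
      // the aliased layer at the same time.
      std::lock_guard<std::mutex> lock(aliasMutex);
      boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)),
          *((LayerTypes*) layer));
    }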