patch for AliasLayer proof-of-concept
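The patch below adds an AliasLayer: a layer that holds a non-owning reference to another layer, so that one layer's parameters can be shared between multiple networks. A rough usage sketch, pieced together from the test added at the end of the patch, might look like the following; the include paths and the RMSProp constructor arguments are copied from the mlpack 2.x-era test code and may need adjusting:

#include <mlpack/core.hpp>
#include <mlpack/core/optimizers/rmsprop/rmsprop.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>
#include <mlpack/methods/ann/ffn.hpp>

using namespace mlpack::ann;
using namespace mlpack::optimization;

int main()
{
  arma::mat dataset(10, 100, arma::fill::randu);
  arma::mat responses(1, 100, arma::fill::randu);

  // The Linear layer is owned here, outside of any network; the AliasLayer
  // added below does not take ownership of it.
  Linear<> shared(10, 10);

  FFN<MeanSquaredError<>> network;
  network.Add<AliasLayer>(shared);
  network.Add<Linear<>>(10, 1);

  // Same optimizer settings as the test in the patch (one epoch).
  RMSProp opt(0.01, 0.88, 1e-8, dataset.n_cols, -1);
  network.Train(dataset, responses, opt);

  // If parameter sharing worked, training through the alias should have
  // updated the weights of the shared layer (this is the part the commit
  // message notes is not finished yet).
  shared.Parameters().print("shared layer parameters:");
}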
From 93d8d0c6e62adaa4bce7b7bd6e5ed64d9bd2cff4 Mon Sep 17 00:00:00 2001
From: Ryan Curtin <ryan@ratml.org>
Date: Sun, 13 Aug 2017 12:30:48 -0400
Subject: [PATCH] Partial alias layer support.
Still needs:
* Support for other methods that only some layers have.
* "Fake" parameters matrix so gradient updates work correctly?
* Fixes so that it passes the simple test I wrote.
---
src/mlpack/methods/ann/ffn.hpp | 2 +-
src/mlpack/methods/ann/layer/CMakeLists.txt | 2 +
src/mlpack/methods/ann/layer/alias_layer.hpp | 87 +++++++++++++++
src/mlpack/methods/ann/layer/alias_layer_impl.hpp | 125 ++++++++++++++++++++++
src/mlpack/methods/ann/layer/layer_types.hpp | 6 ++
src/mlpack/tests/feedforward_network_test.cpp | 31 ++++++
6 files changed, 252 insertions(+), 1 deletion(-)
create mode 100644 src/mlpack/methods/ann/layer/alias_layer.hpp
create mode 100644 src/mlpack/methods/ann/layer/alias_layer_impl.hpp
diff --git a/src/mlpack/methods/ann/ffn.hpp b/src/mlpack/methods/ann/ffn.hpp
index 793a397..4c6af59 100644
--- a/src/mlpack/methods/ann/ffn.hpp
+++ b/src/mlpack/methods/ann/ffn.hpp
@@ -200,7 +200,7 @@ class FFN
arma::mat& Parameters() { return parameter; }
/**
- * Reset the module infomration (weights/parameters).
+ * Reset the module information (weights/parameters).
*/
void ResetParameters();
diff --git a/src/mlpack/methods/ann/layer/CMakeLists.txt b/src/mlpack/methods/ann/layer/CMakeLists.txt
index caa1cf1..31c8f89 100644
--- a/src/mlpack/methods/ann/layer/CMakeLists.txt
+++ b/src/mlpack/methods/ann/layer/CMakeLists.txt
@@ -5,6 +5,8 @@ set(SOURCES
add_impl.hpp
add_merge.hpp
add_merge_impl.hpp
+ alias_layer.hpp
+ alias_layer_impl.hpp
base_layer.hpp
concat.hpp
concat_impl.hpp
diff --git a/src/mlpack/methods/ann/layer/alias_layer.hpp b/src/mlpack/methods/ann/layer/alias_layer.hpp
new file mode 100644
index 0000000..8f2d93b
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/alias_layer.hpp
@@ -0,0 +1,87 @@
+/**
+ * @file alias_layer.hpp
+ * @author Ryan Curtin
+ *
+ * This is an alias layer for another layer so that parameters can be shared
+ * between multiple networks. However, it is not thread-safe, so you cannot
+ * share parameters between networks in separate threads (although it should
+ * be fairly easy to adapt this class; you just need to add locks).
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_ALIAS_LAYER_HPP
+#define MLPACK_METHODS_ANN_LAYER_ALIAS_LAYER_HPP
+
+#include <mlpack/prereqs.hpp>
+
+namespace mlpack {
+namespace ann {
+
+class AliasLayer
+{
+ public:
+ /**
+ * Construct the AliasLayer as an alias of the given layer. When destructed,
+ * this class will not destruct the aliased layer.
+ *
+ * @param layer Layer to be aliased.
+ */
+ template<typename LayerType>
+ AliasLayer(LayerType& layer);
+
+ /**
+ * Destroy the alias layer. This does not destroy the aliased layer.
+ */
+ ~AliasLayer();
+
+ /**
+ * Reset the parameters of the layer.
+ */
+ void Reset();
+
+ /**
+ * Perform a forward pass of the aliased layer.
+ */
+ template<typename eT>
+ void Forward(arma::Mat<eT>&& input, arma::Mat<eT>&& output);
+
+ /**
+ * Perform a backwards pass of the aliased layer.
+ */
+ template<typename eT>
+ void Backward(arma::Mat<eT>&& input,
+ arma::Mat<eT>&& gy,
+ arma::Mat<eT>&& g);
+
+ /**
+ * Calculate the gradient of the aliased layer using the delta and the input
+ * activation.
+ */
+ template<typename eT>
+ void Gradient(const arma::Mat<eT>&& input,
+ arma::Mat<eT>&& error,
+ arma::Mat<eT>&& gradient);
+
+ const arma::mat& OutputParameter() const { return weights; }
+ arma::mat& OutputParameter() { return weights; }
+
+ const arma::mat& Delta() const;
+ arma::mat& Delta();
+
+ private:
+ // The layer being aliased. We have to store it as a void pointer because
+ // we can't store it as a LayerTypes like we might like to.
+ void* layer;
+ // If true, then we own the LayerTypes (boost::variant) and need to delete it.
+ bool ownsVariant;
+ // Fake weights.
+ arma::mat weights;
+};
+
+} // namespace ann
+} // namespace mlpack
+
+// Include implementation, but only if we are not in layer_types.hpp
+#ifndef MLPACK_IN_LAYER_TYPES_HPP
+#include "alias_layer_impl.hpp"
+#endif
+
+#endif
diff --git a/src/mlpack/methods/ann/layer/alias_layer_impl.hpp b/src/mlpack/methods/ann/layer/alias_layer_impl.hpp
new file mode 100644
index 0000000..e16fd3b
--- /dev/null
+++ b/src/mlpack/methods/ann/layer/alias_layer_impl.hpp
@@ -0,0 +1,125 @@
+/**
+ * @file alias_layer_impl.hpp
+ * @author Ryan Curtin
+ *
+ * This is an alias layer for another layer so that parameters can be shared
+ * between multiple networks. However, it is not thread-safe, so you cannot
+ * share parameters between networks in separate threads (although it should
+ * be fairly easy to adapt this class; you just need to add locks).
+ */
+#ifndef MLPACK_METHODS_ANN_LAYER_ALIAS_LAYER_IMPL_HPP
+#define MLPACK_METHODS_ANN_LAYER_ALIAS_LAYER_IMPL_HPP
+
+#include "alias_layer.hpp"
+
+#include <mlpack/methods/ann/layer/layer_types.hpp>
+#include <mlpack/methods/ann/visitor/reset_visitor.hpp>
+#include <mlpack/methods/ann/visitor/forward_visitor.hpp>
+#include <mlpack/methods/ann/visitor/backward_visitor.hpp>
+#include <mlpack/methods/ann/visitor/gradient_visitor.hpp>
+#include <mlpack/methods/ann/visitor/delta_visitor.hpp>
+
+namespace mlpack {
+namespace ann {
+
+/**
+ * Create the alias layer.
+ */
+template<typename LayerType>
+AliasLayer::AliasLayer(LayerType& layer) : ownsVariant(false)
+{
+ // Do we need to create our own LayerTypes holder for the layer?
+ if (std::is_same<LayerType, LayerTypes>::value)
+ {
+ this->layer = (void*) &layer;
+ }
+ else
+ {
+ // Wrap the layer in a new variant; LayerType must be one of LayerTypes' types.
+ LayerTypes* l = new LayerTypes(&layer);
+ this->layer = (void*) l;
+ ownsVariant = true;
+ }
+}
+
+/**
+ * Destroy the alias layer.
+ */
+inline AliasLayer::~AliasLayer()
+{
+ if (ownsVariant)
+ {
+ // Set the LayerTypes to NULL so that its destructor doesn't delete the
+ // layer, then delete it.
+ LayerTypes* l = (LayerTypes*) layer;
+ // Use a type that's part of LayerTypes. We have to specify the template
+ // parameters, because linear.hpp may not have been included yet (and we may
+ // only have a forward declaration).
+ (*l) = (Linear<arma::mat, arma::mat>*) NULL;
+ delete l;
+ }
+}
+
+/**
+ * Reset the parameters of the layer.
+ */
+inline void AliasLayer::Reset()
+{
+ // Create a new visitor and call the layer's Reset() function.
+ boost::apply_visitor(ResetVisitor(), *((LayerTypes*) layer));
+}
+
+/**
+ * Perform a forward pass of the aliased layer.
+ */
+template<typename eT>
+void AliasLayer::Forward(arma::Mat<eT>&& input, arma::Mat<eT>&& output)
+{
+ // Create a new visitor and call the layer's Forward() function.
+ boost::apply_visitor(ForwardVisitor(std::move(input), std::move(output)),
+ *((LayerTypes*) layer));
+}
+
+/**
+ * Perform a backwards pass of the aliased layer.
+ */
+template<typename eT>
+void AliasLayer::Backward(arma::Mat<eT>&& input,
+ arma::Mat<eT>&& gy,
+ arma::Mat<eT>&& g)
+{
+ // Create a new visitor and call the layer's Backward() function.
+ boost::apply_visitor(BackwardVisitor(std::move(input), std::move(gy),
+ std::move(g)), *((LayerTypes*) layer));
+}
+
+/**
+ * Calculate the gradient of the aliased layer using the delta and the input
+ * activation.
+ */
+template<typename eT>
+void AliasLayer::Gradient(const arma::Mat<eT>&& input,
+ arma::Mat<eT>&& error,
+ arma::Mat<eT>&& gradient)
+{
+ boost::apply_visitor(GradientVisitor(std::move(input), std::move(error),
+ std::move(gradient)), *((LayerTypes*) layer));
+}
+
+inline const arma::mat& AliasLayer::Delta() const
+{
+ return boost::apply_visitor(DeltaVisitor(), *((LayerTypes*) layer));
+}
+
+inline arma::mat& AliasLayer::Delta()
+{
+ return boost::apply_visitor(DeltaVisitor(), *((LayerTypes*) layer));
+}
+
+} // namespace ann
+} // namespace mlpack
+
+// Include implementation.
+#include "alias_layer_impl.hpp"
+
+#endif
diff --git a/src/mlpack/methods/ann/layer/layer_types.hpp b/src/mlpack/methods/ann/layer/layer_types.hpp
index 9a31e1c..0a78d30 100644
--- a/src/mlpack/methods/ann/layer/layer_types.hpp
+++ b/src/mlpack/methods/ann/layer/layer_types.hpp
@@ -16,6 +16,9 @@
// Layer modules.
#include <mlpack/methods/ann/layer/add.hpp>
+#define MLPACK_IN_LAYER_TYPES_HPP // alias_layer_impl.hpp can't be included yet.
+#include <mlpack/methods/ann/layer/alias_layer.hpp>
+#undef MLPACK_IN_LAYER_TYPES_HPP
#include <mlpack/methods/ann/layer/base_layer.hpp>
#include <mlpack/methods/ann/layer/constant.hpp>
#include <mlpack/methods/ann/layer/cross_entropy_error.hpp>
@@ -79,6 +82,7 @@ class RecurrentAttention;
using LayerTypes = boost::variant<
Add<arma::mat, arma::mat>*,
AddMerge<arma::mat, arma::mat>*,
+ AliasLayer*,
BaseLayer<LogisticFunction, arma::mat, arma::mat>*,
BaseLayer<IdentityFunction, arma::mat, arma::mat>*,
BaseLayer<TanhFunction, arma::mat, arma::mat>*,
@@ -120,4 +124,6 @@ using LayerTypes = boost::variant<
} // namespace ann
} // namespace mlpack
+#include <mlpack/methods/ann/layer/alias_layer_impl.hpp>
+
#endif
diff --git a/src/mlpack/tests/feedforward_network_test.cpp b/src/mlpack/tests/feedforward_network_test.cpp
index ed76e03..d1afefa 100644
--- a/src/mlpack/tests/feedforward_network_test.cpp
+++ b/src/mlpack/tests/feedforward_network_test.cpp
@@ -499,4 +499,35 @@ BOOST_AUTO_TEST_CASE(FFNMiscTest)
movedModel = std::move(copiedModel);
}
+/**
+ * Test that we can successfully have an alias layer.
+ */
+BOOST_AUTO_TEST_CASE(AliasLayerTest)
+{
+ arma::mat dataset(10, 100, arma::fill::randu);
+ arma::mat responses(1, 100, arma::fill::randu);
+
+ FFN<MeanSquaredError<>> network;
+ Linear<> layer(10, 10);
+ network.Add<AliasLayer>(layer);
+ network.Add<Linear<>>(10, 1);
+
+ const arma::mat oldMatrix = layer.Parameters();
+ std::cout << oldMatrix.t();
+
+ const size_t maxEpochs = 1;
+ RMSProp opt(0.01, 0.88, 1e-8, maxEpochs * dataset.n_cols, -1);
+ network.Train(dataset, responses, opt);
+
+ std::cout << "-----\n";
+ std::cout << layer.Parameters().t();
+ std::cout << "-----\n";
+ std::cout << network.Parameters().t();
+
+ // Make sure the layer's parameters have changed.
+ BOOST_REQUIRE_EQUAL(oldMatrix.n_elem, layer.Parameters().n_elem);
+ for (size_t i = 0; i < oldMatrix.n_elem; ++i)
+ BOOST_REQUIRE_GE(std::abs(oldMatrix[i] - layer.Parameters()[i]), 1e-5);
+}
+
BOOST_AUTO_TEST_SUITE_END();
--
2.5.4 (Apple Git-61)
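As a side note on how the forwarding in alias_layer_impl.hpp works: the alias keeps a type-erased (void*) pointer to a LayerTypes object, which is a boost::variant of layer pointers, and routes each call through boost::apply_visitor so it reaches whichever concrete layer the variant currently holds. A minimal, self-contained sketch of that dispatch mechanism, with made-up LayerA/LayerB types standing in for mlpack's real layers:

#include <boost/variant.hpp>
#include <iostream>

// Stand-ins for two concrete layer types.
struct LayerA { void Forward() { std::cout << "LayerA::Forward\n"; } };
struct LayerB { void Forward() { std::cout << "LayerB::Forward\n"; } };

// Analogue of mlpack's LayerTypes: a variant of non-owning layer pointers.
using LayerVariant = boost::variant<LayerA*, LayerB*>;

// Analogue of ForwardVisitor: calls Forward() on whichever layer is held.
struct ForwardVisitor : public boost::static_visitor<void>
{
  template<typename T>
  void operator()(T* layer) const { layer->Forward(); }
};

int main()
{
  LayerA a;
  LayerVariant held = &a;                        // alias, no ownership taken
  boost::apply_visitor(ForwardVisitor(), held);  // prints "LayerA::Forward"
}

The real ForwardVisitor, BackwardVisitor, and GradientVisitor in mlpack follow the same pattern, except that they also carry the matrix arguments along to the aliased layer's member functions.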