#pragma once

#include "fdeep/layers/layer.hpp"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <string>
namespace fdeep
{
namespace internal
{

class lstm_layer : public layer
{
public:
    explicit lstm_layer(const std::string& name,
                        std::size_t n_units,
                        const std::string& activation,
                        const std::string& recurrent_activation,
                        bool use_bias,
                        bool return_sequences,
                        const RowMajorMatrixXf& W,
                        const RowMajorMatrixXf& U,
                        const RowMajorMatrixXf& bias)
        : layer(name),
          n_units_(n_units),
          activation_(activation),
          recurrent_activation_(recurrent_activation),
          use_bias_(use_bias),
          return_sequences_(return_sequences),
          W_(W),
          U_(U),
          bias_(bias)
    {
        assertion(bias_.size() == static_cast<Eigen::Index>(n_units_ * 4),
                  "invalid bias size");
    }
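
    // Expected weight shapes, following the Keras gate order [i | f | c | o]
    // used by the block slicing in lstm_impl below:
    //   W:    (n_features, 4 * n_units)  -- input kernel
    //   U:    (n_units,    4 * n_units)  -- recurrent kernel
    //   bias: (1,          4 * n_units)  -- row vector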

protected:
    tensor3s apply_impl(const tensor3s& inputs) const override final
    {
        assertion(inputs.size() == 1, "invalid number of input tensors");
        const auto& input = inputs.front();
        return {lstm_impl(input, W_, U_, bias_, activation_, recurrent_activation_)};
    }

private:
    static float_type relu_activation(float_type x)
    {
        return std::max<float_type>(x, 0);
    }

    static float_type sigmoid_activation(float_type x)
    {
        return 1 / (1 + std::exp(-x));
    }

    // Keras-style hard sigmoid: clip(0.2 * x + 0.5, 0, 1)
    static float_type hard_sigmoid_activation(float_type x)
    {
        return static_cast<float_type>(std::min(1.0, std::max(0.0, (0.2 * x) + 0.5)));
    }

    static float_type tanh_activation(float_type x)
    {
        return std::tanh(x);
    }

    std::function<float_type(float_type)> get_activation_func(const std::string& activation_func_name) const
    {
        if (activation_func_name == "relu")
            return relu_activation;
        else if (activation_func_name == "sigmoid")
            return sigmoid_activation;
        else if (activation_func_name == "hard_sigmoid")
            return hard_sigmoid_activation;
        else if (activation_func_name == "tanh")
            return tanh_activation;
        raise_error("activation function '" + activation_func_name + "' not yet implemented");
        return {};
    }
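
    // Note: Keras 2.x LSTMs default to activation="tanh" and
    // recurrent_activation="hard_sigmoid", both of which are covered above.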

    tensor3 lstm_impl(const tensor3& input,
                      const RowMajorMatrixXf& W,
                      const RowMajorMatrixXf& U,
                      const RowMajorMatrixXf& bias,
                      const std::string& activation,
                      const std::string& recurrent_activation) const
    {
        // initialize cell output state h and cell memory state c for t-1 with zeros
        RowMajorMatrixXf h_tm1(1, n_units_);
        RowMajorMatrixXf c_tm1(1, n_units_);
        h_tm1.setZero();
        c_tm1.setZero();

        // timesteps run along the tensor width, features along its height
        const std::size_t n_timesteps = input.shape().width_;
        const std::size_t n_features = input.shape().height_;

        // allocate the output matrix based on return_sequences
        const std::size_t n_output_timesteps = return_sequences_ ? n_timesteps : 1;
        RowMajorMatrixXf result(n_output_timesteps, n_units_);

        // copy the input into an Eigen matrix: one row per timestep
        RowMajorMatrixXf in(n_timesteps, n_features);
        for (std::size_t a_t = 0; a_t < n_timesteps; ++a_t)
            for (std::size_t a_f = 0; a_f < n_features; ++a_f)
                in(Eigen::Index(a_t), Eigen::Index(a_f)) = input.get(0, a_f, a_t);

        // X stacks the input-kernel products for all timesteps: (n_timesteps, 4 * n_units)
        RowMajorMatrixXf X = in * W;
        if (use_bias_)
        {
            // broadcast the bias row vector over all timesteps
            typedef Eigen::Matrix<float_type, 1, Eigen::Dynamic> Vector_Xf;
            const Vector_Xf b = bias;
            X.rowwise() += b;
        }

        // look up the element-wise activation functions
        const auto act_func = get_activation_func(activation);
        const auto act_func_recurrent = get_activation_func(recurrent_activation);

        // compute the LSTM output, one timestep at a time
        const Eigen::Index n = Eigen::Index(n_units_);
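
        // Gate recurrence implemented below (x_t = row k of the input,
        // sigma = recurrent activation, phi = activation):
        //   i_t  = sigma(x_t W_i + h_{t-1} U_i + b_i)   input gate
        //   f_t  = sigma(x_t W_f + h_{t-1} U_f + b_f)   forget gate
        //   c~_t = phi  (x_t W_c + h_{t-1} U_c + b_c)   candidate cell state
        //   o_t  = sigma(x_t W_o + h_{t-1} U_o + b_o)   output gate
        //   c_t  = f_t . c_{t-1} + i_t . c~_t           (elementwise products)
        //   h_t  = o_t . phi(c_t)
        // The x_t W_* terms (plus bias) are precomputed in X above.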
        for (Eigen::Index k = 0; k < Eigen::Index(n_timesteps); ++k)
        {
            // recurrent-kernel products for all four gates, concatenated [i | f | c | o]
            const RowMajorMatrixXf ifco = h_tm1 * U;

            // X.block(k, g * n, 1, n) selects gate g's pre-activation for timestep k
            const RowMajorMatrixXf i = (X.block(k, 0, 1, n) + ifco.block(0, 0, 1, n)).unaryExpr(act_func_recurrent);
            const RowMajorMatrixXf f = (X.block(k, n, 1, n) + ifco.block(0, n, 1, n)).unaryExpr(act_func_recurrent);
            const RowMajorMatrixXf c_pre = (X.block(k, n * 2, 1, n) + ifco.block(0, n * 2, 1, n)).unaryExpr(act_func);
            const RowMajorMatrixXf o = (X.block(k, n * 3, 1, n) + ifco.block(0, n * 3, 1, n)).unaryExpr(act_func_recurrent);

            c_tm1 = f.cwiseProduct(c_tm1) + i.cwiseProduct(c_pre);
            h_tm1 = o.cwiseProduct(c_tm1.unaryExpr(act_func));

            // keep every h_t if return_sequences is set, otherwise just the last one
            if (return_sequences_)
                result.row(k) = h_tm1;
            else if (k == Eigen::Index(n_timesteps) - 1)
                result.row(0) = h_tm1;
        }

        return tensor3(shape3(1, n_output_timesteps, n_units_), eigen_mat_to_values(result));
    }

    const std::size_t n_units_;
    const std::string activation_;
    const std::string recurrent_activation_;
    const bool use_bias_;
    const bool return_sequences_;
    // weights are stored by value so the layer does not dangle
    // if the caller's matrices go out of scope
    const RowMajorMatrixXf W_;
    const RowMajorMatrixXf U_;
    const RowMajorMatrixXf bias_;
};

} // namespace internal
} // namespace fdeep
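
// Usage sketch (hypothetical values; assumes fdeep's internal types are
// visible and that the weight matrices follow the shapes noted above):
//
//   using namespace fdeep::internal;
//   const std::size_t n_units = 8;
//   const std::size_t n_features = 4;
//   const RowMajorMatrixXf W = RowMajorMatrixXf::Random(n_features, 4 * n_units);
//   const RowMajorMatrixXf U = RowMajorMatrixXf::Random(n_units, 4 * n_units);
//   const RowMajorMatrixXf b = RowMajorMatrixXf::Zero(1, 4 * n_units);
//   lstm_layer lstm("lstm_1", n_units, "tanh", "hard_sigmoid",
//                   true, false, W, U, b);
//
// Feeding a single tensor3 of shape (1, n_features, n_timesteps) through the
// layer should yield (1, 1, n_units), or (1, n_timesteps, n_units) when
// return_sequences is true.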