#include <chrono>
#include <random>
#include <mlpack/core.hpp>
#include <mlpack/prereqs.hpp>
#include <mlpack/methods/ann/layer/layer.hpp>

using namespace mlpack;
using namespace mlpack::ann;
// Policy classes for activation functions.
template<typename InputType = arma::vec>
class MeanFunction
{
 public:
  MeanFunction(const InputType& v) :
      inpvec(v), mean(0)
  { /* Nothing to do here. */ }

  // Combine the input variables into a single (floating-point) mean value.
  double SomeFunction()
  {
    for (const auto& i : inpvec)
      mean += i;
    return mean / inpvec.n_elem;
  }

 private:
  // Input vector.
  const InputType inpvec;
  // Mean of the normal distribution.
  double mean;
};
template<typename InputType = arma::vec>
class VarianceFunction
{
 public:
  VarianceFunction(const InputType& v) :
      inpvec(v), var(1)
  { /* Nothing to do here. */ }

  // Combine the input variables into a single (floating-point) variance value.
  double SomeFunction()
  {
    for (const auto& i : inpvec)
      var += i;
    return var;
  }

 private:
  // Input vector.
  const InputType inpvec;
  // Variance of the normal distribution.
  double var;
};
// Example of an activation function.
template<typename Mean = MeanFunction<>, typename Var = VarianceFunction<>>
class NormalSampler
{
 public:
  NormalSampler()
  { /* Nothing to do here. */ }

  template<typename InputVecType, typename OutputVecType>
  static void Fn(const InputVecType& x, OutputVecType& y)
  {
    Mean mean(x);
    Var var(x);
    unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
    std::default_random_engine generator(seed);
    // Note: std::normal_distribution takes the standard deviation as its
    // second parameter, so Var::SomeFunction() should really return a
    // standard deviation rather than a variance here.
    std::normal_distribution<double> dist(mean.SomeFunction(), var.SomeFunction());

    // Threshold the real-valued samples to obtain binary output variables.
    for (std::size_t idx = 0; idx < y.n_elem; idx++)
    {
      if (dist(generator) > 0.5)
        y[idx] = 1;
      else
        y[idx] = 0;
    }
  }
};
// Gibbs layer.
/*
 * You can use the BaseLayer class if you define ActivationFunction::Deriv.
 */
template<
    class FwdActivationFunction = NormalSampler<>,
    class BckActivationFunction = NormalSampler<>,
    typename InputType = arma::mat,
    typename OutputType = arma::mat
>
class GibbsLayer
{
 public:
  // Sample the output (hidden) variables given the input (visible) variables.
  void ForwardPass(const InputType&& input, OutputType&& output)
  {
    FwdActivationFunction::Fn(input, output);
  }

  // Sample the input (visible) variables given the output (hidden) variables.
  void BackwardPass(const OutputType&& output, InputType&& input)
  {
    BckActivationFunction::Fn(output, input);
  }

  //! Get the value of the deterministic parameter.
  bool Deterministic() const { return deterministic; }
  //! Modify the value of the deterministic parameter.
  bool& Deterministic() { return deterministic; }

 private:
  bool deterministic = false;
};
template<
    class FwdActivationFunction = NormalSampler<>,
    class BckActivationFunction = NormalSampler<>,
    typename InputDataType = arma::vec,
    typename OutputDataType = arma::vec
>
using NormalLayer = GibbsLayer<
    FwdActivationFunction, BckActivationFunction, InputDataType, OutputDataType>;
// Working example.
int main()
{
  arma::vec input = arma::randu(10);
  arma::vec output = arma::randu(10);

  Linear<> layer1(input.n_elem, output.n_elem);
  SigmoidLayer<> sigmoid;

  layer1.Reset();
  layer1.Forward(std::move(input), std::move(output));
  input = output;
  sigmoid.Forward(std::move(input), std::move(output));
  input = output;

  NormalLayer<> b;
  b.ForwardPass(std::move(input), std::move(output));
  output.print();
}
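To make the alternating structure of Gibbs sampling explicit, here is a minimal sketch of how the two passes of the layer above could be chained for k sampling steps. The function name and loop are illustrative additions, not part of the gist itself:

// Run k steps of alternating Gibbs sampling with the layer defined above:
// visible -> hidden via ForwardPass, then hidden -> visible via BackwardPass.
void GibbsChain(NormalLayer<>& layer,
                arma::vec& visible,
                arma::vec& hidden,
                const size_t k)
{
  for (size_t step = 0; step < k; ++step)
  {
    layer.ForwardPass(std::move(visible), std::move(hidden));
    layer.BackwardPass(std::move(hidden), std::move(visible));
  }
}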
@lozhnikov, when you say sequentially, do you mean sigmoid layer ---> Gibbs layer ---> hidden layer?
The parameters for, let's say, the binomial/normal sampler would be coming from the sigmoid layer.
But I think on IRC yesterday you said that the previous layer would be giving 2n variables (n for the mean and n for the variance).
So we would need some mechanism to combine these n variables to get the mean and the variance, hence the MeanFunction and VarianceFunction.
Also, yes, we would use a binomial layer in the case of the vanilla RBM, but with the ssRBM it would change, so we should try an interface that is applicable to both. I am with you on changing lines 67-69.
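For concreteness, here is one way that combining step could look: a minimal sketch that splits a 2n-element input into a mean part and a variance part. The names (HalfSplitMean, HalfSplitVariance) and the convention that the first half carries the means are illustrative assumptions, not something fixed by the discussion:

// Hypothetical policy that derives the mean from the first half of a
// 2n-element input vector produced by the previous layer.
class HalfSplitMean
{
 public:
  HalfSplitMean(const arma::vec& v) : inpvec(v) { }

  // Average the first n entries to obtain a single mean value.
  double SomeFunction()
  {
    const size_t n = inpvec.n_elem / 2;
    return arma::mean(inpvec.subvec(0, n - 1));
  }

 private:
  const arma::vec inpvec;
};

// Hypothetical policy that derives the variance from the second half.
class HalfSplitVariance
{
 public:
  HalfSplitVariance(const arma::vec& v) : inpvec(v) { }

  // Average the last n entries to obtain a single variance value.
  double SomeFunction()
  {
    const size_t n = inpvec.n_elem / 2;
    return arma::mean(inpvec.subvec(n, inpvec.n_elem - 1));
  }

 private:
  const arma::vec inpvec;
};

Such policies could then be plugged into the sampler as NormalSampler<HalfSplitMean, HalfSplitVariance>.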
Yeah, I agree with you. But in this case you have to pass a template parameter (i.e. a class with a static function) in order to obtain the indices of the output variables (since spike and slab variables are sampled using different distributions, i.e. you have to apply the samplers to different subsets).
Another approach takes layer division and layer concatenation into account (see ann/layer/concat.hpp, ann/layer/join.hpp and ann/layer/select.hpp for examples). You can combine these layers in order to build complex structures. I like this approach since it requires only the existing API, but it could be slow since it requires some additional memory allocations and copying. So maybe your approach is faster.
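To make the division/concatenation idea concrete, here is a minimal standalone sketch in plain Armadillo (it deliberately does not use the mlpack Concat/Join/Select classes, whose exact constructors are not spelled out here): the output is split into a spike subset and a slab subset, each subset is sampled from its own distribution, and the results land in one vector:

#include <random>
#include <mlpack/core.hpp>

// Sample spike (binary) and slab (real-valued) variables with different
// distributions, mimicking what a division/concatenation of layers would do.
// Assumes params[i] lies in (0, 1) for the spike part, e.g. a sigmoid output.
arma::vec SampleSpikeSlab(const arma::vec& params, const size_t numSpike)
{
  std::random_device rd;
  std::mt19937 gen(rd());

  arma::vec out(params.n_elem);

  // Spike variables: binary, drawn from a Bernoulli distribution.
  for (size_t i = 0; i < numSpike; ++i)
  {
    std::bernoulli_distribution spike(params[i]);
    out[i] = spike(gen) ? 1.0 : 0.0;
  }

  // Slab variables: real-valued, drawn from a normal distribution centred
  // on the corresponding parameter (unit variance assumed for brevity).
  for (size_t i = numSpike; i < params.n_elem; ++i)
  {
    std::normal_distribution<double> slab(params[i], 1.0);
    out[i] = slab(gen);
  }

  return out;
}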
Sorry, can you elaborate on "in order to obtain indices of output variables (since spike and slab variables are sampled using different distributions i.e. you have to apply samplers to different subsets)"? I was not able to understand your point. If you look at the example carefully, the input and output variables are provided as arguments to the NormalSampler: the input is fed from the previous layer, while the output holds the hidden-layer representation (you can see this in the main function).
I mean that you need some mechanism in order to decide which output variables should be affected by the normal sampler. You introduced MeanFunction and VarianceFunction in order to choose the input variables. I think you should do the same for the output variables (in the case of the first approach), since you sample the visible variables from both the spike and slab variables, the slab variables from both the visible and spike variables, and the spike variables from the visible variables.
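A sketch of the index-policy idea from the first approach: a class with a static function that tells each sampler which output variables it owns. The names and the fixed half/half split are illustrative assumptions:

#include <mlpack/core.hpp>

// Hypothetical index policy: the spike sampler owns the first half of the
// output variables.
struct SpikeIndices
{
  static arma::uvec Indices(const size_t outSize)
  {
    return arma::regspace<arma::uvec>(0, outSize / 2 - 1);
  }
};

// The slab sampler owns the second half.
struct SlabIndices
{
  static arma::uvec Indices(const size_t outSize)
  {
    return arma::regspace<arma::uvec>(outSize / 2, outSize - 1);
  }
};

// A sampler templated on such a policy touches only its own subset.
template<typename IndexPolicy>
struct SubsetSampler
{
  template<typename InputVecType, typename OutputVecType>
  static void Fn(const InputVecType& x, OutputVecType& y)
  {
    // Only the elements selected by the policy are resampled; the rest of
    // y is left untouched. The copy below is a placeholder for the actual
    // sampling rule.
    const arma::uvec idx = IndexPolicy::Indices(y.n_elem);
    for (const arma::uword i : idx)
      y[i] = x[i];
  }
};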
Okay, I get your point now. I think I have a simple fix for this.
We could do this:

for (std::size_t idx = 0; idx < y.n_elem; idx++)
{
  Mean mean(x(idx));
  Var var(x(idx));
  std::normal_distribution<double> dist(mean.SomeFunction(), var.SomeFunction());
  if (dist(generator) > 0.5)
    y[idx] = 1;
  else
    y[idx] = 0;
}

Now each output variable depends on the input at idx. Is this solution okay with you?
I don't think that MeanFunction and VarianceFunction are needed. I guess the mean and the variance should be passed to the sampler sequentially. And if I am not mistaken, the normal distribution is used to obtain real-valued variables; we use the binomial distribution for binary variables. It is better to invoke math::RandNormal() at lines 67-69.
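If that suggestion is taken, the sampling loop at lines 67-69 could drop the <random> machinery and read roughly like the sketch below. The signature math::RandNormal(mean, variance), returning one draw from N(mean, variance), is assumed from mlpack's core random utilities:

#include <mlpack/core.hpp>

// Sketch of the loop using mlpack's random utilities instead of
// std::normal_distribution (thresholding kept from the original example).
template<typename OutputVecType>
void SampleBinary(OutputVecType& y, const double mean, const double variance)
{
  for (size_t idx = 0; idx < y.n_elem; ++idx)
  {
    // Assumed API: math::RandNormal(mean, variance) returns a single
    // normally distributed value.
    if (mlpack::math::RandNormal(mean, variance) > 0.5)
      y[idx] = 1;
    else
      y[idx] = 0;
  }
}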