@s1998
Last active March 15, 2018 22:44
rbfn api
// Proposed changes to the API are:
// DBSCAN clustering has a method:
size_t Cluster(const MatType& data,
               arma::mat& centroids);

// KMeans clustering has a method:
void Cluster(const MatType& data,
             size_t clusters,
             arma::mat& centroids,
             /* other arguments... */);

// If it's okay, we can modify this to:
void Cluster(const MatType& data,
             arma::mat& centroids,
             /* other arguments... */);
// Input matrix, output matrix, and then function parameters.
// Or please let me know if this change is not acceptable;
// I will try to think if there is another way (probably meta-programming) to do this.
// A similar change was made to the classification and regression models to implement
// the cross-validation and hyperparameter tuning module:
void MlMethod(const MatType& data,
              const PredictionType& predictions,
              Args...);
// This will remove the need for the file 'kmeans_selection.hpp' in the Nystroem method and make it
// easier to implement algorithms that rely on obtaining centroids from the clustering policy.
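// As a rough sketch (not final code) of the benefit: with the unified signature, a
// generic helper could find centroids from any policy. The helper name FindCentroids
// below is hypothetical and only illustrates the idea:
template<typename ClusteringPolicy, typename MatType, typename... Args>
void FindCentroids(ClusteringPolicy& clusterer,
                   const MatType& data,
                   arma::mat& centroids,
                   Args... args)
{
  // Works for KMeans, DBSCAN, or any other policy exposing
  // Cluster(data, centroids, args...).
  clusterer.Cluster(data, centroids, args...);
}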
// The RBFN layer I plan to implement should do the following:
// 1. Allow it to be used at any layer (think layer 2 and onwards) in the deep learning module
//    (how to find centroids at that layer is discussed later).
// 2. Allow it to be used with any ClusteringPolicy.
// 3. Allow the user, if they choose, to backpropagate the error to the centroids
//    for the Gaussian kernel.
// Please let me know if you feel any of these is irrelevant or not needed.
namespace mlpack {
namespace ann /** Artificial Neural Network. */ {

/**
 * Implementation of the RBFN class.
 *
 * @tparam ClusteringPolicy Clustering algorithm to be used;
 *     the default is KMeans.
 * @tparam KernelPolicy Kernel to be used;
 *     the default is the Gaussian kernel.
 * @tparam modifyCentroids Boolean variable; if set to true,
 *     the error is backpropagated to the centroids.
 * @tparam Args Types of the arguments to be passed to the
 *     clusterer.Cluster() method.
 * @tparam InputDataType Type of the input data.
 * @tparam OutputDataType Type of the output data.
 */
template <
    typename ClusteringPolicy = kmeans::KMeans<>,
    typename KernelPolicy = kernel::GaussianKernel,
    typename InputDataType = arma::mat,
    typename OutputDataType = arma::mat,
    typename... Args
>
class RBFN
{
 public:
  /**
   * Create the RBFN object using the specified centroids.
   *
   * @param inSize Number of input units.
   * @param outSize Number of output units.
   * @param modifyCentroids If true, backpropagate the error to the centroids.
   * @param kernel Kernel to be used for the RBFN.
   * @param centroids Centroids obtained from clustering.
   */
  RBFN(const size_t inSize,
       const size_t outSize,
       bool modifyCentroids,
       KernelPolicy& kernel,
       InputDataType& centroids);
  /**
   * Create the RBFN object using the specified ClusteringPolicy.
   *
   * @param inSize Number of input units.
   * @param outSize Number of output units.
   * @param modifyCentroids If true, backpropagate the error to the centroids.
   * @param kernel Kernel to be used for the RBFN.
   * @param clusterer ClusteringPolicy used for clustering.
   * @param input Input data to do the clustering on.
   * @param args Arguments to be passed to the clusterer.Cluster() method.
   */
  RBFN(const size_t inSize,
       const size_t outSize,
       bool modifyCentroids,
       KernelPolicy& kernel,
       ClusteringPolicy& clusterer,
       InputDataType& input,
       Args... args);
  /**
   * Create the RBFN object using the specified ClusteringPolicy; the centroids
   * are found from the first (mini-)batch passed to Forward().
   *
   * @param inSize Number of input units.
   * @param outSize Number of output units.
   * @param modifyCentroids If true, backpropagate the error to the centroids.
   * @param kernel Kernel to be used for the RBFN.
   * @param clusterer ClusteringPolicy used for clustering.
   * @param args Arguments to be passed to the clusterer.Cluster() method.
   */
  RBFN(const size_t inSize,
       const size_t outSize,
       bool modifyCentroids,
       KernelPolicy& kernel,
       ClusteringPolicy& clusterer,
       // int noOfClusters,
       // I had initially thought of this, but the user can't know the exact number for
       // clustering algorithms like DBSCAN, so we need to keep it generic.
       Args... args);
  // I have omitted the following functions and members for ease of viewing:
  // Reset(), Forward(), Backward(), Parameters(), Gradient(),
  // and the weights, weight, bias, and gradient members.
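  // For completeness, a rough sketch (not final code) of what Forward() would
  // compute for the Gaussian kernel: each output unit j is the kernel evaluated
  // between the input point and centroid j. The members used below (centroids,
  // kernel) are assumptions for illustration only.
  template<typename eT>
  void Forward(const arma::Mat<eT>&& input, arma::Mat<eT>&& output)
  {
    output.set_size(centroids.n_cols, input.n_cols);
    for (size_t i = 0; i < input.n_cols; ++i)
    {
      for (size_t j = 0; j < centroids.n_cols; ++j)
      {
        // GaussianKernel::Evaluate(a, b) = exp(-||a - b||^2 / (2 * bandwidth^2)).
        output(j, i) = kernel.Evaluate(input.col(i), centroids.col(j));
      }
    }
  }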
 private:
  //! Locally-stored number of input units.
  size_t inSize;

  //! Number of centroids obtained after clustering.
  size_t noOfClusters;

  //! Locally-stored number of output units.
  size_t outSize;
}; // class RBFN

} // namespace ann
} // namespace mlpack
// Suppose we have training data and we want to use it
// with 50 centroids and 100 output units.
// To use the RBFN for classification:
FFN<NegativeLogLikelihood<>, RandomInitialization> model;
model.Add<RBFN<> >(100,
                   GaussianKernel(),
                   centroids);
model.Add<LogSoftMax<> >();

// To use the RBFN for regression/prediction:
FFN<MeanSquaredError<>, RandomInitialization> model;
model.Add<RBFN<> >(100,
                   GaussianKernel(),
                   centroids);
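// One possible way to obtain the centroids matrix used above, as a sketch based on
// the existing KMeans interface (trainData and the variable names are for
// illustration only):
arma::mat centroids;
KMeans<> km;
km.Cluster(trainData, 50 /* number of centroids */, centroids);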
// The third constructor allows this layer to be used
// at any depth (think layer 2 onwards) in the deep learning module.
// The RBFN class has a boolean member hasCentroids.
// The first and second constructors find the centroids
// during object initialization itself and set hasCentroids to true.
// The third constructor sets it to false.
// When Forward() is called, a layer built with the third constructor runs the
// ClusteringPolicy on the input it has been passed
// (i.e. the first (mini-)batch that arrives) and uses the result
// to set the centroids.
// So there are two possible difficulties in implementing the RBFN as a custom layer:
// 1. When we are at a deep layer (think layer 2 onwards), we don't know the centroids.
// 2. How do we store the centroids in the parameter matrix? (Because everything in the
//    parameter matrix is initialized by the network initialization rule.)
// The solution I could think of is based on layer traits.
// The current codebase can't do this exactly, since it does not pass the data to the initializer.
// Proposed changes:
// First, add a function to the RBFN layer, call it bool first_pass_init().
// Add a macro to the layer traits that checks whether a layer has this function, as sketched below.
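// A rough sketch of that check (reusing mlpack's existing HAS_MEM_FUNC helper from
// core/util/sfinae_utility.hpp; the trait names below are made up for illustration):
HAS_MEM_FUNC(first_pass_init, HasFirstPassInitCheck);

template<typename LayerType>
struct HasFirstPassInit
{
  static const bool value =
      HasFirstPassInitCheck<LayerType, bool(LayerType::*)()>::value;
};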
// The Initialize() method in network_init.hpp needs to be changed.
// Instead of doing this directly
// (inside the for loop, after setting the size of the parameters):
const size_t weight = boost::apply_visitor(weightSizeVisitor,
                                           network[i]);
arma::mat tmp = arma::mat(parameter.memptr() + offset,
                          weight, 1, false, false);
initializeRule.Initialize(tmp, tmp.n_elem, 1);
// call the Forward() function first.
// This way the RBFN will have the input data and can perform the necessary clustering to set the parameter size.
// The initial loop that sets the size of the master parameter matrix will change accordingly
// (a sketch follows below).
// A benefit of this approach is that if, in the future, we try to implement LSUV initialization
// (https://arxiv.org/pdf/1511.06422.pdf), or any other initialization that also depends on the
// input data, these changes will make it easier to implement.
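// A rough sketch of the modified per-layer initialization. The data would have to
// be passed into Initialize() (new), and firstPassInitVisitor is a hypothetical
// visitor wrapping the trait above; ForwardVisitor and outputParameterVisitor
// already exist in the ann visitor set.
for (size_t i = 0; i < network.size(); ++i)
{
  // Layers such as the RBFN need to see the data before their parameter size is
  // known, so run a forward pass for them first.
  if (boost::apply_visitor(firstPassInitVisitor, network[i]))
  {
    boost::apply_visitor(ForwardVisitor(std::move(input), std::move(
        boost::apply_visitor(outputParameterVisitor, network[i]))), network[i]);
  }

  // Then initialize the layer's parameters as before.
  const size_t weight = boost::apply_visitor(weightSizeVisitor, network[i]);
  arma::mat tmp = arma::mat(parameter.memptr() + offset, weight, 1, false, false);
  initializeRule.Initialize(tmp, tmp.n_elem, 1);
  offset += weight;
}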
//
// Coming to the second part, storing the centroids:
// we keep the existing weight-size query,
const size_t weight = boost::apply_visitor(weightSizeVisitor,
                                           network[i]);
// define a similar visitor, say
const size_t weightOffset = boost::apply_visitor(weightOffsetVisitor,
                                                 network[i]);
// and then we can have:
arma::mat tmp = arma::mat(parameter.memptr() + offset + weightOffset,
                          weight, 1, false, false);
initializeRule.Initialize(tmp, tmp.n_elem, 1);
// To call the third constructor:
model.Add<
    RBFN< KMeans<>,
          GaussianKernel,
          false,
          arma::mat,
          arma::mat,
          size_t,
          bool
        > >
    (100, GaussianKernel(), KMeans<>(),
     50,    // specify the number of centres
     50,    // argument to the Cluster() method
     false
    );
zoq commented Mar 13, 2018

// DBSCAN clustering has a method : 
size_t Cluster(const MatType& data,
  arma::mat& centroids);

// KMeans clustering has a method :
void Cluster(const MatType& data,
  size_t clusters,
  arma::mat& centroids,
  other arguments...);

// If it's okay, we can modify this to : 
void Cluster(const MatType& data,
 arma::mat& centroids,
 other arguments...);
// Input matrix, output matrix, and then function parameters.
// Or please let me know if this change is not acceptable;
// I will try to think if there is another way (probably meta-programming) to do this.

We could do that, but we also have to keep the current interface to stay backwards compatible.

zoq commented Mar 13, 2018

// The RBFN layer I plan to implement should do the following:
// 1. Allow it to be used at any layer (think layer 2 and onwards) in the deep learning module
//    (how to find centroids at that layer is discussed later).
// 2. Allow it to be used with any ClusteringPolicy.
// 3. Allow the user, if they choose, to backpropagate the error to the centroids
//    for the Gaussian kernel.
// Please let me know if you feel any of these is irrelevant or not needed.

Each point sounds reasonable; point number one especially is important.

zoq commented Mar 13, 2018

Hm, if we use bool modifyCentroids = false as a template parameter, we can't change it at runtime; not sure that is necessary.

zoq commented Mar 13, 2018

// I have omitted the following functions and members for ease of viewing:
// Reset(), Forward(), Backward(), Parameters(), Gradient(),
// and the weights, weight, bias, and gradient members.

This is just fine, but thanks for the note.

s1998 (Author) commented Mar 13, 2018

@zoq

We could do that, but we also have to keep the current interface to stay backwards compatible.

We can mark it deprecated and add the proposed methods.
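For example, a minimal sketch of keeping the old KMeans overload around (assuming the mlpack_deprecated macro from core/util/deprecated.hpp; the exact macro/attribute is open for discussion):

  // Old signature, kept for backwards compatibility.
  mlpack_deprecated
  void Cluster(const MatType& data,
               const size_t clusters,
               arma::mat& centroids);

  // New signature with the proposed argument order.
  void Cluster(const MatType& data,
               arma::mat& centroids,
               const size_t clusters);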

Hm, if we use bool modifyCentroids = false as a template parameter, we can't change it at runtime; not sure that is necessary.

I didn't think of that; I will make the necessary changes in the gist.

Though not completely relevant, I would like to mention that I have implemented an RBFN using TensorFlow and was able to reproduce results from the paper.
Link to implementation.
Results:
Accuracy for the RBFN with variable cluster centres (backpropagating the error to the cluster centres): 0.9334
Accuracy for the RBFN without variable cluster centres (not backpropagating the error to the cluster centres): 0.8809

I am planning to include the equations involved. Is there anything missing, or something in particular that I should add for the RBFN part?
