ialhashim/DBSCAN.hpp

## c_clustering_library.hpp
// Interface for the The C clustering library

void clusterlibrary::cluster(std::vector< std::vector<float> > & data, int k, int iterations, std::vector<int> & clusterid)
{
	int nrows = data.size();
	int ncolumns = data.front().size();

	float** data_ptr = new float*[nrows];
	for(size_t i = 0; i < data.size(); i++) data_ptr[i] = &data[i][0];

	std::vector< std::vector<int> > mask(nrows, std::vector<int>(ncolumns, 1));
	int ** mask_ptr = new int*[nrows];
	for(size_t i = 0; i < mask.size(); i++) mask_ptr[i] = &mask[i][0];

	std::vector<float> data_weights(data.size(),1.0);
	float * weights = &data_weights[0];

	char distType = 'e';

	std::vector< std::vector<float> > cdata(k, std::vector<float>(ncolumns, 0));
	float ** cdata_ptr = new float*[ncolumns];
	for(size_t i = 0; i < cdata.size(); i++) cdata_ptr[i] = &cdata[i][0];

	std::vector< std::vector<int> > cmask(k, std::vector<int>(ncolumns, 1));
	int ** cmask_ptr = new int*[ncolumns];
	for(size_t i = 0; i < cmask.size(); i++) cmask_ptr[i] = &cmask[i][0];

	float err = 0;
	std::vector< int > tclusterid(nrows, 0);
	std::vector< int > counts(k, 0);
	std::vector< int > mapping(k, 0);

	clusterid.clear();
	clusterid.resize(nrows, 0);

	clusterlib::kmeans(k, nrows, ncolumns, data_ptr, mask_ptr, weights, 0, iterations,
		distType, cdata_ptr, cmask_ptr, &clusterid[0], &err, &tclusterid[0], &counts[0], &mapping[0] );
}

## DBSCAN.hpp
#pragma once

// Code adapted from https://github.com/propanoid/DBSCAN

#include <vector>
#include <algorithm>
#include <omp.h>

// Any basic vector/matrix library should also work
#include <Eigen/Core>

namespace clustering
{
	template<typename Vector, typename Matrix>
	class DBSCAN
	{
	public:
		typedef Vector FeaturesWeights;
		typedef Matrix ClusterData;
		typedef Matrix DistanceMatrix;
		typedef std::vector<unsigned int> Neighbors;
		typedef std::vector<int> Labels;

	private:
		double m_eps;
		size_t m_min_elems;
		double m_dmin;
		double m_dmax;

		Labels m_labels;

	public:

		// 'eps' is the search space for neighbors in the range [0,1], where 0.0 is exactly self and 1.0 is entire dataset
		DBSCAN(double eps, size_t min_elems)
			: m_eps( eps )
			, m_min_elems( min_elems )
			, m_dmin(0.0)
			, m_dmax(0.0)
		{
			reset();
		}

		// Call this to perform clustering, get results by calling 'get_labels()'
		void fit( const ClusterData & C )
		{
			const FeaturesWeights W = std_weights( C.cols() );
			wfit( C, W );
		}

		const Labels & get_labels() const
		{
			return m_labels;
		}

		void reset()
		{
			m_labels.clear();
		}

		void init(double eps, size_t min_elems)
		{
			m_eps = eps;
			m_min_elems = min_elems;
		}

		// Useful for testing
		static ClusterData gen_cluster_data( size_t features_num, size_t elements_num )
		{
			ClusterData cl_d( elements_num, features_num );
			for (size_t i = 0; i < elements_num; ++i)
				for (size_t j = 0; j < features_num; ++j)
					cl_d(i, j) = (-1.0 + rand() * (2.0) / RAND_MAX);
			return cl_d;
		}

		FeaturesWeights std_weights( size_t s )
		{
			// num cols
			FeaturesWeights ws( s );

			for (size_t i = 0; i < s; ++i)
				ws(i) = 1.0;

			return ws;
		}

		void fit_precomputed( const DistanceMatrix & D )
		{
			prepare_labels( D.rows() );
			dbscan( D );
		}

		void wfit( const ClusterData & C, const FeaturesWeights & W )
		{
			prepare_labels( C.rows() );
			const DistanceMatrix D = calc_dist_matrix( C, W );
			dbscan( D );
		}

	private:
		void prepare_labels( size_t s )
		{
			m_labels.resize(s, -1);
		}

		Neighbors find_neighbors(const DistanceMatrix & D, unsigned int pid)
		{
			Neighbors ne;

			for (unsigned int j = 0; j < D.rows(); ++j)
			{
				if 	( D(pid, j) <= m_eps )
				{
					ne.push_back(j);
				}
			}
			return ne;
		}

		const DistanceMatrix calc_dist_matrix( const ClusterData & C, const FeaturesWeights & W )
		{
			ClusterData cl_d = C;

#pragma omp parallel for
			for (int i = 0; i < (int)cl_d.cols(); ++i)
			{
				auto col = cl_d.col(i);

				const auto r = std::minmax_element( col.data(), col.data() + col.size() );

				double data_min = *r.first;
				double data_range = *r.second - *r.first;

				if (data_range == 0.0) { data_range = 1.0; }

				const double scale = 1/data_range;
				const double min = -1.0*data_min*scale;

				col *= scale;
				col += Vector::Constant(col.size(), min);

				cl_d.col(i) = col;
			}

			// rows x rows
			DistanceMatrix d_m( cl_d.rows(), cl_d.rows() );
			Vector d_max( cl_d.rows() );
			Vector d_min( cl_d.rows() );

			for (int i = 0; i < (int)cl_d.rows(); ++i)
			{
#pragma omp parallel for
				for (int j = i; j < (int)cl_d.rows(); ++j)
				{
					d_m(i, j) = 0.0;

					if (i != j)
					{
						Vector U = cl_d.row(i);
						Vector V = cl_d.row(j);

						Vector diff = ( U-V );

						for(int k = 0; k < (int)diff.size(); k++)
						{
							auto e = diff[k];
							d_m(i, j) += fabs(e)*W[k];
						}

						d_m(j, i) = d_m(i, j);
					}
				}

				const auto cur_row = d_m.row(i);
				const auto mm = std::minmax_element( cur_row.data(), cur_row.data() + cur_row.size() );

				d_max(i) = *mm.second;
				d_min(i) = *mm.first;
			}

			m_dmin = *(std::min_element( d_min.data(), d_min.data() + d_min.size() ));
			m_dmax = *(std::max_element( d_max.data(), d_max.data() + d_max.size() ));

			m_eps = (m_dmax - m_dmin) * m_eps + m_dmin;

			return d_m;
		}

		void dbscan( const DistanceMatrix & dm )
		{
			std::vector<unsigned int> visited( dm.rows() );

			unsigned int cluster_id = 0;

			for (unsigned int pid = 0; pid < dm.rows(); ++pid)
			{
				if ( !visited[pid] )
				{
					visited[pid] = 1;

					Neighbors ne = find_neighbors(dm, pid );

					if (ne.size() >= m_min_elems)
					{
						m_labels[pid] = cluster_id;

						for (unsigned int i = 0; i < ne.size(); ++i)
						{
							unsigned int nPid = ne[i];

							if ( !visited[nPid] )
							{
								visited[nPid] = 1;

								Neighbors ne1 = find_neighbors(dm, nPid);

								if ( ne1.size() >= m_min_elems )
								{
									for (const auto & n1 : ne1)
									{
										ne.push_back(n1);
									}
								}
							}

							if ( m_labels[nPid] == -1 )
							{
								m_labels[nPid] = cluster_id;
							}
						}

						++cluster_id;
					}
				}
			}
		}
	};
}

## generalized-kmeans.hpp
// Incomplete..

//Generalized k-means
//Copyright (C) 2013 Balazs Szalkai
//If you use this program in your research, please cite the following article:
//B. Szalkai: An implementation of the relational k-means algorithm. ArXiv e-prints, 2013.

namespace GeneralizedKMeans
{
	typedef Eigen::MatrixXf DistanceMatrix;

	struct Clusters
	{
		DistanceMatrix Matrix;
		int Count;
		int[] ObjectToCluster;
		List<int>[] ClusterToObjects;
		CentroidDistance cd;

		Clusters(DistanceMatrix & matrix, int count)
		{
			Matrix = matrix;
			Count = count;
			ObjectToCluster = new int[Matrix.Count];
			ClusterToObjects = new List<int>[Count];
			for (int i = 0; i < Count; i++) ClusterToObjects[i] = new List<int>();
			Randomize();
		}

		static Random Rand = new Random();

		void Randomize()
		{
			for (int i = 0; i < Count; i++) ClusterToObjects[i].Clear();

			for (int i = 0; i < Matrix.Count; i++)
			{
				int cluster = Rand.Next(Count);
				ObjectToCluster[i] = cluster;
				ClusterToObjects[cluster].Add(i);
			}
		}

		void Make_ClusterToObjects()
		{
			for (int cluster = 0; cluster < Count; cluster++)
				ClusterToObjects[cluster].Clear();

			for (int i = 0; i < Matrix.Count; i++)
				ClusterToObjects[ObjectToCluster[i]].Add(i);
		}

		double[] AdditiveConstant;

		// Call this before using the other members functions
		void Initialize()
		{
			// calculate additive constants for clusters
			AdditiveConstant = new double[clusters.Count];
			for (int cluster = 0; cluster < clusters.Count; cluster++)
			{
				var objects = clusters.ClusterToObjects[cluster];
				double x = 0;
				foreach (var i in objects)
					foreach (var j in objects)
					x += clusters.Matrix.SqrDistance[i, j];
				AdditiveConstant[cluster] = -x / (2 * Utils.Sqr((double)objects.Count));
			}
		}
		// Calculates a squared centroid distance
		double CalculateSqrDist(int obj, int cluster)
		{
			var objects = clusters.ClusterToObjects[cluster];
			double x = 0;
			foreach (int j in objects) x += clusters.Matrix.SqrDistance[obj, j];
			double dist = AdditiveConstant[cluster] + x / objects.Count;
			return dist;
		}
		int GetNearestCluster(int obj)
		{
			int nearestCluster = -1;
			double minSqrDist = 0;
			for (int cluster = 0; cluster < clusters.Count; cluster++)
			{
				double sqrDist = CalculateSqrDist(obj, cluster);
				if (nearestCluster < 0 || sqrDist < minSqrDist)
				{
					minSqrDist = sqrDist;
					nearestCluster = cluster;
				}
			}
			return nearestCluster;
		}
		double GetClusteringValue()
		{
			double result = 0;
			for (int i = 0; i < clusters.Matrix.Count; i++)
				result += CalculateSqrDist(i, clusters.ObjectToCluster[i]);
			return result;
		}

		// Returns how many objects have changed cluster
		int Iterate()
		{
			int changed = 0;
			cd.Initialize();

			// find the nearest centroid for the objects
			int[] New_ObjectToCluster = new int[Matrix.Count];
			for (int i = 0; i < Matrix.Count; i++)
			{
				New_ObjectToCluster[i] = cd.GetNearestCluster(i);
				if (New_ObjectToCluster[i] != ObjectToCluster[i]) changed++;
			}

			// update the configuration
			double oldValue = GetValue();
			int[] Old_ObjectToCluster = ObjectToCluster;
			ObjectToCluster = New_ObjectToCluster;
			Make_ClusterToObjects();
			double newValue = GetValue();

			// clustering got worse (this is possible with some distance matrices) or stayed the same?
			if (oldValue <= newValue)
			{
				// undo iteration
				ObjectToCluster = Old_ObjectToCluster;
				Make_ClusterToObjects();
				return 0;
			}
			return changed;
		}

		double GetValue()
		{
			cd.Initialize();
			return cd.GetClusteringValue();
		}
	}

	void Run( DistanceMatrix & m, int nClusters )
	{
		Clusters bestClusters (m, nClusters);

		int badLuckStreak = 0;
		int blockId = 0;
		int maxBadLuckStreak = 100;

		while (true)
		{
			for (int i = 0; i < nThreads; i++)
			{
				Clusters c = new Clusters(m, nClusters);
				while (true)
				{
					if (c.Iterate() == 0) break;
				}

				Clusters c = clustersArray[i];
				if (c.GetValue() < bestClusters.GetValue())
				{
					bestClusters = c;
					badLuckStreak = 0;
				}
				else
					badLuckStreak++;
			}

			if (badLuckStreak >= maxBadLuckStreak) break;
		}
	}
}

## kmedoids.hpp
// Works well

//////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2010, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory
//////////////////////////////////////////////////////////////////////////////////////////////////
/// @author Todd Gamblin tgamblin@llnl.gov

#include <vector>
#include <sstream>

#include <algorithm>
#include <numeric>
#include <cassert>
#include <cstdlib>
using namespace std;

//#include "random.h"
//#include "bic.h"
//#include "matrix_utils.h"

namespace clustering {
	typedef float Scalar;
	typedef Eigen::Matrix<Scalar,-1,-1> dissimilarity_matrix;

	typedef size_t medoid_id;       ///< More descriptive type for medoid index
	typedef size_t object_id;       ///< More descriptive type for object index

	///
	/// Explicit representation of a clustering.  Instead of a vecto of representative
	/// ids, this has <i>k</i> sets of object_ids indicating which objects are in a
	/// particular cluster.  You can convert a partition to a cluster_list with
	/// to_cluster_list().
	///
	typedef std::vector< std::set<object_id> > cluster_list;

	struct kmedoids{

		/// Gives the index of the object that is the ith medoid.
		/// medoids[i] == index in object array for last call to findClusters()
		std::vector<object_id> medoid_ids;

		/// Gives cluster id (index in medoids) for the ith object.
		/// clusterid[i]          == id of cluster of which object i is a member.
		/// medoids[clusterid[i]] == representative of that cluster.
		std::vector<medoid_id> cluster_ids;

		std::vector<medoid_id> sec_nearest;      /// Index of second closest medoids.  Used by PAM.
		double total_dissimilarity;              /// Total dissimilarity bt/w objects and their medoid
		bool sort_medoids;                       /// Whether medoids should be canonically sorted by object id.
		double epsilon;                          /// Normalized sensitivity for convergence
		size_t init_size;                        /// initial sample size (before 2*k)
		size_t max_reps;                         /// initial sample size (before 2*k)

		typedef std::mt19937 random_type;                /// Type for RNG used in this algorithm

		kmedoids(size_t num_objects)
			: cluster_ids(num_objects, 0),
			total_dissimilarity(std::numeric_limits<double>::infinity()),
			sort_medoids(false),
			epsilon(1e-15),
			init_size(40),
			max_reps(5)
		{ if (num_objects) medoid_ids.resize(1); }

		double average_dissimilarity() const {
			return total_dissimilarity / cluster_ids.size();
		}

		void set_sort_medoids(bool sort) {
			sort_medoids = sort;
		}

		void set_epsilon(double e) {
			epsilon = e;
		}

		void init_medoids(size_t k, const dissimilarity_matrix& distance) {
			medoid_ids.clear();
			// find first object: object minimum dissimilarity to others
			object_id first_medoid = 0;
			double min_dissim = std::numeric_limits<Scalar>::max();
			for (size_t i=0; i < distance.rows(); i++) {
				double total = 0.0;
				for (size_t j=0; j < distance.cols(); j++) {
					total += distance(i,j);
				}
				if (total < min_dissim) {
					min_dissim   = total;
					first_medoid = i;
				}
			}

			// add first object to medoids and compute medoid ids.
			medoid_ids.push_back(first_medoid);
			assign_objects_to_clusters(distance);

			// now select next k-1 objects according to KR's BUILD algorithm
			for (size_t cur_k = 1; cur_k < k; cur_k++) {
				object_id best_obj = 0;
				double max_gain = 0.0;
				for (size_t i=0; i < distance.rows(); i++) {
					if (is_medoid(i)) continue;

					double gain = 0.0;
					#pragma omp parallel for reduction(+:gain)
					for (int j=0; j < (int)distance.rows(); j++) {
						double Dj = distance(j, medoid_ids[cluster_ids[j]]);  // distance from j to its medoid
						gain += max(Dj - distance(i,j), 0.0);                 // gain from selecting i
					}

					if (gain >= max_gain) {   // set the next medoid to the object that
						max_gain = gain;        // maximizes the gain function.
						best_obj = i;
					}
				}

				medoid_ids.push_back(best_obj);
				assign_objects_to_clusters(distance);
			}
		}

		double cost(medoid_id i, object_id h, const dissimilarity_matrix& distance) const {
			double total = 0;
			for (object_id j = 0; j < cluster_ids.size(); j++) {
				object_id mi  = medoid_ids[i];                // object id of medoid i
				double    dhj = distance(h, j);               // distance between object h and object j

				object_id mj1 = medoid_ids[cluster_ids[j]];   // object id of j's nearest medoid
				double    dj1 = distance(mj1, j);             // distance to j's nearest medoid

				// check if distance bt/w medoid i and j is same as j's current nearest medoid.
				if (distance(mi, j) == dj1) {
					double dj2 = std::numeric_limits<Scalar>::max();
					if (medoid_ids.size() > 1) {   // look at 2nd nearest if there's more than one medoid.
						object_id mj2 = medoid_ids[sec_nearest[j]];  // object id of j's 2nd-nearest medoid
						dj2 = distance(mj2, j);                      // distance to j's 2nd-nearest medoid
					}
					total += min(dj2, dhj) - dj1;

				} else if (dhj < dj1) {
					total += dhj - dj1;
				}
			}
			return total;
		}

		/// True if and only if object i is a medoid.
		bool is_medoid(object_id oi) const {
			return medoid_ids[cluster_ids[oi]] == oi;
		}

		void pam(const dissimilarity_matrix& distance, size_t k, const object_id *initial_medoids) {
			if (k > distance.rows()) {
				throw std::logic_error("Attempt to run PAM with more clusters than data.");
			}

			if (distance.rows() != distance.cols()) {
				throw std::logic_error("Error: distance matrix is not square!");
			}

			// first get this the right size.
			cluster_ids.resize(distance.rows());

			// size cluster_ids appropriately and randomly pick initial medoids
			if (initial_medoids) {
				medoid_ids.clear();
				copy(initial_medoids, initial_medoids + k, back_inserter(medoid_ids));
			} else {
				init_medoids(k, distance);
			}

			// set tolerance equal to epsilon times mean magnitude of distances.
			// Note that distances *should* all be non-negative.
			double tolerance = epsilon * distance.sum() / (distance.rows() * distance.cols());

			while (true) {
				// initial cluster setup
				total_dissimilarity = assign_objects_to_clusters(distance);

				//vars to keep track of minimum
				double minTotalCost = std::numeric_limits<Scalar>::max();
				medoid_id minMedoid = 0;
				object_id minObject = 0;

				//iterate over each medoid
				for (medoid_id i=0; i < k; i++) {
					//iterate over all non-medoid objects
					for (object_id h = 0; h < cluster_ids.size(); h++) {
						if (is_medoid(h)) continue;

						//see if the total cost of swapping i & h was less than min
						double curCost = cost(i, h, distance);
						if (curCost < minTotalCost) {
							minTotalCost = curCost;
							minMedoid = i;
							minObject = h;
						}
					}
				}

				// bail if we can't gain anything more (we've converged)
				if (minTotalCost >= -tolerance) break;

				// install the new medoid if we found a beneficial swap
				medoid_ids[minMedoid] = minObject;
				cluster_ids[minObject] = minMedoid;
			}

			//if (sort_medoids) sort();
		}

		/// Assign each object to the cluster with the closest medoid.
		///
		/// @return Total dissimilarity of objects w/their medoids.
		///
		/// @param distance a callable object that computes distances between indices, as a distance
		///                 matrix would.  Algorithms are free to use real distance matrices (as in PAM)
		///                 or to compute lazily (as in CLARA medoid assignment).
		double assign_objects_to_clusters(const dissimilarity_matrix& distance) {
			if (sec_nearest.size() != cluster_ids.size()) {
				sec_nearest.resize(cluster_ids.size());
			}

			// go through and assign each object to nearest medoid, keeping track of total dissimilarity.
			double total_dissimilarity = 0;

			#pragma omp parallel for reduction(+:total_dissimilarity)
			for (int i=0; i < (int)cluster_ids.size(); i++) {
				double    d1, d2;  // smallest, second smallest distance to medoid, respectively
				medoid_id m1, m2;  // index of medoids with distances d1, d2 from object i, respectively

				d1 = d2 = std::numeric_limits<Scalar>::max();
				m1 = m2 = medoid_ids.size();
				for (medoid_id m=0; m < medoid_ids.size(); m++) {
					double d = distance(i, medoid_ids[m]);
					if (d < d1 || medoid_ids[m] == i) {  // prefer the medoid in case of ties.
						d2 = d1;  m2 = m1;
						d1 = d;   m1 = m;
					} else if (d < d2) {
						d2 = d;   m2 = m;
					}
				}

				cluster_ids[i] = m1;
				sec_nearest[i] = m2;
				total_dissimilarity += d1;
			}

			return total_dissimilarity;
		}
	};

} // namespace cluster

## Meanshift.hpp
#pragma once

/* Meanshift non-parametric mode estimator.
 *
 * Code adapted from : Sebastian Nowozin <nowozin@gmail.com>
 */

#define _USE_MATH_DEFINES
#include <math.h>

#include <vector>
#include <map>
#include <iostream>

//#include <gmm/gmm.h>
#include <Eigen/Core>

namespace gmm{
    template<typename V>
    void clear(V & v){
        for(size_t i = 0; i < v.size(); i++)
            v[i] = 0;
    }

    template<typename V, typename W>
    void copy( const V & v, W & w ){
        for(size_t i = 0; i < v.size(); i++)
            w[i] = v[i];
    }

    template<typename V, typename W>
    void add( const V & v, W & w ){
        for(size_t i = 0; i < w.size(); i++)
            w[i] += v[i];
    }

    template<typename V>
    V scaled(const V & v, double s){
        V a = v;
        for(size_t i = 0; i < a.size(); i++)
            a[i] *= s;
        return a;
    }

    template<typename V>
    void scale( V & v, double s ){
        for(size_t i = 0; i < v.size(); i++)
            v[i] *= s;
    }

    template<typename V, typename W>
    double vect_dist2(V & v, W & w){
        Eigen::VectorXd a = Eigen::Map<Eigen::VectorXd>(&v[0], v.size());
        Eigen::VectorXd b = Eigen::Map<Eigen::VectorXd>(&w[0], w.size());
        return (a-b).lpNorm<2>();
    }

    template<typename V>
    double vect_norm2(V & v){
        return Eigen::Map<Eigen::VectorXd>(&v[0], v.size()).norm();
    }
}

namespace clustering {

/* We use the notation of the description of Mean Shift in
 *   [Comaniciu2000], Dorin Comaniciu, Peter Meer,
 *   "Mean Shift: A Robust Approach toward Feature Space Analysis"
 *
 * The implementation is naive and does not exploit fast nearest neighbor
 * lookups or triangle inequalities to speed up the mean shift procedure.
 * Therefore, it is only suitable for low-dimensional input spaces (say, <=
 * 10) with relatively few samples (say, < 1e6).
 */
class Meanshift {
public:
    /* kernel_type selects the kernel profile:
     *   0 for the Epanechnikov kernel profile
     *     k_E(x) = (1 - x) if (0 <= x <= 1), 0 otherwise.
     *   1 for the truncated multivariate normal kernel profile
     *     k_N(x) = exp(-0.5 * x)
     * kernel_bandwidth: The positive bandwidth parameter.
     * mode_tolerance: mode matching tolerance.  Modes which have a L2
     *   distance closer than this value will be treated as being the same.
     */
    Meanshift(int kernel_type, double kernel_bandwidth,
        int dim, double mode_tolerance);

    /* Find modes of an empirical distribution X.
     *
     * X: N vectors of size M, representing N samples from the distribution.
     * modes: Output, will be allocated properly.  Return all modes found.
     * indexmap: N-vector of absolute mode indices.  Each sample point is
     *   assigned to one mode.  The vector must already be properly sized.
     * procedurecount: The mean shift procedure will be run at least this many
     *   times, sampling randomly from X as initialization.  If zero, all
     *   samples from X are used as initializations.
     */
    void FindModes(const std::vector<std::vector<double> >& X,
        std::vector<std::vector<double> >& modes,
        std::vector<int>& indexmap,
        unsigned int procedure_count = 0) const;

    /* Mean Shift Procedure, starting from mode, perform mean shift on the
     * distribution empirically sampled in X.
     *
     * X: N vectors of size M, representing N samples from the distribution.
     * mode: Starting point (for example a point from X).  The result will be
     *    given in mode.
     * visited: N-vector of indicator variables.  If the mean shift procedure
     *    passes through a point in X, the corresponding index in visited will
     *    be set to one.
     *
     * Return the number of iterations used.
     */
    int MeanshiftProcedure(const std::vector<std::vector<double> >& X,
        std::vector<double>& mode, std::vector<unsigned int>& visited) const;

private:
    int dim;	// input space dimension
    int kernel_type;	// 0: Epanechnikov, 1: truncated multivariate normal
    double kernel_c;	// constant normalization factor
    double kernel_bandwidth;
    double mode_tolerance;
};


Meanshift::Meanshift(int kernel_type, double kernel_bandwidth,
    int dim, double mode_tolerance)
    : dim(dim), kernel_type(kernel_type),
        kernel_bandwidth(kernel_bandwidth),
        mode_tolerance(mode_tolerance) {
    assert(kernel_type >= 0 && kernel_type <= 1);
    assert(kernel_bandwidth > 0.0);
    assert(dim > 0);

    // Compute normalization constant
    if (kernel_type == 0) {
        kernel_c = 0.5 * (static_cast<double>(dim) + 2.0) / 1.234;
        // TODO: replace 1.234 with the volume of the d-dimensional unit
        // sphere FIXME
    } else if (kernel_type == 1) {
        kernel_c = pow(2.0 * M_PI, -static_cast<double>(dim)/2.0);
    }
}

void Meanshift::FindModes(const std::vector<std::vector<double> >& X,
    std::vector<std::vector<double> >& modes,
    std::vector<int>& indexmap,
    unsigned int procedure_count) const
{
    //assert(indexmap.size() == X.size());
	indexmap.clear();
	indexmap.resize(X.size(), 0);

    modes.clear();

    if (procedure_count != 0 && procedure_count >= X.size())
        procedure_count = 0;

    // Perform dense mode finding: for each sample in X, perform the mean
    // shift procedure
    std::vector<double> mode(X[0].size());
    std::vector<unsigned int> visited(X.size());
    // [mode_index][sample_index] = number of times sample_index was visited
    //   when arriving at the mode with the mode_index.
    std::vector<std::map<unsigned int, unsigned int> > visit_counts;
    for (unsigned int si = 0; (procedure_count == 0 && si < X.size())
        || (procedure_count != 0 && si < procedure_count); ++si) {
        if (procedure_count == 0) {
            gmm::copy(X[si], mode);
        } else {
            // Pick a random one from X
            unsigned sample_index = rand() % X.size();
            gmm::copy(X[sample_index], mode);
        }
        std::cout << "Sample " << si << " of "
            << (procedure_count == 0 ? X.size() : procedure_count)
            << std::endl;
        gmm::clear(visited);
        MeanshiftProcedure(X, mode, visited);

        // Identify whether this is a novel mode or a known one
        bool found = false;
        unsigned int M_cur = 0;
        for (std::vector<std::vector<double> >::iterator Mi =
            modes.begin(); found == false && Mi != modes.end(); ++Mi) {
            if (gmm::vect_dist2(*Mi, mode) < mode_tolerance) {
                M_cur = Mi - modes.begin();
                found = true;
            }
        }
        if (found == false) {
            // Add novel mode
            modes.push_back(mode);
            // TODO: remove this debug output
            std::cout << modes.size() << " mode"
                << (modes.size() >= 2 ? "s" : "") << std::endl;

            // Add a new mapping of which samples have been visited while
            // approaching the novel mode.
            std::map<unsigned int, unsigned int> mode_visits;
            for (std::vector<unsigned int>::const_iterator vi =
                visited.begin(); vi != visited.end(); ++vi) {
                if (*vi != 0)
                    mode_visits[vi - visited.begin()] = 1;
            }
            visit_counts.push_back(mode_visits);
        } else {
            // The mode has been known, but we maybe crossed old and new
            // samples.  Update the counts.
            for (std::vector<unsigned int>::const_iterator vi =
                visited.begin(); vi != visited.end(); ++vi) {
                if (*vi != 0)
                    visit_counts[M_cur][vi - visited.begin()] += 1;
            }
        }
    }
#ifdef DEBUG
    std::cout << "Found " << modes.size() << " modes." << std::endl;
#endif

    // Perform index mapping: each sample gets assigned to one mode index.
    unsigned int unmapped_count = 0;
    for (unsigned int sample_index = 0;
        sample_index < X.size(); ++sample_index) {
        // Find mode index with highest count
        unsigned int maximum_count = 0;
        int maximum_mode_index = -1;
        for (unsigned int mode_index = 0;
            mode_index < modes.size(); ++mode_index) {
            if (visit_counts[mode_index].count(sample_index) == 0)
                continue;

            unsigned int count_cur = visit_counts[mode_index][sample_index];
            std::cout << "  mode " << mode_index << " visited "
                << "sample " << sample_index << " for "
                << count_cur << " times." << std::endl;
            if (count_cur > maximum_count) {
                maximum_mode_index = mode_index;
                maximum_count = count_cur;
            }
        }
        if (maximum_mode_index == -1)
            unmapped_count += 1;
        indexmap[sample_index] = maximum_mode_index;
    }
    std::cout << unmapped_count << " unmapped samples." << std::endl;
}

int Meanshift::MeanshiftProcedure(const std::vector<std::vector<double> >& X,
    std::vector<double>& mode, std::vector<unsigned int>& visited) const {
    assert(X.size() > 0);
    assert(visited.size() == X.size());
    assert(X[0].size() == mode.size());
    std::vector<double> meanshift(mode.size(),0);
    std::vector<double> meanshift_cur(mode.size(),0);
    int iter = 0;
    bool converged = false;
    do {
        // Compute the mean shift vector
        gmm::clear(meanshift);
        double denominator = 0.0;
        for (std::vector<std::vector<double> >::const_iterator Xi = X.begin();
            Xi != X.end(); ++Xi) {
            gmm::copy(mode, meanshift_cur);
            gmm::add(gmm::scaled(*Xi, -1.0), meanshift_cur);
            gmm::scale(meanshift_cur, 1.0 / kernel_bandwidth);
            double weight_cur = gmm::vect_norm2(meanshift_cur);
            if (kernel_type == 0) {
                // Epanechnikov kernel is the shadow of the uniform kernel
                if (weight_cur <= 1.0)
                    weight_cur = 1.0;
                else
                    weight_cur = 0.0;
            } else if (kernel_type == 1) {
                // Multivariate normal kernel is the shadow of itself
                weight_cur = kernel_c * exp(-0.5 * weight_cur);

                // Truncate
                if (weight_cur < 1e-2)
                    weight_cur = 0.0;
            }
            if (weight_cur >= 1e-6)
                visited[Xi - X.begin()] = 1;

            gmm::add(gmm::scaled(*Xi, weight_cur), meanshift);
            denominator += weight_cur;
        }

        gmm::scale(meanshift, 1.0 / denominator);
        double distance_moved = gmm::vect_dist2(meanshift, mode);
        gmm::copy(meanshift, mode);

#ifdef DEBUG
        std::cout << "iter " << iter << ", moved "
            << distance_moved << std::endl;
#endif
        iter += 1;
        if (distance_moved <= 1e-8)
            converged = true;
    } while (converged == false);

    return (iter);
}

}

## xmeans.hpp
/*=========================================================================
*
*  Copyright David Doria 2012 daviddoria@gmail.com
*
*  Licensed under the Apache License, Version 2.0 (the "License");
*  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
*
*         http://www.apache.org/licenses/LICENSE-2.0.txt
*
*  Unless required by applicable law or agreed to in writing, software
*  distributed under the License is distributed on an "AS IS" BASIS,
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*  See the License for the specific language governing permissions and
*  limitations under the License.
*
*=========================================================================*/

/** XMeans clustering is a method that creates K clusters of points from
* an unorganized set of input points. It is an extension of KMeans clustering
* that attempts to determine K during the algorithm.
*/

#ifndef XMeansClustering_h
#define XMeansClustering_h

// STL
#include <vector>
#include <iostream>
#include <limits>
#include <numeric>
#include <set>
#include <stdexcept>

// Eigen
#include <Eigen/Dense>

namespace EigenHelpers{
	template <typename TVector>
	TVector RandomUnitVector(const unsigned int dim)
	{
		TVector randomUnitVector(dim);
		for(int i = 0; i < randomUnitVector.size(); ++i)
		{
			randomUnitVector[i] = (double(rand()) / RAND_MAX);
		}

		randomUnitVector.normalize();

		return randomUnitVector;
	}

	template <typename TPoint>
	void GetBoundingBox(const std::vector<TPoint, Eigen::aligned_allocator<TPoint> >& data, TPoint& minCorner, TPoint& maxCorner)
	{
		assert(data.size() > 0);

		minCorner.resize(data[0].size());
		maxCorner.resize(data[0].size());

		for(int coordinate = 0; coordinate < data[0].size(); ++coordinate)
		{
			minCorner[coordinate] = std::numeric_limits<typename TPoint::Scalar>::max();
			maxCorner[coordinate] = std::numeric_limits<typename TPoint::Scalar>::min();

			for(unsigned int pointId = 0; pointId < data.size(); ++pointId)
			{
				if(data[pointId][coordinate] > maxCorner[coordinate])
				{
					maxCorner[coordinate] = data[pointId][coordinate];
				}

				if(data[pointId][coordinate] < minCorner[coordinate])
				{
					minCorner[coordinate] = data[pointId][coordinate];
				}
			}
		}
	}
}

class XMeansClustering
{
public:
	typedef Eigen::VectorXf PointType;
	typedef std::vector<PointType, Eigen::aligned_allocator<PointType> > VectorOfPoints;

	/** Constructor. */
	XMeansClustering();

	/** Set the maximum number of clusters to find. */
	void SetMaxK(const unsigned int maxk);

	/** Get the maximum number of clusters to find. */
	unsigned int GetMaxK();

	/** Get the ids of the points that belong to class 'label'. */
	std::vector<unsigned int> GetIndicesWithLabel(const unsigned int label);

	/** Get the coordinates of the points that belong to class 'label'. */
	VectorOfPoints GetPointsWithLabel(const unsigned int label);

	/** Set the points to cluster. */
	void SetPoints(const VectorOfPoints& points);

	/** Get the resulting cluster id for each point. */
	std::vector<unsigned int> GetLabels();

	/** Actually perform the clustering. */
	void Cluster();

	/** Write the cluster centers to the standard output. */
	void OutputClusterCenters();

private:

	/** Split every cluster into two clusters if that helps the description of the data. */
	void SplitClusters();

	/** The label (cluster ID) of each point. */
	std::vector<unsigned int> Labels;

	/** The maximum number of clusters to find */
	unsigned int MaxK;

	/** The points to cluster. */
	VectorOfPoints Points;

	/** The current cluster centers. */
	VectorOfPoints ClusterCenters;
};


XMeansClustering::XMeansClustering() : MaxK(3)
{

}

void XMeansClustering::Cluster()
{
	if(this->Points.size() < this->MaxK)
	{
		std::stringstream ss;
		ss << "The number of points (" << this->Points.size()
			<< " must be larger than the maximum number of clusters (" << this->MaxK << ")";
		throw std::runtime_error(ss.str());
	}

	// We must store the labels at the previous iteration to determine whether any labels changed at each iteration.
	std::vector<unsigned int> oldLabels(this->Points.size(), 0); // initialize to all zeros

	// Initialize the labels array
	this->Labels.resize(this->Points.size());

	do
	{
		// Save the old labels
		oldLabels = this->Labels;
	}while(this->ClusterCenters.size() < this->MaxK);
}

void XMeansClustering::SplitClusters()
{
	assert(this->Points.size() > 0);

	VectorOfPoints newClusterCenters;

	for(unsigned int clusterId = 0; clusterId < this->ClusterCenters.size(); ++clusterId)
	{
		// Generate a random direction
		PointType randomUnitVector = EigenHelpers::RandomUnitVector<PointType>(this->Points[0].size());

		// Get the bounding box of the points that belong to this cluster
		PointType minCorner;
		PointType maxCorner;
		EigenHelpers::GetBoundingBox(this->Points, minCorner, maxCorner);

		// Scale the unit vector by the size of the region
		PointType splitVector = randomUnitVector * (maxCorner - minCorner) / 2.0f;
		PointType childCenter1 = this->ClusterCenters[clusterId] + splitVector;
		PointType childCenter2 = this->ClusterCenters[clusterId] + splitVector;

		// Compute the BIC of the original model
		float BIC_parent;

		// Compute the BIC of the new (split) model
		float BIC_children;

		// If the split was useful, keep it
		if(BIC_children < BIC_parent)
		{
			newClusterCenters.push_back(childCenter1);
			newClusterCenters.push_back(childCenter2);
		}
		else
		{
			newClusterCenters.push_back(this->ClusterCenters[clusterId]);
		}
	}

	this->ClusterCenters = newClusterCenters;
}

std::vector<unsigned int> XMeansClustering::GetIndicesWithLabel(unsigned int label)
{
	std::vector<unsigned int> pointsWithLabel;
	for(unsigned int i = 0; i < this->Labels.size(); i++)
	{
		if(this->Labels[i] == label)
		{
			pointsWithLabel.push_back(i);
		}
	}

	return pointsWithLabel;
}

XMeansClustering::VectorOfPoints XMeansClustering::GetPointsWithLabel(const unsigned int label)
{
	VectorOfPoints points;

	std::vector<unsigned int> indicesWithLabel = GetIndicesWithLabel(label);

	for(unsigned int i = 0; i < indicesWithLabel.size(); i++)
	{
		points.push_back(this->Points[indicesWithLabel[i]]);
	}

	return points;
}


void XMeansClustering::SetMaxK(const unsigned int maxk)
{
	this->MaxK = maxk;
}

unsigned int XMeansClustering::GetMaxK()
{
	return this->MaxK;
}

void XMeansClustering::SetPoints(const VectorOfPoints& points)
{
	this->Points = points;
}

std::vector<unsigned int> XMeansClustering::GetLabels()
{
	return this->Labels;
}

void XMeansClustering::OutputClusterCenters()
{
	std::cout << std::endl << "Cluster centers: " << std::endl;

	for(unsigned int i = 0; i < ClusterCenters.size(); ++i)
	{
		std::cout << ClusterCenters[i] << " ";
	}
	std::cout << std::endl;
}

#endif
	// Interface for the The C clustering library

	void clusterlibrary::cluster(std::vector< std::vector<float> > & data, int k, int iterations, std::vector<int> & clusterid)
	{
	int nrows = data.size();
	int ncolumns = data.front().size();

	float** data_ptr = new float*[nrows];
	for(size_t i = 0; i < data.size(); i++) data_ptr[i] = &data[i][0];

	std::vector< std::vector<int> > mask(nrows, std::vector<int>(ncolumns, 1));
	int ** mask_ptr = new int*[nrows];
	for(size_t i = 0; i < mask.size(); i++) mask_ptr[i] = &mask[i][0];

	std::vector<float> data_weights(data.size(),1.0);
	float * weights = &data_weights[0];

	char distType = 'e';

	std::vector< std::vector<float> > cdata(k, std::vector<float>(ncolumns, 0));
	float ** cdata_ptr = new float*[ncolumns];
	for(size_t i = 0; i < cdata.size(); i++) cdata_ptr[i] = &cdata[i][0];

	std::vector< std::vector<int> > cmask(k, std::vector<int>(ncolumns, 1));
	int ** cmask_ptr = new int*[ncolumns];
	for(size_t i = 0; i < cmask.size(); i++) cmask_ptr[i] = &cmask[i][0];

	float err = 0;
	std::vector< int > tclusterid(nrows, 0);
	std::vector< int > counts(k, 0);
	std::vector< int > mapping(k, 0);

	clusterid.clear();
	clusterid.resize(nrows, 0);

	clusterlib::kmeans(k, nrows, ncolumns, data_ptr, mask_ptr, weights, 0, iterations,
	distType, cdata_ptr, cmask_ptr, &clusterid[0], &err, &tclusterid[0], &counts[0], &mapping[0] );
	}
	#pragma once

	// Code adapted from https://github.com/propanoid/DBSCAN

	#include <vector>
	#include <algorithm>
	#include <omp.h>

	// Any basic vector/matrix library should also work
	#include <Eigen/Core>

	namespace clustering
	{
	template<typename Vector, typename Matrix>
	class DBSCAN
	{
	public:
	typedef Vector FeaturesWeights;
	typedef Matrix ClusterData;
	typedef Matrix DistanceMatrix;
	typedef std::vector<unsigned int> Neighbors;
	typedef std::vector<int> Labels;

	private:
	double m_eps;
	size_t m_min_elems;
	double m_dmin;
	double m_dmax;

	Labels m_labels;

	public:

	// 'eps' is the search space for neighbors in the range [0,1], where 0.0 is exactly self and 1.0 is entire dataset
	DBSCAN(double eps, size_t min_elems)
	: m_eps( eps )
	, m_min_elems( min_elems )
	, m_dmin(0.0)
	, m_dmax(0.0)
	{
	reset();
	}

	// Call this to perform clustering, get results by calling 'get_labels()'
	void fit( const ClusterData & C )
	{
	const FeaturesWeights W = std_weights( C.cols() );
	wfit( C, W );
	}

	const Labels & get_labels() const
	{
	return m_labels;
	}

	void reset()
	{
	m_labels.clear();
	}

	void init(double eps, size_t min_elems)
	{
	m_eps = eps;
	m_min_elems = min_elems;
	}

	// Useful for testing
	static ClusterData gen_cluster_data( size_t features_num, size_t elements_num )
	{
	ClusterData cl_d( elements_num, features_num );
	for (size_t i = 0; i < elements_num; ++i)
	for (size_t j = 0; j < features_num; ++j)
	cl_d(i, j) = (-1.0 + rand() * (2.0) / RAND_MAX);
	return cl_d;
	}

	FeaturesWeights std_weights( size_t s )
	{
	// num cols
	FeaturesWeights ws( s );

	for (size_t i = 0; i < s; ++i)
	ws(i) = 1.0;

	return ws;
	}

	void fit_precomputed( const DistanceMatrix & D )
	{
	prepare_labels( D.rows() );
	dbscan( D );
	}

	void wfit( const ClusterData & C, const FeaturesWeights & W )
	{
	prepare_labels( C.rows() );
	const DistanceMatrix D = calc_dist_matrix( C, W );
	dbscan( D );
	}

	private:
	void prepare_labels( size_t s )
	{
	m_labels.resize(s, -1);
	}

	Neighbors find_neighbors(const DistanceMatrix & D, unsigned int pid)
	{
	Neighbors ne;

	for (unsigned int j = 0; j < D.rows(); ++j)
	{
	if ( D(pid, j) <= m_eps )
	{
	ne.push_back(j);
	}
	}
	return ne;
	}

	const DistanceMatrix calc_dist_matrix( const ClusterData & C, const FeaturesWeights & W )
	{
	ClusterData cl_d = C;

	#pragma omp parallel for
	for (int i = 0; i < (int)cl_d.cols(); ++i)
	{
	auto col = cl_d.col(i);

	const auto r = std::minmax_element( col.data(), col.data() + col.size() );

	double data_min = *r.first;
	double data_range = r.second - r.first;

	if (data_range == 0.0) { data_range = 1.0; }

	const double scale = 1/data_range;
	const double min = -1.0data_minscale;

	col *= scale;
	col += Vector::Constant(col.size(), min);

	cl_d.col(i) = col;
	}

	// rows x rows
	DistanceMatrix d_m( cl_d.rows(), cl_d.rows() );
	Vector d_max( cl_d.rows() );
	Vector d_min( cl_d.rows() );

	for (int i = 0; i < (int)cl_d.rows(); ++i)
	{
	#pragma omp parallel for
	for (int j = i; j < (int)cl_d.rows(); ++j)
	{
	d_m(i, j) = 0.0;

	if (i != j)
	{
	Vector U = cl_d.row(i);
	Vector V = cl_d.row(j);

	Vector diff = ( U-V );

	for(int k = 0; k < (int)diff.size(); k++)
	{
	auto e = diff[k];
	d_m(i, j) += fabs(e)*W[k];
	}

	d_m(j, i) = d_m(i, j);
	}
	}

	const auto cur_row = d_m.row(i);
	const auto mm = std::minmax_element( cur_row.data(), cur_row.data() + cur_row.size() );

	d_max(i) = *mm.second;
	d_min(i) = *mm.first;
	}

	m_dmin = *(std::min_element( d_min.data(), d_min.data() + d_min.size() ));
	m_dmax = *(std::max_element( d_max.data(), d_max.data() + d_max.size() ));

	m_eps = (m_dmax - m_dmin) * m_eps + m_dmin;

	return d_m;
	}

	void dbscan( const DistanceMatrix & dm )
	{
	std::vector<unsigned int> visited( dm.rows() );

	unsigned int cluster_id = 0;

	for (unsigned int pid = 0; pid < dm.rows(); ++pid)
	{
	if ( !visited[pid] )
	{
	visited[pid] = 1;

	Neighbors ne = find_neighbors(dm, pid );

	if (ne.size() >= m_min_elems)
	{
	m_labels[pid] = cluster_id;

	for (unsigned int i = 0; i < ne.size(); ++i)
	{
	unsigned int nPid = ne[i];

	if ( !visited[nPid] )
	{
	visited[nPid] = 1;

	Neighbors ne1 = find_neighbors(dm, nPid);

	if ( ne1.size() >= m_min_elems )
	{
	for (const auto & n1 : ne1)
	{
	ne.push_back(n1);
	}
	}
	}

	if ( m_labels[nPid] == -1 )
	{
	m_labels[nPid] = cluster_id;
	}
	}

	++cluster_id;
	}
	}
	}
	}
	};
	}
	// Incomplete..

	//Generalized k-means
	//Copyright (C) 2013 Balazs Szalkai
	//If you use this program in your research, please cite the following article:
	//B. Szalkai: An implementation of the relational k-means algorithm. ArXiv e-prints, 2013.

	namespace GeneralizedKMeans
	{
	typedef Eigen::MatrixXf DistanceMatrix;

	struct Clusters
	{
	DistanceMatrix Matrix;
	int Count;
	int[] ObjectToCluster;
	List<int>[] ClusterToObjects;
	CentroidDistance cd;

	Clusters(DistanceMatrix & matrix, int count)
	{
	Matrix = matrix;
	Count = count;
	ObjectToCluster = new int[Matrix.Count];
	ClusterToObjects = new List<int>[Count];
	for (int i = 0; i < Count; i++) ClusterToObjects[i] = new List<int>();
	Randomize();
	}

	static Random Rand = new Random();

	void Randomize()
	{
	for (int i = 0; i < Count; i++) ClusterToObjects[i].Clear();

	for (int i = 0; i < Matrix.Count; i++)
	{
	int cluster = Rand.Next(Count);
	ObjectToCluster[i] = cluster;
	ClusterToObjects[cluster].Add(i);
	}
	}

	void Make_ClusterToObjects()
	{
	for (int cluster = 0; cluster < Count; cluster++)
	ClusterToObjects[cluster].Clear();

	for (int i = 0; i < Matrix.Count; i++)
	ClusterToObjects[ObjectToCluster[i]].Add(i);
	}

	double[] AdditiveConstant;

	// Call this before using the other members functions
	void Initialize()
	{
	// calculate additive constants for clusters
	AdditiveConstant = new double[clusters.Count];
	for (int cluster = 0; cluster < clusters.Count; cluster++)
	{
	var objects = clusters.ClusterToObjects[cluster];
	double x = 0;
	foreach (var i in objects)
	foreach (var j in objects)
	x += clusters.Matrix.SqrDistance[i, j];
	AdditiveConstant[cluster] = -x / (2 * Utils.Sqr((double)objects.Count));
	}
	}
	// Calculates a squared centroid distance
	double CalculateSqrDist(int obj, int cluster)
	{
	var objects = clusters.ClusterToObjects[cluster];
	double x = 0;
	foreach (int j in objects) x += clusters.Matrix.SqrDistance[obj, j];
	double dist = AdditiveConstant[cluster] + x / objects.Count;
	return dist;
	}
	int GetNearestCluster(int obj)
	{
	int nearestCluster = -1;
	double minSqrDist = 0;
	for (int cluster = 0; cluster < clusters.Count; cluster++)
	{
	double sqrDist = CalculateSqrDist(obj, cluster);
	if (nearestCluster < 0 \|\| sqrDist < minSqrDist)
	{
	minSqrDist = sqrDist;
	nearestCluster = cluster;
	}
	}
	return nearestCluster;
	}
	double GetClusteringValue()
	{
	double result = 0;
	for (int i = 0; i < clusters.Matrix.Count; i++)
	result += CalculateSqrDist(i, clusters.ObjectToCluster[i]);
	return result;
	}

	// Returns how many objects have changed cluster
	int Iterate()
	{
	int changed = 0;
	cd.Initialize();

	// find the nearest centroid for the objects
	int[] New_ObjectToCluster = new int[Matrix.Count];
	for (int i = 0; i < Matrix.Count; i++)
	{
	New_ObjectToCluster[i] = cd.GetNearestCluster(i);
	if (New_ObjectToCluster[i] != ObjectToCluster[i]) changed++;
	}

	// update the configuration
	double oldValue = GetValue();
	int[] Old_ObjectToCluster = ObjectToCluster;
	ObjectToCluster = New_ObjectToCluster;
	Make_ClusterToObjects();
	double newValue = GetValue();

	// clustering got worse (this is possible with some distance matrices) or stayed the same?
	if (oldValue <= newValue)
	{
	// undo iteration
	ObjectToCluster = Old_ObjectToCluster;
	Make_ClusterToObjects();
	return 0;
	}
	return changed;
	}

	double GetValue()
	{
	cd.Initialize();
	return cd.GetClusteringValue();
	}
	}

	void Run( DistanceMatrix & m, int nClusters )
	{
	Clusters bestClusters (m, nClusters);

	int badLuckStreak = 0;
	int blockId = 0;
	int maxBadLuckStreak = 100;

	while (true)
	{
	for (int i = 0; i < nThreads; i++)
	{
	Clusters c = new Clusters(m, nClusters);
	while (true)
	{
	if (c.Iterate() == 0) break;
	}

	Clusters c = clustersArray[i];
	if (c.GetValue() < bestClusters.GetValue())
	{
	bestClusters = c;
	badLuckStreak = 0;
	}
	else
	badLuckStreak++;
	}

	if (badLuckStreak >= maxBadLuckStreak) break;
	}
	}
	}
	// Works well

	//////////////////////////////////////////////////////////////////////////////////////////////////
	// Copyright (c) 2010, Lawrence Livermore National Security, LLC.
	// Produced at the Lawrence Livermore National Laboratory
	//////////////////////////////////////////////////////////////////////////////////////////////////
	/// @author Todd Gamblin tgamblin@llnl.gov

	#include <vector>
	#include <sstream>

	#include <algorithm>
	#include <numeric>
	#include <cassert>
	#include <cstdlib>
	using namespace std;

	//#include "random.h"
	//#include "bic.h"
	//#include "matrix_utils.h"

	namespace clustering {
	typedef float Scalar;
	typedef Eigen::Matrix<Scalar,-1,-1> dissimilarity_matrix;

	typedef size_t medoid_id; ///< More descriptive type for medoid index
	typedef size_t object_id; ///< More descriptive type for object index

	///
	/// Explicit representation of a clustering. Instead of a vecto of representative
	/// ids, this has <i>k</i> sets of object_ids indicating which objects are in a
	/// particular cluster. You can convert a partition to a cluster_list with
	/// to_cluster_list().
	///
	typedef std::vector< std::set<object_id> > cluster_list;

	struct kmedoids{

	/// Gives the index of the object that is the ith medoid.
	/// medoids[i] == index in object array for last call to findClusters()
	std::vector<object_id> medoid_ids;

	/// Gives cluster id (index in medoids) for the ith object.
	/// clusterid[i] == id of cluster of which object i is a member.
	/// medoids[clusterid[i]] == representative of that cluster.
	std::vector<medoid_id> cluster_ids;

	std::vector<medoid_id> sec_nearest; /// Index of second closest medoids. Used by PAM.
	double total_dissimilarity; /// Total dissimilarity bt/w objects and their medoid
	bool sort_medoids; /// Whether medoids should be canonically sorted by object id.
	double epsilon; /// Normalized sensitivity for convergence
	size_t init_size; /// initial sample size (before 2*k)
	size_t max_reps; /// initial sample size (before 2*k)

	typedef std::mt19937 random_type; /// Type for RNG used in this algorithm

	kmedoids(size_t num_objects)
	: cluster_ids(num_objects, 0),
	total_dissimilarity(std::numeric_limits<double>::infinity()),
	sort_medoids(false),
	epsilon(1e-15),
	init_size(40),
	max_reps(5)
	{ if (num_objects) medoid_ids.resize(1); }

	double average_dissimilarity() const {
	return total_dissimilarity / cluster_ids.size();
	}

	void set_sort_medoids(bool sort) {
	sort_medoids = sort;
	}

	void set_epsilon(double e) {
	epsilon = e;
	}

	void init_medoids(size_t k, const dissimilarity_matrix& distance) {
	medoid_ids.clear();
	// find first object: object minimum dissimilarity to others
	object_id first_medoid = 0;
	double min_dissim = std::numeric_limits<Scalar>::max();
	for (size_t i=0; i < distance.rows(); i++) {
	double total = 0.0;
	for (size_t j=0; j < distance.cols(); j++) {
	total += distance(i,j);
	}
	if (total < min_dissim) {
	min_dissim = total;
	first_medoid = i;
	}
	}

	// add first object to medoids and compute medoid ids.
	medoid_ids.push_back(first_medoid);
	assign_objects_to_clusters(distance);

	// now select next k-1 objects according to KR's BUILD algorithm
	for (size_t cur_k = 1; cur_k < k; cur_k++) {
	object_id best_obj = 0;
	double max_gain = 0.0;
	for (size_t i=0; i < distance.rows(); i++) {
	if (is_medoid(i)) continue;

	double gain = 0.0;
	#pragma omp parallel for reduction(+:gain)
	for (int j=0; j < (int)distance.rows(); j++) {
	double Dj = distance(j, medoid_ids[cluster_ids[j]]); // distance from j to its medoid
	gain += max(Dj - distance(i,j), 0.0); // gain from selecting i
	}

	if (gain >= max_gain) { // set the next medoid to the object that
	max_gain = gain; // maximizes the gain function.
	best_obj = i;
	}
	}

	medoid_ids.push_back(best_obj);
	assign_objects_to_clusters(distance);
	}
	}

	double cost(medoid_id i, object_id h, const dissimilarity_matrix& distance) const {
	double total = 0;
	for (object_id j = 0; j < cluster_ids.size(); j++) {
	object_id mi = medoid_ids[i]; // object id of medoid i
	double dhj = distance(h, j); // distance between object h and object j

	object_id mj1 = medoid_ids[cluster_ids[j]]; // object id of j's nearest medoid
	double dj1 = distance(mj1, j); // distance to j's nearest medoid

	// check if distance bt/w medoid i and j is same as j's current nearest medoid.
	if (distance(mi, j) == dj1) {
	double dj2 = std::numeric_limits<Scalar>::max();
	if (medoid_ids.size() > 1) { // look at 2nd nearest if there's more than one medoid.
	object_id mj2 = medoid_ids[sec_nearest[j]]; // object id of j's 2nd-nearest medoid
	dj2 = distance(mj2, j); // distance to j's 2nd-nearest medoid
	}
	total += min(dj2, dhj) - dj1;

	} else if (dhj < dj1) {
	total += dhj - dj1;
	}
	}
	return total;
	}

	/// True if and only if object i is a medoid.
	bool is_medoid(object_id oi) const {
	return medoid_ids[cluster_ids[oi]] == oi;
	}

	void pam(const dissimilarity_matrix& distance, size_t k, const object_id *initial_medoids) {
	if (k > distance.rows()) {
	throw std::logic_error("Attempt to run PAM with more clusters than data.");
	}

	if (distance.rows() != distance.cols()) {
	throw std::logic_error("Error: distance matrix is not square!");
	}

	// first get this the right size.
	cluster_ids.resize(distance.rows());

	// size cluster_ids appropriately and randomly pick initial medoids
	if (initial_medoids) {
	medoid_ids.clear();
	copy(initial_medoids, initial_medoids + k, back_inserter(medoid_ids));
	} else {
	init_medoids(k, distance);
	}

	// set tolerance equal to epsilon times mean magnitude of distances.
	// Note that distances should all be non-negative.
	double tolerance = epsilon * distance.sum() / (distance.rows() * distance.cols());

	while (true) {
	// initial cluster setup
	total_dissimilarity = assign_objects_to_clusters(distance);

	//vars to keep track of minimum
	double minTotalCost = std::numeric_limits<Scalar>::max();
	medoid_id minMedoid = 0;
	object_id minObject = 0;

	//iterate over each medoid
	for (medoid_id i=0; i < k; i++) {
	//iterate over all non-medoid objects
	for (object_id h = 0; h < cluster_ids.size(); h++) {
	if (is_medoid(h)) continue;

	//see if the total cost of swapping i & h was less than min
	double curCost = cost(i, h, distance);
	if (curCost < minTotalCost) {
	minTotalCost = curCost;
	minMedoid = i;
	minObject = h;
	}
	}
	}

	// bail if we can't gain anything more (we've converged)
	if (minTotalCost >= -tolerance) break;

	// install the new medoid if we found a beneficial swap
	medoid_ids[minMedoid] = minObject;
	cluster_ids[minObject] = minMedoid;
	}

	//if (sort_medoids) sort();
	}

	/// Assign each object to the cluster with the closest medoid.
	///
	/// @return Total dissimilarity of objects w/their medoids.
	///
	/// @param distance a callable object that computes distances between indices, as a distance
	/// matrix would. Algorithms are free to use real distance matrices (as in PAM)
	/// or to compute lazily (as in CLARA medoid assignment).
	double assign_objects_to_clusters(const dissimilarity_matrix& distance) {
	if (sec_nearest.size() != cluster_ids.size()) {
	sec_nearest.resize(cluster_ids.size());
	}

	// go through and assign each object to nearest medoid, keeping track of total dissimilarity.
	double total_dissimilarity = 0;

	#pragma omp parallel for reduction(+:total_dissimilarity)
	for (int i=0; i < (int)cluster_ids.size(); i++) {
	double d1, d2; // smallest, second smallest distance to medoid, respectively
	medoid_id m1, m2; // index of medoids with distances d1, d2 from object i, respectively

	d1 = d2 = std::numeric_limits<Scalar>::max();
	m1 = m2 = medoid_ids.size();
	for (medoid_id m=0; m < medoid_ids.size(); m++) {
	double d = distance(i, medoid_ids[m]);
	if (d < d1 \|\| medoid_ids[m] == i) { // prefer the medoid in case of ties.
	d2 = d1; m2 = m1;
	d1 = d; m1 = m;
	} else if (d < d2) {
	d2 = d; m2 = m;
	}
	}

	cluster_ids[i] = m1;
	sec_nearest[i] = m2;
	total_dissimilarity += d1;
	}

	return total_dissimilarity;
	}
	};

	} // namespace cluster
	#pragma once

	/* Meanshift non-parametric mode estimator.
	*
	* Code adapted from : Sebastian Nowozin <nowozin@gmail.com>
	*/

	#define _USE_MATH_DEFINES
	#include <math.h>

	#include <vector>
	#include <map>
	#include <iostream>

	//#include <gmm/gmm.h>
	#include <Eigen/Core>

	namespace gmm{
	template<typename V>
	void clear(V & v){
	for(size_t i = 0; i < v.size(); i++)
	v[i] = 0;
	}

	template<typename V, typename W>
	void copy( const V & v, W & w ){
	for(size_t i = 0; i < v.size(); i++)
	w[i] = v[i];
	}

	template<typename V, typename W>
	void add( const V & v, W & w ){
	for(size_t i = 0; i < w.size(); i++)
	w[i] += v[i];
	}

	template<typename V>
	V scaled(const V & v, double s){
	V a = v;
	for(size_t i = 0; i < a.size(); i++)
	a[i] *= s;
	return a;
	}

	template<typename V>
	void scale( V & v, double s ){
	for(size_t i = 0; i < v.size(); i++)
	v[i] *= s;
	}

	template<typename V, typename W>
	double vect_dist2(V & v, W & w){
	Eigen::VectorXd a = Eigen::Map<Eigen::VectorXd>(&v[0], v.size());
	Eigen::VectorXd b = Eigen::Map<Eigen::VectorXd>(&w[0], w.size());
	return (a-b).lpNorm<2>();
	}

	template<typename V>
	double vect_norm2(V & v){
	return Eigen::Map<Eigen::VectorXd>(&v[0], v.size()).norm();
	}
	}

	namespace clustering {

	/* We use the notation of the description of Mean Shift in
	* [Comaniciu2000], Dorin Comaniciu, Peter Meer,
	* "Mean Shift: A Robust Approach toward Feature Space Analysis"
	*
	* The implementation is naive and does not exploit fast nearest neighbor
	* lookups or triangle inequalities to speed up the mean shift procedure.
	* Therefore, it is only suitable for low-dimensional input spaces (say, <=
	* 10) with relatively few samples (say, < 1e6).
	*/
	class Meanshift {
	public:
	/* kernel_type selects the kernel profile:
	* 0 for the Epanechnikov kernel profile
	* k_E(x) = (1 - x) if (0 <= x <= 1), 0 otherwise.
	* 1 for the truncated multivariate normal kernel profile
	* k_N(x) = exp(-0.5 * x)
	* kernel_bandwidth: The positive bandwidth parameter.
	* mode_tolerance: mode matching tolerance. Modes which have a L2
	* distance closer than this value will be treated as being the same.
	*/
	Meanshift(int kernel_type, double kernel_bandwidth,
	int dim, double mode_tolerance);

	/* Find modes of an empirical distribution X.
	*
	* X: N vectors of size M, representing N samples from the distribution.
	* modes: Output, will be allocated properly. Return all modes found.
	* indexmap: N-vector of absolute mode indices. Each sample point is
	* assigned to one mode. The vector must already be properly sized.
	* procedurecount: The mean shift procedure will be run at least this many
	* times, sampling randomly from X as initialization. If zero, all
	* samples from X are used as initializations.
	*/
	void FindModes(const std::vector<std::vector<double> >& X,
	std::vector<std::vector<double> >& modes,
	std::vector<int>& indexmap,
	unsigned int procedure_count = 0) const;

	/* Mean Shift Procedure, starting from mode, perform mean shift on the
	* distribution empirically sampled in X.
	*
	* X: N vectors of size M, representing N samples from the distribution.
	* mode: Starting point (for example a point from X). The result will be
	* given in mode.
	* visited: N-vector of indicator variables. If the mean shift procedure
	* passes through a point in X, the corresponding index in visited will
	* be set to one.
	*
	* Return the number of iterations used.
	*/
	int MeanshiftProcedure(const std::vector<std::vector<double> >& X,
	std::vector<double>& mode, std::vector<unsigned int>& visited) const;

	private:
	int dim; // input space dimension
	int kernel_type; // 0: Epanechnikov, 1: truncated multivariate normal
	double kernel_c; // constant normalization factor
	double kernel_bandwidth;
	double mode_tolerance;
	};


	Meanshift::Meanshift(int kernel_type, double kernel_bandwidth,
	int dim, double mode_tolerance)
	: dim(dim), kernel_type(kernel_type),
	kernel_bandwidth(kernel_bandwidth),
	mode_tolerance(mode_tolerance) {
	assert(kernel_type >= 0 && kernel_type <= 1);
	assert(kernel_bandwidth > 0.0);
	assert(dim > 0);

	// Compute normalization constant
	if (kernel_type == 0) {
	kernel_c = 0.5 * (static_cast<double>(dim) + 2.0) / 1.234;
	// TODO: replace 1.234 with the volume of the d-dimensional unit
	// sphere FIXME
	} else if (kernel_type == 1) {
	kernel_c = pow(2.0 * M_PI, -static_cast<double>(dim)/2.0);
	}
	}

	void Meanshift::FindModes(const std::vector<std::vector<double> >& X,
	std::vector<std::vector<double> >& modes,
	std::vector<int>& indexmap,
	unsigned int procedure_count) const
	{
	//assert(indexmap.size() == X.size());
	indexmap.clear();
	indexmap.resize(X.size(), 0);

	modes.clear();

	if (procedure_count != 0 && procedure_count >= X.size())
	procedure_count = 0;

	// Perform dense mode finding: for each sample in X, perform the mean
	// shift procedure
	std::vector<double> mode(X[0].size());
	std::vector<unsigned int> visited(X.size());
	// [mode_index][sample_index] = number of times sample_index was visited
	// when arriving at the mode with the mode_index.
	std::vector<std::map<unsigned int, unsigned int> > visit_counts;
	for (unsigned int si = 0; (procedure_count == 0 && si < X.size())
	\|\| (procedure_count != 0 && si < procedure_count); ++si) {
	if (procedure_count == 0) {
	gmm::copy(X[si], mode);
	} else {
	// Pick a random one from X
	unsigned sample_index = rand() % X.size();
	gmm::copy(X[sample_index], mode);
	}
	std::cout << "Sample " << si << " of "
	<< (procedure_count == 0 ? X.size() : procedure_count)
	<< std::endl;
	gmm::clear(visited);
	MeanshiftProcedure(X, mode, visited);

	// Identify whether this is a novel mode or a known one
	bool found = false;
	unsigned int M_cur = 0;
	for (std::vector<std::vector<double> >::iterator Mi =
	modes.begin(); found == false && Mi != modes.end(); ++Mi) {
	if (gmm::vect_dist2(*Mi, mode) < mode_tolerance) {
	M_cur = Mi - modes.begin();
	found = true;
	}
	}
	if (found == false) {
	// Add novel mode
	modes.push_back(mode);
	// TODO: remove this debug output
	std::cout << modes.size() << " mode"
	<< (modes.size() >= 2 ? "s" : "") << std::endl;

	// Add a new mapping of which samples have been visited while
	// approaching the novel mode.
	std::map<unsigned int, unsigned int> mode_visits;
	for (std::vector<unsigned int>::const_iterator vi =
	visited.begin(); vi != visited.end(); ++vi) {
	if (*vi != 0)
	mode_visits[vi - visited.begin()] = 1;
	}
	visit_counts.push_back(mode_visits);
	} else {
	// The mode has been known, but we maybe crossed old and new
	// samples. Update the counts.
	for (std::vector<unsigned int>::const_iterator vi =
	visited.begin(); vi != visited.end(); ++vi) {
	if (*vi != 0)
	visit_counts[M_cur][vi - visited.begin()] += 1;
	}
	}
	}
	#ifdef DEBUG
	std::cout << "Found " << modes.size() << " modes." << std::endl;
	#endif

	// Perform index mapping: each sample gets assigned to one mode index.
	unsigned int unmapped_count = 0;
	for (unsigned int sample_index = 0;
	sample_index < X.size(); ++sample_index) {
	// Find mode index with highest count
	unsigned int maximum_count = 0;
	int maximum_mode_index = -1;
	for (unsigned int mode_index = 0;
	mode_index < modes.size(); ++mode_index) {
	if (visit_counts[mode_index].count(sample_index) == 0)
	continue;

	unsigned int count_cur = visit_counts[mode_index][sample_index];
	std::cout << " mode " << mode_index << " visited "
	<< "sample " << sample_index << " for "
	<< count_cur << " times." << std::endl;
	if (count_cur > maximum_count) {
	maximum_mode_index = mode_index;
	maximum_count = count_cur;
	}
	}
	if (maximum_mode_index == -1)
	unmapped_count += 1;
	indexmap[sample_index] = maximum_mode_index;
	}
	std::cout << unmapped_count << " unmapped samples." << std::endl;
	}

	int Meanshift::MeanshiftProcedure(const std::vector<std::vector<double> >& X,
	std::vector<double>& mode, std::vector<unsigned int>& visited) const {
	assert(X.size() > 0);
	assert(visited.size() == X.size());
	assert(X[0].size() == mode.size());
	std::vector<double> meanshift(mode.size(),0);
	std::vector<double> meanshift_cur(mode.size(),0);
	int iter = 0;
	bool converged = false;
	do {
	// Compute the mean shift vector
	gmm::clear(meanshift);
	double denominator = 0.0;
	for (std::vector<std::vector<double> >::const_iterator Xi = X.begin();
	Xi != X.end(); ++Xi) {
	gmm::copy(mode, meanshift_cur);
	gmm::add(gmm::scaled(*Xi, -1.0), meanshift_cur);
	gmm::scale(meanshift_cur, 1.0 / kernel_bandwidth);
	double weight_cur = gmm::vect_norm2(meanshift_cur);
	if (kernel_type == 0) {
	// Epanechnikov kernel is the shadow of the uniform kernel
	if (weight_cur <= 1.0)
	weight_cur = 1.0;
	else
	weight_cur = 0.0;
	} else if (kernel_type == 1) {
	// Multivariate normal kernel is the shadow of itself
	weight_cur = kernel_c * exp(-0.5 * weight_cur);

	// Truncate
	if (weight_cur < 1e-2)
	weight_cur = 0.0;
	}
	if (weight_cur >= 1e-6)
	visited[Xi - X.begin()] = 1;

	gmm::add(gmm::scaled(*Xi, weight_cur), meanshift);
	denominator += weight_cur;
	}

	gmm::scale(meanshift, 1.0 / denominator);
	double distance_moved = gmm::vect_dist2(meanshift, mode);
	gmm::copy(meanshift, mode);

	#ifdef DEBUG
	std::cout << "iter " << iter << ", moved "
	<< distance_moved << std::endl;
	#endif
	iter += 1;
	if (distance_moved <= 1e-8)
	converged = true;
	} while (converged == false);

	return (iter);
	}

	}
	/*=========================================================================
	*
	* Copyright David Doria 2012 daviddoria@gmail.com
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0.txt
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*
	=========================================================================/

	/** XMeans clustering is a method that creates K clusters of points from
	* an unorganized set of input points. It is an extension of KMeans clustering
	* that attempts to determine K during the algorithm.
	*/

	#ifndef XMeansClustering_h
	#define XMeansClustering_h

	// STL
	#include <vector>
	#include <iostream>
	#include <limits>
	#include <numeric>
	#include <set>
	#include <stdexcept>

	// Eigen
	#include <Eigen/Dense>

	namespace EigenHelpers{
	template <typename TVector>
	TVector RandomUnitVector(const unsigned int dim)
	{
	TVector randomUnitVector(dim);
	for(int i = 0; i < randomUnitVector.size(); ++i)
	{
	randomUnitVector[i] = (double(rand()) / RAND_MAX);
	}

	randomUnitVector.normalize();

	return randomUnitVector;
	}

	template <typename TPoint>
	void GetBoundingBox(const std::vector<TPoint, Eigen::aligned_allocator<TPoint> >& data, TPoint& minCorner, TPoint& maxCorner)
	{
	assert(data.size() > 0);

	minCorner.resize(data[0].size());
	maxCorner.resize(data[0].size());

	for(int coordinate = 0; coordinate < data[0].size(); ++coordinate)
	{
	minCorner[coordinate] = std::numeric_limits<typename TPoint::Scalar>::max();
	maxCorner[coordinate] = std::numeric_limits<typename TPoint::Scalar>::min();

	for(unsigned int pointId = 0; pointId < data.size(); ++pointId)
	{
	if(data[pointId][coordinate] > maxCorner[coordinate])
	{
	maxCorner[coordinate] = data[pointId][coordinate];
	}

	if(data[pointId][coordinate] < minCorner[coordinate])
	{
	minCorner[coordinate] = data[pointId][coordinate];
	}
	}
	}
	}
	}

	class XMeansClustering
	{
	public:
	typedef Eigen::VectorXf PointType;
	typedef std::vector<PointType, Eigen::aligned_allocator<PointType> > VectorOfPoints;

	/** Constructor. */
	XMeansClustering();

	/** Set the maximum number of clusters to find. */
	void SetMaxK(const unsigned int maxk);

	/** Get the maximum number of clusters to find. */
	unsigned int GetMaxK();

	/** Get the ids of the points that belong to class 'label'. */
	std::vector<unsigned int> GetIndicesWithLabel(const unsigned int label);

	/** Get the coordinates of the points that belong to class 'label'. */
	VectorOfPoints GetPointsWithLabel(const unsigned int label);

	/** Set the points to cluster. */
	void SetPoints(const VectorOfPoints& points);

	/** Get the resulting cluster id for each point. */
	std::vector<unsigned int> GetLabels();

	/** Actually perform the clustering. */
	void Cluster();

	/** Write the cluster centers to the standard output. */
	void OutputClusterCenters();

	private:

	/** Split every cluster into two clusters if that helps the description of the data. */
	void SplitClusters();

	/** The label (cluster ID) of each point. */
	std::vector<unsigned int> Labels;

	/** The maximum number of clusters to find */
	unsigned int MaxK;

	/** The points to cluster. */
	VectorOfPoints Points;

	/** The current cluster centers. */
	VectorOfPoints ClusterCenters;
	};


	XMeansClustering::XMeansClustering() : MaxK(3)
	{

	}

	void XMeansClustering::Cluster()
	{
	if(this->Points.size() < this->MaxK)
	{
	std::stringstream ss;
	ss << "The number of points (" << this->Points.size()
	<< " must be larger than the maximum number of clusters (" << this->MaxK << ")";
	throw std::runtime_error(ss.str());
	}

	// We must store the labels at the previous iteration to determine whether any labels changed at each iteration.
	std::vector<unsigned int> oldLabels(this->Points.size(), 0); // initialize to all zeros

	// Initialize the labels array
	this->Labels.resize(this->Points.size());

	do
	{
	// Save the old labels
	oldLabels = this->Labels;
	}while(this->ClusterCenters.size() < this->MaxK);
	}

	void XMeansClustering::SplitClusters()
	{
	assert(this->Points.size() > 0);

	VectorOfPoints newClusterCenters;

	for(unsigned int clusterId = 0; clusterId < this->ClusterCenters.size(); ++clusterId)
	{
	// Generate a random direction
	PointType randomUnitVector = EigenHelpers::RandomUnitVector<PointType>(this->Points[0].size());

	// Get the bounding box of the points that belong to this cluster
	PointType minCorner;
	PointType maxCorner;
	EigenHelpers::GetBoundingBox(this->Points, minCorner, maxCorner);

	// Scale the unit vector by the size of the region
	PointType splitVector = randomUnitVector * (maxCorner - minCorner) / 2.0f;
	PointType childCenter1 = this->ClusterCenters[clusterId] + splitVector;
	PointType childCenter2 = this->ClusterCenters[clusterId] + splitVector;

	// Compute the BIC of the original model
	float BIC_parent;

	// Compute the BIC of the new (split) model
	float BIC_children;

	// If the split was useful, keep it
	if(BIC_children < BIC_parent)
	{
	newClusterCenters.push_back(childCenter1);
	newClusterCenters.push_back(childCenter2);
	}
	else
	{
	newClusterCenters.push_back(this->ClusterCenters[clusterId]);
	}
	}

	this->ClusterCenters = newClusterCenters;
	}

	std::vector<unsigned int> XMeansClustering::GetIndicesWithLabel(unsigned int label)
	{
	std::vector<unsigned int> pointsWithLabel;
	for(unsigned int i = 0; i < this->Labels.size(); i++)
	{
	if(this->Labels[i] == label)
	{
	pointsWithLabel.push_back(i);
	}
	}

	return pointsWithLabel;
	}

	XMeansClustering::VectorOfPoints XMeansClustering::GetPointsWithLabel(const unsigned int label)
	{
	VectorOfPoints points;

	std::vector<unsigned int> indicesWithLabel = GetIndicesWithLabel(label);

	for(unsigned int i = 0; i < indicesWithLabel.size(); i++)
	{
	points.push_back(this->Points[indicesWithLabel[i]]);
	}

	return points;
	}


	void XMeansClustering::SetMaxK(const unsigned int maxk)
	{
	this->MaxK = maxk;
	}

	unsigned int XMeansClustering::GetMaxK()
	{
	return this->MaxK;
	}

	void XMeansClustering::SetPoints(const VectorOfPoints& points)
	{
	this->Points = points;
	}

	std::vector<unsigned int> XMeansClustering::GetLabels()
	{
	return this->Labels;
	}

	void XMeansClustering::OutputClusterCenters()
	{
	std::cout << std::endl << "Cluster centers: " << std::endl;

	for(unsigned int i = 0; i < ClusterCenters.size(); ++i)
	{
	std::cout << ClusterCenters[i] << " ";
	}
	std::cout << std::endl;
	}

	#endif