This page contains C++ code for an O(k×n×log(n)) method that classifies an array of numeric values such that the variance per class is minimal, known as Fisher's Natural Breaks Classification.
/*
copyright
This code is written by Maarten Hilferink, © Object Vision BV, and is provided
under the GNU GPL v3.0 license.
https://www.geodms.nl/CalcNaturalBreaks
This page contains C++ code for an O(k×n×log(n)) method for the classification of
an array of numeric values such that the variance per class is minimal, known as
Fisher's Natural Breaks Classification. This algorithm is an improvement on
Jenks' Natural Breaks Optimization method, which is an O(k×n²) algorithm. See:
Fisher's Natural Breaks Classification complexity proof.
This code is derived from clc/dll/src1/CalcClassBreaks.cpp in our Subversion
repository, which implements the ClassifyJenksFisher operation in the [GeoDms].
It is independently compilable and runnable, finds the Jenks-Fisher class breaks
for an array of random numbers, and is made available here.
*/
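/*
Illustrative example (added, not from the original text): for the nine values
{1, 2, 3, 10, 11, 12, 30, 31, 32} and k = 3, the natural breaks are {1, 10, 30};
the classes {1,2,3}, {10,11,12} and {30,31,32} minimize the within-class variance,
which is the quantity the Jenks-Fisher criterion optimizes. The first break is
always the minimum of the dataset.
*/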
#include <algorithm>
#include <assert.h>
#include <iostream>
#include <vector>
typedef std::size_t SizeT;
typedef SizeT CountType;
typedef std::pair<double, CountType> ValueCountPair;
typedef std::vector<double> LimitsContainer;
typedef std::vector<ValueCountPair> ValueCountPairContainer;
// helper funcs
template <typename T> T Min(T a, T b) { return (a<b) ? a : b; }
SizeT GetTotalCount(const ValueCountPairContainer& vcpc)
{
SizeT sum = 0;
ValueCountPairContainer::const_iterator
i = vcpc.begin(),
e = vcpc.end();
for(sum = 0; i!=e; ++i)
sum += (*i).second;
return sum;
}
// helper struct JenksFisher
struct JenksFisher // captures the intermediate data and methods for the calculation of Natural Class Breaks.
{
JenksFisher(const ValueCountPairContainer& vcpc, SizeT k)
: m_M(vcpc.size())
, m_K(k)
, m_BufSize(vcpc.size()-(k-1))
, m_PrevSSM(m_BufSize)
, m_CurrSSM(m_BufSize)
, m_CB(m_BufSize * (m_K-1))
, m_CBPtr()
{
m_CumulValues.reserve(vcpc.size());
double cwv=0;
CountType cw = 0, w;
for(SizeT i=0; i!=m_M; ++i)
{
assert(!i || vcpc[i].first > vcpc[i-1].first); // PRECONDITION: the value sequence must be strictly increasing
w = vcpc[i].second;
assert(w > 0); // PRECONDITION: all weights must be positive
cw += w;
assert(cw > w || !i); // No overflow? No loss of precision?
cwv += w * vcpc[i].first;
m_CumulValues.push_back(ValueCountPair(cwv, cw));
if (i < m_BufSize)
m_PrevSSM[i] = cwv * cwv / cw; // prepare SSM for first class. Last (k-1) values are omitted
}
}
double GetW(SizeT b, SizeT e)
// Gets sum of weights for elements b..e.
{
assert(b); // First element always belongs to class 0, thus queries should never include it.
assert(b<=e);
assert(e<m_M);
double res = m_CumulValues[e].second;
res -= m_CumulValues[b-1].second;
return res;
}
double GetWV(SizeT b, SizeT e)
// Gets sum of weighted values for elements b..e
{
assert(b);
assert(b<=e);
assert(e<m_M);
double res = m_CumulValues[e].first;
res -= m_CumulValues[b-1].first;
return res;
}
double GetSSM(SizeT b, SizeT e)
// Gets the Squared Mean for elements b..e, multiplied by weight.
// Note that n*mean^2 = sum^2/n when mean := sum/n
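// For weighted data the analogous identity is W*mean^2 = (sum of w*v)^2 / (sum of w),
// which is exactly GetWV(b,e)^2 / GetW(b,e) as computed below.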
{
double res = GetWV(b,e);
return res * res / GetW(b,e);
}
SizeT FindMaxBreakIndex(SizeT i, SizeT bp, SizeT ep)
// finds CB[i+m_NrCompletedRows] given that
// the result is at least bp+(m_NrCompletedRows-1)
// and less than ep+(m_NrCompletedRows-1)
// Complexity: O(ep-bp) <= O(m)
{
assert(bp < ep);
assert(bp <= i);
assert(ep <= i+1);
assert(i < m_BufSize);
assert(ep <= m_BufSize);
double minSSM = m_PrevSSM[bp] + GetSSM(bp+m_NrCompletedRows, i+m_NrCompletedRows);
SizeT foundP = bp;
while (++bp < ep)
{
double currSSM = m_PrevSSM[bp] + GetSSM(bp+m_NrCompletedRows, i+m_NrCompletedRows);
if (currSSM > minSSM)
{
minSSM = currSSM;
foundP = bp;
}
}
m_CurrSSM[i] = minSSM;
return foundP;
}
void CalcRange(SizeT bi, SizeT ei, SizeT bp, SizeT ep)
// find CB[i+m_NrCompletedRows]
// for all i>=bi and i<ei given that
// the results are at least bp+(m_NrCompletedRows-1)
// and less than ep+(m_NrCompletedRows-1)
// Complexity: O(log(ei-bi)*Max((ei-bi),(ep-bp))) <= O(m*log(m))
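// Divide-and-conquer: the optimal break position found for the middle element mi
// bounds the candidate ranges for the two halves, as encoded in the recursive calls below.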
{
assert(bi <= ei);
assert(ep <= ei);
assert(bp <= bi);
if (bi == ei)
return;
assert(bp < ep);
SizeT mi = (bi + ei)/2;
SizeT mp = FindMaxBreakIndex(mi, bp, Min<SizeT>(ep, mi+1));
assert(bp <= mp);
assert(mp < ep);
assert(mp <= mi);
// solve first half of the sub-problems with lower 'half' of possible outcomes
CalcRange(bi, mi, bp, Min<SizeT>(mi, mp+1));
m_CBPtr[ mi ] = mp; // store result for the middle element.
// solve second half of the sub-problems with upper 'half' of possible outcomes
CalcRange(mi+1, ei, mp, ep);
}
void CalcAll()
// complexity: O(m*log(m)*k)
{
if (m_K>=2)
{
m_CBPtr = m_CB.begin();
for (m_NrCompletedRows=1; m_NrCompletedRows<m_K-1; ++m_NrCompletedRows)
{
CalcRange(0, m_BufSize, 0, m_BufSize); // complexity: O(m*log(m))
m_PrevSSM.swap(m_CurrSSM);
m_CBPtr += m_BufSize;
}
}
}
SizeT m_M, m_K, m_BufSize;
ValueCountPairContainer m_CumulValues;
std::vector<double> m_PrevSSM;
std::vector<double> m_CurrSSM;
std::vector<SizeT> m_CB;
std::vector<SizeT>::iterator m_CBPtr;
SizeT m_NrCompletedRows;
};
// GetValueCountPairs
//
// GetValueCountPairs sorts chunks of values and then merges them in order to minimize extra memory and work when many values are equal.
// This is done recursively while retaining used intermediary buffers in order to minimize heap allocations.
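// Example (illustrative): the input values {3.0, 1.0, 3.0, 2.0} yield the sorted
// unique pairs {(1.0,1), (2.0,1), (3.0,2)}.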
const SizeT BUFFER_SIZE = 1024;
void GetCountsDirect(ValueCountPairContainer& vcpc, const double* values, SizeT size)
{
assert(size <= BUFFER_SIZE);
assert(size > 0);
assert(vcpc.empty());
double buffer[BUFFER_SIZE];
std::copy(values, values+size, buffer);
std::sort(buffer, buffer+size);
double currValue = buffer[0];
SizeT currCount = 1;
for (SizeT index = 1; index != size; ++index)
{
if (currValue < buffer[index])
{
vcpc.push_back(ValueCountPair(currValue, currCount));
currValue = buffer[index];
currCount = 1;
}
else
++currCount;
}
vcpc.push_back(ValueCountPair(currValue, currCount));
}
struct CompareFirst
{
bool operator () (const ValueCountPair& lhs, const ValueCountPair& rhs)
{
return lhs.first < rhs.first;
}
};
void MergeToLeft(ValueCountPairContainer& vcpcLeft, const ValueCountPairContainer& vcpcRight, ValueCountPairContainer& vcpcDummy)
{
assert(vcpcDummy.empty());
vcpcDummy.swap(vcpcLeft);
vcpcLeft.resize(vcpcRight.size() + vcpcDummy.size());
std::merge(vcpcRight.begin(), vcpcRight.end(), vcpcDummy.begin(), vcpcDummy.end(), vcpcLeft.begin(), CompareFirst());
ValueCountPairContainer::iterator
currPair = vcpcLeft.begin(),
lastPair = vcpcLeft.end();
ValueCountPairContainer::iterator index = currPair+1;
while (index != lastPair && currPair->first < index->first)
{
currPair = index;
++index;
}
double currValue = currPair->first;
SizeT currCount = currPair->second;
for (; index != lastPair;++index)
{
if (currValue < index->first)
{
*currPair++ = ValueCountPair(currValue, currCount);
currValue = index->first;
currCount = index->second;
}
else
currCount += index->second;
}
*currPair++ = ValueCountPair(currValue, currCount);
vcpcLeft.erase(currPair, lastPair);
vcpcDummy.clear();
}
struct ValueCountPairContainerArray : std::vector<ValueCountPairContainer>
{
void resize(SizeT k)
{
assert(capacity() >= k);
while (size() < k)
{
push_back(ValueCountPairContainer());
back().reserve(BUFFER_SIZE);
}
}
void GetValueCountPairs(ValueCountPairContainer& vcpc, const double* values, SizeT size, unsigned int nrUsedContainers)
{
assert(vcpc.empty());
if (size <= BUFFER_SIZE)
GetCountsDirect(vcpc, values, size);
else
{
resize(nrUsedContainers+2);
unsigned int m = size/2;
GetValueCountPairs(vcpc, values, m, nrUsedContainers);
GetValueCountPairs(begin()[nrUsedContainers], values + m, size - m, nrUsedContainers+1);
MergeToLeft(vcpc, begin()[nrUsedContainers], begin()[nrUsedContainers+1]);
begin()[nrUsedContainers].clear();
}
assert(GetTotalCount(vcpc) == size);
}
};
void GetValueCountPairs(ValueCountPairContainer& vcpc, const double* values, SizeT n)
{
vcpc.clear();
if(n)
{
ValueCountPairContainerArray vcpca;
// max nr of halvings is log2(max cardinality / BUFFER_SIZE); max cardinality is SizeT(-1)
vcpca.reserve(3+8*sizeof(SizeT)-10);
vcpca.GetValueCountPairs(vcpc, values, n, 0);
assert(vcpc.size());
}
}
void ClassifyJenksFisherFromValueCountPairs(LimitsContainer& breaksArray, SizeT k, const ValueCountPairContainer& vcpc)
{
breaksArray.resize(k);
SizeT m = vcpc.size();
assert(k <= m); // PRECONDITION
if (!k)
return;
JenksFisher jf(vcpc, k);
if (k > 1)
{
jf.CalcAll();
SizeT lastClassBreakIndex = jf.FindMaxBreakIndex(jf.m_BufSize-1, 0, jf.m_BufSize);
while (--k)
{
breaksArray[k] = vcpc[lastClassBreakIndex+k].first;
assert(lastClassBreakIndex < jf.m_BufSize);
if (k > 1)
{
jf.m_CBPtr -= jf.m_BufSize;
lastClassBreakIndex = jf.m_CBPtr[lastClassBreakIndex];
}
}
assert(jf.m_CBPtr == jf.m_CB.begin());
}
assert( k == 0 );
breaksArray[0] = vcpc[0].first; // break for the first class is the minimum of the dataset.
}
// test code
#include "boost/random.hpp"
int main(int c, char** argv)
{
const double rangeMin = 0.0;
const double rangeMax = 10.0;
typedef boost::uniform_real<double> NumberDistribution;
typedef boost::mt19937 RandomNumberGenerator;
typedef boost::variate_generator<RandomNumberGenerator&, NumberDistribution> Generator;
NumberDistribution distribution(rangeMin, rangeMax);
RandomNumberGenerator generator;
generator.seed(0); // fixed seed for reproducible results
Generator numberGenerator(generator, distribution);
const int n = 1000000;
const int k = 10;
std::cout << "Generating random numbers..." << std::endl;
std::vector<double> values;
values.reserve(n);
for (int i=0; i!=n; ++i)
{
double v = numberGenerator();
values.push_back(v*v); // populate a distribution slightly more interesting than uniform, with a lower density at higher values.
}
assert(values.size() == n);
std::cout << "Generating sortedUniqueValueCounts ..." << std::endl;
ValueCountPairContainer sortedUniqueValueCounts;
GetValueCountPairs(sortedUniqueValueCounts, &values[0], n);
std::cout << "Finding Jenks ClassBreaks..." << std::endl;
LimitsContainer resultingbreaksArray;
ClassifyJenksFisherFromValueCountPairs(resultingbreaksArray, k, sortedUniqueValueCounts);
std::cout << "Reporting results..." << std::endl;
for (double breakValue: resultingbreaksArray)
std::cout << breakValue << std::endl << std::endl;
std::cout << "Press a char and <enter> to terminate" << std::endl;
char ch;
std::cin >> ch; // wait for user to enter a key
} // main
/*
* https://github.com/pschoepf/naturalbreaks/blob/master/src/main/javascript/JenksFisher.js
* Port of Jenks/Fisher breaks originally created in C by Maarten Hilferink.
*
* Copyright (C) {2015} {Philipp Schoepf}
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Internal helper used for assertions.
*
* @param {boolean} correct The boolean input.
* @param {String} msg Message to be thrown.
*/
function assert(correct, msg) {
//alert("correct="+correct+",\n\nactual="+actual);
if (!correct)
throw new Error("ASSERTION FAILED: " + msg);
}
/**
* Internal helper to swap two arrays.
*
* @param {Array} a Array one.
* @param {Array} b Array two and the holder of the result.
*/
function swapArrays(a, b) {
var temp = a.slice(0);
a.length = 0;
a.push.apply(a, b);
b.length = 0;
b.push.apply(b, temp);
}
/**
* Constructor that initializes main variables used in fisher calculation of natural breaks.
*
* @param vcpc Ordered list of pairs of values to occurrence counts.
* @param k Number of breaks to find.
*/
function JenksFisher(/*array of {val:double, count:double}*/ vcpc, /*int*/ k) {
if (!(this instanceof JenksFisher)) {
return new JenksFisher(vcpc, k);
}
this.cumulValues = [/*{cwv: cumulative weighted value as double, cw: cumulative weight as double}*/];
this.numValues = 0;
this.numBreaks = 0;
this.bufferSize = 0;
this.previousSSM = [];
this.currentSSM = [];
this.classBreaks = [];
this.classBreaksIndex = 0;
this.completedRows = 0;
this.numValues = vcpc.length;
this.numBreaks = k;
this.bufferSize = (vcpc.length - (k - 1));
this.previousSSM = new Array(this.bufferSize);
this.currentSSM = new Array(this.bufferSize);
this.classBreaks = new Array(this.bufferSize * (this.numBreaks - 1));
this.classBreaksIndex = 0;
this.completedRows = 0;
var cwv = 0.0;
var cw = 0, w = 0;
//console.debug("init:: values and weight:"+vcpc.length+", buffSize:"+this.bufferSize+", content:"+JSON.stringify(vcpc));
for (var i = 0; i != this.numValues; ++i)
{
assert(!i || vcpc[i].val > vcpc[i - 1].val); // PRECONDITION: the value sequence must be strictly increasing
w = vcpc[i].count;
assert(w > 0); // PRECONDITION: all weights must be positive
cw += w;
assert(cw >= w); // No overflow? No loss of precision?
cwv += w * vcpc[i].val;
this.cumulValues.push({cwv: cwv, cw: cw});
if (i < this.bufferSize)
this.previousSSM[i] = cwv * cwv / cw; // prepare SSM for first class. Last (k-1) values are omitted
}
//console.debug("init:: CumulValues:"+this.cumulValues);
//console.debug("init:: previousSSM:"+this.previousSSM)
}
/**
* Gets sum of weights for elements with index b..e.
*
* @param b index of begin element
* @param e index of end element
* @return sum of weights.
*/
JenksFisher.prototype.getSumOfWeights = function (/*int*/ b, /*int*/ e)
{
assert(b); // First element always belongs to class 0, thus queries should never include it.
assert(b <= e);
assert(e < this.numValues);
var res = this.cumulValues[e].cw;
res -= this.cumulValues[b - 1].cw;
return res;
}
/**
* Gets sum of weighted values for elements with index b..e
*
* @param b index of begin element.
* @param e index of end element
* @return the cumul. sum of the values*weight
*/
JenksFisher.prototype.getSumOfWeightedValues = function (/*int*/ b, /*int*/ e)
// Gets sum of weighted values for elements b..e
{
assert(b);
assert(b <= e);
assert(e < this.numValues);
var res = this.cumulValues[e].cwv;
res -= this.cumulValues[b - 1].cwv;
return res;
}
/**
* Gets the Squared Mean for elements within index b..e, multiplied by weight. Note that
* n*mean^2 = sum^2/n when mean := sum/n
*
* @param b index of begin element
* @param e index of end element
* @return the sum of squared mean
*/
JenksFisher.prototype.getSSM = function (/*int*/ b, /*int*/ e)
{
var res = this.getSumOfWeightedValues(b, e);
return res * res / this.getSumOfWeights(b, e);
}
/**
* Finds CB[i+completedRows] given that the result is at least
* bp+(completedRows-1) and less than ep+(completedRows-1)
* Complexity: O(ep-bp) <= O(m)
*
* @param i index of the element whose optimal break is sought
* @param bp lowest candidate break position (inclusive)
* @param ep highest candidate break position (exclusive)
*
* @return the index
*/
JenksFisher.prototype.findMaxBreakIndex = function (/*int*/ i, /*int*/ bp, /*int*/ ep)
{
assert(bp < ep);
assert(bp <= i);
assert(ep <= i + 1);
assert(i < this.bufferSize);
assert(ep <= this.bufferSize);
var minSSM = this.previousSSM[bp] + this.getSSM(bp + this.completedRows, i + this.completedRows);
var foundP = bp;
while (++bp < ep)
{
var currSSM = this.previousSSM[bp] + this.getSSM(bp + this.completedRows, i + this.completedRows);
if (currSSM > minSSM)
{
//console.debug("findMaxBreakIndex:: new minSSM: " + minSSM+ ", currSSM:"+ currSSM);
minSSM = currSSM;
foundP = bp;
}
}
this.currentSSM[i] = minSSM;
return foundP;
};
/**
* Find CB[i+completedRows] for all i>=bi and i<ei given that the
* results are at least bp+(completedRows-1) and less than
* ep+(completedRows-1)
* Complexity: O(log(ei-bi)*Max((ei-bi),(ep-bp)))
* <= O(m*log(m))
*
*
* @param bi
* @param ei
* @param bp
* @param ep
* @return
*/
JenksFisher.prototype.calcRange = function (/*int*/ bi, /*int*/ ei, /*int*/ bp, /*int*/ ep)
{
assert(bi <= ei);
assert(ep <= ei);
assert(bp <= bi);
if (bi === ei)
return;
assert(bp < ep);
var mi = Math.floor((bi + ei) / 2);
var mp = this.findMaxBreakIndex(mi, bp, Math.min(ep, mi + 1));
assert(bp <= mp);
assert(mp < ep);
assert(mp <= mi);
// solve first half of the sub-problems with lower 'half' of possible outcomes
this.calcRange(bi, mi, bp, Math.min(mi, mp + 1));
this.classBreaks[this.classBreaksIndex + mi ] = mp; // store result for the middle element.
// solve second half of the sub-problems with upper 'half' of possible outcomes
this.calcRange(mi + 1, ei, mp, ep);
};
/**
* Starting point of calculation of breaks.
*
* complexity: O(m*log(m)*k)
*/
JenksFisher.prototype.calcAll = function ()
{
if (this.numBreaks >= 2)
{
this.classBreaksIndex = 0;
for (this.completedRows = 1; this.completedRows < this.numBreaks - 1; ++this.completedRows)
{
this.calcRange(0, this.bufferSize, 0, this.bufferSize); // complexity: O(m*log(m))
swapArrays(this.previousSSM, this.currentSSM);
this.classBreaksIndex += this.bufferSize;
}
}
};
/**
* Does the internal processing to actually create the breaks.
*
* @param k number of breaks
* @param vcpc asc ordered input of values and their occurrence counts.
*/
function classifyJenksFisherFromValueCountPairs(/*double[]*/ breaksArray, /*int*/ k, /*{val:value,count:count}*/ vcpc)
{
var m = vcpc.length;
assert(k <= m); // PRECONDITION
if (!k)
return;
var jf = new JenksFisher(vcpc, k);
if (k > 1) {
// runs the actual calculation
jf.calcAll();
var lastClassBreakIndex = jf.findMaxBreakIndex(jf.bufferSize - 1, 0, jf.bufferSize);
while (--k) {
// assign the break values to the result
breaksArray[k] = vcpc[lastClassBreakIndex + k].val;
assert(lastClassBreakIndex < jf.bufferSize);
if (k > 1)
{
jf.classBreaksIndex -= jf.bufferSize;
lastClassBreakIndex = jf.classBreaks[jf.classBreaksIndex + lastClassBreakIndex];
}
}
assert(jf.classBreaksIndex === 0); // back at the first row of class breaks, mirroring the C++ assert
}
assert(k === 0);
breaksArray[0] = vcpc[0].val; // break for the first class is the minimum of the dataset.
}
/**
* Main entry point for creation of Jenks-Fisher natural breaks.
*
* @param {type} values - array of the values; they do not need to be sorted.
* @param {type} k - number of breaks to create
* @returns {Array} - Array with breaks
*/
function createJenksFisherBreaksArray(/*double*/ values,/*int*/ k)
{
// create pairs of value->number of occurrences (weight), which is the input for the JF algorithm
var sortedUniqueValueCounts = getValueCountPairs(values);
var breaksArray = new Array(k);
if (sortedUniqueValueCounts.length>k){
classifyJenksFisherFromValueCountPairs(breaksArray, k, sortedUniqueValueCounts);
}
else {
breaksArray = [];
sortedUniqueValueCounts.forEach(function(el){
breaksArray.push(el.val)});
}
return breaksArray;
}
/**
* create list with weighted unique values, sorted ASC by values.
* @param {type} rawValues
* @returns [{val:value as int,count:weight as int},...]
*/
function getValueCountPairs(rawValues) {
var result = {};
rawValues.forEach(function (el) {
var k = "k_"+Number(el).toString();
if (result[k] == null){
result[k] = 1;
}else{
result[k] = result[k] + 1;
}
});
var sortedResults = [];
Object.keys(result).forEach(function (key) {
sortedResults.push({val: Number(key.substring(2)), count: result[key]});
});
sortedResults.sort(function (a, b) {
return a.val - b.val;
});
return sortedResults;
}
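// Usage sketch (added for illustration, not part of the original port): assuming the
// functions above are loaded in a browser console or a Node script, this classifies
// 1000 random values into 5 classes, mirroring the C++ test program above.
var sampleValues = [];
for (var s = 0; s < 1000; ++s) {
var r = Math.random() * 10;
sampleValues.push(r * r); // same squared-uniform distribution as the C++ test
}
var sampleBreaks = createJenksFisherBreaksArray(sampleValues, 5);
console.log("Jenks-Fisher breaks:", sampleBreaks);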