Last active
November 11, 2021 01:56
-
-
Save kaizhu256/064f65818c75b3ab5fc2a6309ee19922 to your computer and use it in GitHub Desktop.
This page contains C++ code of an O(k×n×log(n)) method for the classification of an array of numeric values such that the variance per class is minimal, known as Fisher's Natural Breaks Classification
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
copyright | |
This code is written by Maarten Hilferink, © Object Vision BV, and is provided | |
under GNU GPL v3.0 license | |
https://www.geodms.nl/CalcNaturalBreaks | |
This page contains C++ code of an O(k×n×log(n)) method for the classification of | |
an array of numeric values such that the variance per class is minimal, known as | |
a Fisher's Natural Breaks Classification. This algorithm is an improvement of | |
Jenks' Natural Breaks Optimization method, which is an O(k×n²) algorithm. See: | |
Fisher's Natural Breaks Classification complexity proof | |
This code is derived from clc/dll/src1/CalcClassBreaks.cpp in our Subversion | |
Repository which implements the ClassifyJenksFisher operation in the [GeoDms]. | |
It is independently compilable and runnable and finds the JenksFisher | |
class-breaks for an array of random numbers and made that available here. | |
*/ | |
#include <algorithm> | |
#include <assert.h> | |
#include <iostream> | |
#include <vector> | |
// Basic type aliases used throughout the Jenks-Fisher implementation.
using SizeT = std::size_t;                                   // index / size type
using CountType = SizeT;                                     // occurrence count (weight) of a value
using ValueCountPair = std::pair<double, CountType>;         // (value, weight) pair
using LimitsContainer = std::vector<double>;                 // resulting class-break values
using ValueCountPairContainer = std::vector<ValueCountPair>; // sorted unique values with counts
// helper funcs

// Returns the smaller of two values (local std::min equivalent, kept so the
// file stays independently compilable).
template <typename T>
T Min(T a, T b)
{
    if (a < b)
        return a;
    return b;
}
// Returns the sum of all occurrence counts in vcpc.
// Used by assertions to check that no weight is lost while splitting/merging.
SizeT GetTotalCount(const ValueCountPairContainer& vcpc)
{
    SizeT sum = 0;
    // range-for replaces the manual iterator loop (file already uses C++11)
    for (const ValueCountPair& vcp : vcpc)
        sum += vcp.second;
    return sum;
}
// helper struct JenksFisher
struct JenksFisher // captures the intermediate data and methods for the calculation of Natural Class Breaks.
{
    // Precomputes the cumulative (weighted value, weight) table m_CumulValues and
    // seeds m_PrevSSM with the single-class SSM (sum of squared means) of each prefix.
    // PRECONDITIONS: values strictly increasing, all counts positive, 1 <= k <= vcpc.size().
    JenksFisher(const ValueCountPairContainer& vcpc, SizeT k)
        : m_M(vcpc.size())
        , m_K(k)
        , m_BufSize(vcpc.size()-(k-1)) // the last (k-1) values can never start a non-final class
        , m_PrevSSM(m_BufSize)
        , m_CurrSSM(m_BufSize)
        , m_CB(m_BufSize * (m_K-1)) // flattened matrix: one row of break positions per row of the DP
        , m_CBPtr()
    {
        m_CumulValues.reserve(vcpc.size());
        double cwv=0;        // cumulative weighted value (sum of value*count so far)
        CountType cw = 0, w; // cumulative weight, current weight
        for(SizeT i=0; i!=m_M; ++i)
        {
            assert(!i || vcpc[i].first > vcpc[i-1].first); // PRECONDITION: the value sequence must be strictly increasing
            w = vcpc[i].second;
            assert(w > 0); // PRECONDITION: all weights must be positive
            cw += w;
            assert(cw > w || !i); // No overflow? No loss of precision?
            cwv += w * vcpc[i].first;
            m_CumulValues.push_back(ValueCountPair(cwv, cw));
            if (i < m_BufSize)
                m_PrevSSM[i] = cwv * cwv / cw; // prepare SSM for first class. Last (k-1) values are omitted
        }
    }
    double GetW(SizeT b, SizeT e)
    // Gets sum of weighs for elements b..e.
    {
        assert(b); // First element always belongs to class 0, thus queries should never include it.
        assert(b<=e);
        assert(e<m_M);
        // difference of cumulative weights yields the total weight of range b..e
        double res = m_CumulValues[e].second;
        res -= m_CumulValues[b-1].second;
        return res;
    }
    double GetWV(SizeT b, SizeT e)
    // Gets sum of weighed values for elements b..e
    {
        assert(b);
        assert(b<=e);
        assert(e<m_M);
        // difference of cumulative weighted values yields the weighted sum of b..e
        double res = m_CumulValues[e].first;
        res -= m_CumulValues[b-1].first;
        return res;
    }
    double GetSSM(SizeT b, SizeT e)
    // Gets the Squared Mean for elements b..e, multiplied by weight.
    // Note that n*mean^2 = sum^2/n when mean := sum/n
    {
        double res = GetWV(b,e);
        return res * res / GetW(b,e);
    }
    SizeT FindMaxBreakIndex(SizeT i, SizeT bp, SizeT ep)
    // finds CB[i+m_NrCompletedRows] given that
    // the result is at least bp+(m_NrCompletedRows-1)
    // and less than ep+(m_NrCompletedRows-1)
    // Complexity: O(ep-bp) <= O(m)
    {
        assert(bp < ep);
        assert(bp <= i);
        assert(ep <= i+1);
        assert(i < m_BufSize);
        assert(ep <= m_BufSize);
        // NOTE: despite its name, minSSM tracks the MAXIMUM combined SSM seen so
        // far (see the `>` test below); a larger total SSM corresponds to a
        // smaller within-class variance per the file header's formulation.
        double minSSM = m_PrevSSM[bp] + GetSSM(bp+m_NrCompletedRows, i+m_NrCompletedRows);
        SizeT foundP = bp;
        while (++bp < ep)
        {
            double currSSM = m_PrevSSM[bp] + GetSSM(bp+m_NrCompletedRows, i+m_NrCompletedRows);
            if (currSSM > minSSM)
            {
                minSSM = currSSM;
                foundP = bp;
            }
        }
        m_CurrSSM[i] = minSSM; // best SSM for sub-problem i; input for the next DP row
        return foundP;
    }
    void CalcRange(SizeT bi, SizeT ei, SizeT bp, SizeT ep)
    // find CB[i+m_NrCompletedRows]
    // for all i>=bi and i<ei given that
    // the results are at least bp+(m_NrCompletedRows-1)
    // and less than ep+(m_NrCompletedRows-1)
    // Complexity: O(log(ei-bi)*Max((ei-bi),(ep-bp))) <= O(m*log(m))
    {
        assert(bi <= ei);
        assert(ep <= ei);
        assert(bp <= bi);
        if (bi == ei)
            return;
        assert(bp < ep);
        // divide & conquer: solving the middle sub-problem first narrows the
        // candidate break ranges for both halves, as exploited by the recursive
        // calls below
        SizeT mi = (bi + ei)/2;
        SizeT mp = FindMaxBreakIndex(mi, bp, Min<SizeT>(ep, mi+1));
        assert(bp <= mp);
        assert(mp < ep);
        assert(mp <= mi);
        // solve first half of the sub-problems with lower 'half' of possible outcomes
        CalcRange(bi, mi, bp, Min<SizeT>(mi, mp+1));
        m_CBPtr[ mi ] = mp; // store result for the middle element.
        // solve second half of the sub-problems with upper 'half' of possible outcomes
        CalcRange(mi+1, ei, mp, ep);
    }
    void CalcAll()
    // complexity: O(m*log(m)*k)
    {
        if (m_K>=2) // nothing to precompute for a single class
        {
            m_CBPtr = m_CB.begin();
            // after the loop m_NrCompletedRows == m_K-1 and m_CBPtr has advanced
            // one row per iteration; the caller walks m_CBPtr back while reading
            // the stored break positions
            for (m_NrCompletedRows=1; m_NrCompletedRows<m_K-1; ++m_NrCompletedRows)
            {
                CalcRange(0, m_BufSize, 0, m_BufSize); // complexity: O(m*log(m))
                m_PrevSSM.swap(m_CurrSSM); // current row becomes the previous row
                m_CBPtr += m_BufSize;
            }
        }
    }
    SizeT m_M, m_K, m_BufSize;             // nr of values, nr of classes, nr of sub-problems per row
    ValueCountPairContainer m_CumulValues; // (cumulative weighted value, cumulative weight) per element
    std::vector<double> m_PrevSSM;         // SSM row of the previous iteration
    std::vector<double> m_CurrSSM;         // SSM row being computed
    std::vector<SizeT> m_CB;               // flattened break-position matrix, m_BufSize entries per row
    std::vector<SizeT>::iterator m_CBPtr;  // current row inside m_CB
    SizeT m_NrCompletedRows;               // nr of classes already folded into m_PrevSSM
};
// GetValueCountPairs | |
// | |
// GetValueCountPairs sorts chunks of values and then merges them in order to minimize extra memory and work when many values are equal. | |
// This is done recursively while retaining used intermediary buffers in order to minimize heap allocations. | |
// Chunk size for the sort-and-count step; also the stack-buffer size in GetCountsDirect.
// constexpr (file already uses C++11) makes it usable in constant expressions.
constexpr SizeT BUFFER_SIZE = 1024;
// Sorts up to BUFFER_SIZE values in a local buffer and appends one
// (value, count) pair per distinct value to vcpc, in ascending value order.
void GetCountsDirect(ValueCountPairContainer& vcpc, const double* values, SizeT size)
{
    assert(size <= BUFFER_SIZE);
    assert(size > 0);
    assert(vcpc.empty());
    double buffer[BUFFER_SIZE];
    std::copy(values, values+size, buffer);
    std::sort(buffer, buffer+size);
    // run-length encode the sorted buffer: runBegin marks the start of the
    // current run of equal values
    SizeT runBegin = 0;
    for (SizeT pos = 1; pos != size; ++pos)
    {
        if (buffer[runBegin] < buffer[pos])
        {
            vcpc.push_back(ValueCountPair(buffer[runBegin], pos - runBegin));
            runBegin = pos;
        }
    }
    vcpc.push_back(ValueCountPair(buffer[runBegin], size - runBegin)); // flush the final run
}
struct CompareFirst | |
{ | |
bool operator () (const ValueCountPair& lhs, const ValueCountPair& rhs) | |
{ | |
return lhs.first < rhs.first; | |
} | |
}; | |
// Merges vcpcRight into vcpcLeft (both sorted by value), combining the counts
// of equal values. vcpcDummy is scratch space: it must be empty on entry and is
// left empty on exit, retaining its capacity for reuse.
void MergeToLeft(ValueCountPairContainer& vcpcLeft, const ValueCountPairContainer& vcpcRight, ValueCountPairContainer& vcpcDummy)
{
    assert(vcpcDummy.empty());
    vcpcDummy.swap(vcpcLeft); // move left's contents aside so left can receive the merge
    vcpcLeft.resize(vcpcRight.size() + vcpcDummy.size());
    std::merge(vcpcRight.begin(), vcpcRight.end(), vcpcDummy.begin(), vcpcDummy.end(), vcpcLeft.begin(), CompareFirst());

    // coalesce adjacent entries with equal values in place:
    // writePos always points at the last committed unique entry
    ValueCountPairContainer::iterator writePos = vcpcLeft.begin();
    ValueCountPairContainer::iterator endPos = vcpcLeft.end();
    for (ValueCountPairContainer::iterator readPos = vcpcLeft.begin() + 1; readPos != endPos; ++readPos)
    {
        if (writePos->first < readPos->first)
            *++writePos = *readPos;            // new distinct value: commit it
        else
            writePos->second += readPos->second; // same value: accumulate its count
    }
    vcpcLeft.erase(++writePos, endPos); // drop the tail left over after coalescing
    vcpcDummy.clear();
}
struct ValueCountPairContainerArray : std::vector<ValueCountPairContainer> | |
{ | |
void resize(SizeT k) | |
{ | |
assert(capacity() >= k); | |
while (size() < k) | |
{ | |
push_back(ValueCountPairContainer()); | |
back().reserve(BUFFER_SIZE); | |
} | |
} | |
void GetValueCountPairs(ValueCountPairContainer& vcpc, const double* values, SizeT size, unsigned int nrUsedContainers) | |
{ | |
assert(vcpc.empty()); | |
if (size <= BUFFER_SIZE) | |
GetCountsDirect(vcpc, values, size); | |
else | |
{ | |
resize(nrUsedContainers+2); | |
unsigned int m = size/2; | |
GetValueCountPairs(vcpc, values, m, nrUsedContainers); | |
GetValueCountPairs(begin()[nrUsedContainers], values + m, size - m, nrUsedContainers+1); | |
MergeToLeft(vcpc, begin()[nrUsedContainers], begin()[nrUsedContainers+1]); | |
begin()[nrUsedContainers].clear(); | |
} | |
assert(GetTotalCount(vcpc) == size); | |
} | |
}; | |
// Builds the ascending sequence of unique (value, count) pairs for n raw values.
// Leaves vcpc empty when n is zero.
void GetValueCountPairs(ValueCountPairContainer& vcpc, const double* values, SizeT n)
{
    vcpc.clear();
    if (!n)
        return;
    ValueCountPairContainerArray scratchPool;
    // max nr halving is log2(max cardinality / BUFFER_SIZE); max cardinality is SizeT(-1)
    scratchPool.reserve(3 + 8*sizeof(SizeT) - 10);
    scratchPool.GetValueCountPairs(vcpc, values, n, 0);
    assert(vcpc.size());
}
// Fills breaksArray with the k class lower bounds that the Jenks-Fisher
// algorithm computes for the sorted (value, count) pairs in vcpc.
// breaksArray[0] is always the dataset minimum.
// PRECONDITION: k <= vcpc.size().
void ClassifyJenksFisherFromValueCountPairs(LimitsContainer& breaksArray, SizeT k, const ValueCountPairContainer& vcpc)
{
    breaksArray.resize(k);
    SizeT m = vcpc.size();
    assert(k <= m); // PRECONDITION
    if (!k)
        return;
    JenksFisher jf(vcpc, k); // builds cumulative tables and the first SSM row
    if (k > 1)
    {
        jf.CalcAll(); // fills the intermediate rows of the break matrix
        // break position of the last class, found directly from the final SSM row
        SizeT lastClassBreakIndex = jf.FindMaxBreakIndex(jf.m_BufSize-1, 0, jf.m_BufSize);
        // walk the break matrix backwards, one row per remaining class
        while (--k)
        {
            // vcpc index of the first element of class k is lastClassBreakIndex+k
            breaksArray[k] = vcpc[lastClassBreakIndex+k].first;
            assert(lastClassBreakIndex < jf.m_BufSize);
            if (k > 1)
            {
                // step back one matrix row; it stores the optimal previous break
                jf.m_CBPtr -= jf.m_BufSize;
                lastClassBreakIndex = jf.m_CBPtr[lastClassBreakIndex];
            }
        }
        assert(jf.m_CBPtr == jf.m_CB.begin()); // all rows must have been consumed
    }
    assert( k == 0 );
    breaksArray[0] = vcpc[0].first; // break for the first class is the minimum of the dataset.
}
// test code | |
#include "boost/random.hpp" | |
int main(int c, char** argv) | |
{ | |
const double rangeMin = 0.0; | |
const double rangeMax = 10.0; | |
typedef boost::uniform_real<double> NumberDistribution; | |
typedef boost::mt19937 RandomNumberGenerator; | |
typedef boost::variate_generator<RandomNumberGenerator&, NumberDistribution> Generator; | |
NumberDistribution distribution(rangeMin, rangeMax); | |
RandomNumberGenerator generator; | |
generator.seed(0); // seed with the current time | |
Generator numberGenerator(generator, distribution); | |
const int n = 1000000; | |
const int k = 10; | |
std::cout << "Generating random numbers..." << std::endl; | |
std::vector<double> values; | |
values.reserve(n); | |
for (int i=0; i!=n; ++i) | |
{ | |
double v = numberGenerator(); | |
values.push_back(v*v); //populate a distribuiton slightly more interesting than uniform, with a lower density at higher values. | |
} | |
assert(values.size() == n); | |
std::cout << "Generating sortedUniqueValueCounts ..." << std::endl; | |
ValueCountPairContainer sortedUniqueValueCounts; | |
GetValueCountPairs(sortedUniqueValueCounts, &values[0], n); | |
std::cout << "Finding Jenks ClassBreaks..." << std::endl; | |
LimitsContainer resultingbreaksArray; | |
ClassifyJenksFisherFromValueCountPairs(resultingbreaksArray, k, sortedUniqueValueCounts); | |
std::cout << "Reporting results..." << std::endl; | |
for (double breakValue: resultingbreaksArray) | |
std::cout << breakValue << std::endl << std::endl; | |
std::cout << "Press a char and <enter> to terminate" << std::endl; | |
char ch; | |
std::cin >> ch; // wait for user to enter a key | |
} // main |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* https://github.com/pschoepf/naturalbreaks/blob/master/src/main/javascript/JenksFisher.js | |
* Port of Jenks/Fisher breaks originally created in C by Maarten Hilferink. | |
* | |
* Copyright (C) {2015} {Philipp Schoepf} | |
* | |
* This program is free software: you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation, either version 3 of the License, or | |
* (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program. If not, see <http://www.gnu.org/licenses/>. | |
**/ | |
/** | |
* Internal helper used for assertions. | |
* | |
* @param {boolean} correct The boolean input. | |
* @param {String} msg Message to be thrown. | |
*/ | |
function assert(correct, msg) { | |
//alert("correct="+correct+",\n\nactual="+actual); | |
if (!correct) | |
throw new Error("ASSERTION FAILED: " + msg); | |
} | |
/**
 * Internal helper to swap the CONTENTS of two arrays in place,
 * preserving both array identities (no new arrays are returned).
 *
 * @param {Array} a Array one.
 * @param {Array} b Array two.
 */
function swapArrays(a, b) {
    var fromA = a.splice(0, a.length); // empties a, keeps its identity
    var fromB = b.splice(0, b.length); // empties b, keeps its identity
    Array.prototype.push.apply(a, fromB);
    Array.prototype.push.apply(b, fromA);
}
/**
 * Constructor that initializes main variables used in fisher calculation of natural breaks.
 *
 * @param vcpc Ordered list of pairs of values to occurrence counts.
 * @param k Number of breaks to find.
 */
function JenksFisher(/*array of {val:double, count:double}*/ vcpc, /*int*/ k) {
    if (!(this instanceof JenksFisher)) {
        // BUG FIX: forward the arguments when called without `new`
        // (the original called `new JenksFisher()` with no arguments,
        // producing an unusable instance)
        return new JenksFisher(vcpc, k);
    }
    // cumulative table, one entry per input value:
    // {cwv: cumulative weighted value, cw: cumulative weight}
    this.cumulValues = [];
    this.numValues = vcpc.length;                  // number of distinct input values
    this.numBreaks = k;                            // number of classes to split into
    this.bufferSize = (vcpc.length - (k - 1));     // only the first bufferSize positions can start the last class
    this.previousSSM = new Array(this.bufferSize); // SSM row of the previous iteration
    this.currentSSM = new Array(this.bufferSize);  // SSM row being computed
    // flattened matrix of optimal break positions, bufferSize entries per row
    this.classBreaks = new Array(this.bufferSize * (this.numBreaks - 1));
    this.classBreaksIndex = 0;                     // offset of the current row inside classBreaks
    this.completedRows = 0;                        // number of classes already folded into previousSSM
    var cwv = 0.0;                                 // cumulative weighted value
    var cw = 0, w = 0;                             // cumulative weight, current weight
    for (var i = 0; i != this.numValues; ++i)
    {
        assert(!i || vcpc[i].val >= vcpc[i - 1].val); // PRECONDITION: the value sequence must be non-decreasing (the check is >=, not strict as the old comment claimed)
        w = vcpc[i].count;
        assert(w > 0); // PRECONDITION: all weights must be positive
        cw += w;
        assert(cw >= w); // No overflow? No loss of precision?
        cwv += w * vcpc[i].val;
        this.cumulValues.push({cwv: cwv, cw: cw});
        if (i < this.bufferSize)
            this.previousSSM[i] = cwv * cwv / cw; // prepare SSM for first class. Last (k-1) values are omitted
    }
}
/**
 * Gets sum of weighs for elements with index b..e.
 *
 * @param b index of begin element
 * @param e index of end element
 * @return sum of weights.
 */
JenksFisher.prototype.getSumOfWeights = function (/*int*/ b, /*int*/ e)
{
    assert(b); // First element always belongs to class 0, thus queries should never include it.
    assert(b <= e);
    assert(e < this.numValues);
    // difference of cumulative weights yields the weight of the range b..e
    return this.cumulValues[e].cw - this.cumulValues[b - 1].cw;
};
/**
 * Gets sum of weighed values for elements with index b..e
 *
 * @param b index of begin element.
 * @param e index of end element
 * @return the cumul. sum of the values*weight
 */
JenksFisher.prototype.getSumOfWeightedValues = function (/*int*/ b, /*int*/ e)
{
    assert(b);
    assert(b <= e);
    assert(e < this.numValues);
    // difference of cumulative weighted values yields the weighted sum of b..e
    return this.cumulValues[e].cwv - this.cumulValues[b - 1].cwv;
};
/**
 * Gets the Squared Mean for elements within index b..e, multiplied by weight. Note that
 * n*mean^2 = sum^2/n when mean := sum/n
 *
 * @param b index of begin element
 * @param e index of end element
 * @return the sum of squared mean
 */
JenksFisher.prototype.getSSM = function (/*int*/ b, /*int*/ e)
{
    var weightedSum = this.getSumOfWeightedValues(b, e);
    return (weightedSum * weightedSum) / this.getSumOfWeights(b, e);
};
/**
 * Finds CB[i+completedRows] given that the result is at least
 * bp+(completedRows-1) and less than ep+(completedRows-1)
 * Complexity: O(ep-bp) <= O(m)
 *
 * @param i index of the sub-problem to solve
 * @param bp lowest candidate break position
 * @param ep one past the highest candidate break position
 *
 * @return the index of the optimal break
 */
JenksFisher.prototype.findMaxBreakIndex = function (/*int*/ i, /*int*/ bp, /*int*/ ep)
{
    assert(bp < ep);
    assert(bp <= i);
    assert(ep <= i + 1);
    assert(i < this.bufferSize);
    assert(ep <= this.bufferSize);
    // maximizing the combined sum of squared means minimizes the within-class
    // variance (was confusingly named minSSM although it tracks the maximum)
    var maxSSM = this.previousSSM[bp] + this.getSSM(bp + this.completedRows, i + this.completedRows);
    var foundP = bp;
    while (++bp < ep)
    {
        var currSSM = this.previousSSM[bp] + this.getSSM(bp + this.completedRows, i + this.completedRows);
        if (currSSM > maxSSM)
        {
            maxSSM = currSSM;
            foundP = bp;
        }
    }
    this.currentSSM[i] = maxSSM; // best SSM for sub-problem i, input for the next row
    return foundP;
}; // BUG FIX: was a stray `,` that chained the prototype assignments into one comma expression
/**
 * Find CB[i+completedRows] for all i>=bi and i<ei given that the
 * results are at least bp+(completedRows-1) and less than
 * ep+(completedRows-1)
 * Complexity: O(log(ei-bi)*Max((ei-bi),(ep-bp)))
 * <= O(m*log(m))
 *
 * @param bi begin index of the sub-problem range
 * @param ei end index (exclusive) of the sub-problem range
 * @param bp lowest candidate break position
 * @param ep one past the highest candidate break position
 */
JenksFisher.prototype.calcRange = function (/*int*/ bi, /*int*/ ei, /*int*/ bp, /*int*/ ep)
{
    assert(bi <= ei);
    assert(ep <= ei);
    assert(bp <= bi);
    if (bi === ei)
        return;
    assert(bp < ep);
    // solve the middle sub-problem first; its result narrows the candidate
    // ranges of both recursive halves
    var mi = Math.floor((bi + ei) / 2);
    var mp = this.findMaxBreakIndex(mi, bp, Math.min(ep, mi + 1));
    assert(bp <= mp);
    assert(mp < ep);
    assert(mp <= mi);
    // solve first half of the sub-problems with lower 'half' of possible outcomes
    this.calcRange(bi, mi, bp, Math.min(mi, mp + 1));
    this.classBreaks[this.classBreaksIndex + mi] = mp; // store result for the middle element.
    // solve second half of the sub-problems with upper 'half' of possible outcomes
    this.calcRange(mi + 1, ei, mp, ep);
}; // BUG FIX: was a stray `,` that chained the prototype assignments into one comma expression
/**
 * Starting point of calculation of breaks.
 *
 * complexity: O(m*log(m)*k)
 */
JenksFisher.prototype.calcAll = function ()
{
    if (this.numBreaks < 2)
        return; // nothing to precompute for fewer than two classes
    this.classBreaksIndex = 0;
    for (this.completedRows = 1; this.completedRows < this.numBreaks - 1; ++this.completedRows)
    {
        this.calcRange(0, this.bufferSize, 0, this.bufferSize); // complexity: O(m*log(m))
        swapArrays(this.previousSSM, this.currentSSM); // current row becomes the previous row
        this.classBreaksIndex += this.bufferSize;
    }
};
/**
 * Does the internal processing to actually create the breaks.
 *
 * @param breaksArray output array of length k receiving the break values
 * @param k number of breaks
 * @param vcpc asc ordered input of values and their occurence counts.
 */
function classifyJenksFisherFromValueCountPairs(/*double[]*/ breaksArray, /*int*/ k, /*{val:value,count:count}*/ vcpc)
{
    var m = vcpc.length;
    assert(k <= m); // PRECONDITION
    if (!k)
        return;
    var jf = new JenksFisher(vcpc, k);
    if (k > 1) {
        // runs the actual calculation
        jf.calcAll();
        // break position of the last class, found directly from the final SSM row
        var lastClassBreakIndex = jf.findMaxBreakIndex(jf.bufferSize - 1, 0, jf.bufferSize);
        while (--k) {
            // assign the break values to the result
            breaksArray[k] = vcpc[lastClassBreakIndex + k].val;
            assert(lastClassBreakIndex < jf.bufferSize);
            if (k > 1)
            {
                // step back one matrix row; it stores the optimal previous break
                jf.classBreaksIndex -= jf.bufferSize;
                lastClassBreakIndex = jf.classBreaks[jf.classBreaksIndex + lastClassBreakIndex];
            }
        }
        // BUG FIX: must check we walked back to the first row (C++ original:
        // m_CBPtr == m_CB.begin()); the old assert compared ELEMENT VALUES
        // (classBreaks[classBreaksIndex] === classBreaks[0]), which can pass
        // even when the index is wrong
        assert(jf.classBreaksIndex === 0);
    }
    assert(k === 0);
    breaksArray[0] = vcpc[0].val; // break for the first class is the minimum of the dataset.
}
/**
 * Main entry point for creation of Jenks-Fisher natural breaks.
 *
 * @param {type} values - array of the values, do not need to be sorted.
 * @param {type} k - number of breaks to create
 * @returns {Array} - Array with breaks
 */
function createJenksFisherBreaksArray(/*double*/ values,/*int*/ k)
{
    // build the weighted unique-value list that feeds the JF algorithm
    var sortedUniqueValueCounts = getValueCountPairs(values);
    if (sortedUniqueValueCounts.length > k) {
        var breaksArray = new Array(k);
        classifyJenksFisherFromValueCountPairs(breaksArray, k, sortedUniqueValueCounts);
        return breaksArray;
    }
    // degenerate case: no more unique values than requested breaks, so every
    // value becomes a break. NOTE(review): Math.abs mirrors the original code;
    // it is unclear why absolute values are taken here - confirm against callers.
    return sortedUniqueValueCounts.map(function (pair) {
        return Math.abs(pair.val);
    });
}
/**
 * create list with weighted unique values, sorted ASC by values.
 * @param {type} rawValues
 * @returns [{val:value as int,count:weight as int},...]
 */
function getValueCountPairs(rawValues) {
    var result = {};
    rawValues.forEach(function (el) {
        // "k_" prefix keeps numeric keys clear of Object.prototype properties
        var k = "k_" + Number(el).toString();
        // BUG FIX: was `result[k]=="undefined"`, which compared against the
        // STRING "undefined" and only worked by accident via the `== null`
        // fallback; `== null` alone covers both undefined and null
        if (result[k] == null) {
            result[k] = 1;
        } else {
            result[k] = result[k] + 1;
        }
    });
    var sortedResults = [];
    Object.keys(result).forEach(function (key) {
        sortedResults.push({val: Number(key.substring(2)), count: result[key]});
    });
    sortedResults.sort(function (a, b) {
        return a.val - b.val;
    });
    return sortedResults;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment