Skip to content

Instantly share code, notes, and snippets.

@feyderm
Last active March 26, 2017 05:58
Show Gist options
  • Save feyderm/75c18a9143aac50a24e392762f10d6a4 to your computer and use it in GitHub Desktop.
Save feyderm/75c18a9143aac50a24e392762f10d6a4 to your computer and use it in GitHub Desktop.
k-means clustering

Data are a sample from the S2 dataset of P. Fränti and O. Virmajoki, "Iterative shrinking method for clustering problems", Pattern Recognition, 39 (5), 761-765, May 2006.

Reload to explore local minima.

<!DOCTYPE html>
<meta charset="utf-8">
<style>
.pt {
stroke:black;
}
#stats {
font-family: sans-serif;
fill: #4d4d4d;
}
</style>
<body>
<!--viz-->
<div id="chart"></div>
<script src="https://d3js.org/d3.v4.min.js"></script>
<script type="text/javascript">
// dims
var margin = { top: 20, right: 0, bottom: 50, left: 120 },
svg_dx = 800,
svg_dy = 400,
plot_dx = svg_dx - margin.right - margin.left,
plot_dy = svg_dy - margin.top - margin.bottom;
// scales
var x = d3.scaleLinear().range([margin.left, plot_dx]),
y = d3.scaleLinear().range([plot_dy, margin.top]);
var svg = d3.select("#chart")
.append("svg")
.attr("width", svg_dx)
.attr("height", svg_dy);
var hulls = svg.append("g")
.attr("id", "hulls");
var circles = svg.append("g")
.attr("id", "circles");
// synthetic data with 15 known Gaussian clusters
// ref: S2 dataset from http://cs.joensuu.fi/sipu/datasets/
var clusters = d3.range(0, 15).map((n) => n.toString());
// costs for each iteration
var costs = [];
hulls.selectAll("path")
.data(clusters)
.enter()
.append("path")
.attr("class", "hull")
.attr("id", d => "hull_" + d);
// data is 10% sample of original dataset
d3.csv("s2_sample.csv", d => {
d.forEach(d => {
d.x = +d.x;
d.y = +d.y;
});
setScaleDomains(d);
plotCircles(d);
// randomly select 15 data points for initial centroids
var initialCentroids = clusters.map(() => d[Math.round(d3.randomUniform(0, d.length)())]);
assignCluster(initialCentroids);
addHull();
costs.push(computeCost());
var iterate = d3.interval(() => {
var c = computeCentroids()
assignCluster(c)
addHull();
var cost = computeCost();
// stop iterating when algorithm coverges to local minimum
if (cost == costs[costs.length - 1]) {
displayStats(costs);
iterate.stop();
}
costs.push(cost)
}, 500);
});
function displayStats(costs) {
var stats = svg.append("g")
.attr("id", "stats");
var formatMin = d3.format(".4");
var n_iters = stats.append("text")
.attr("x", 10)
.attr("y", 20);
n_iters.append("tspan")
.style("font-weight", "bold")
.text("Num. Iterations: ");
n_iters.append("tspan")
.text(costs.length);
var cost = stats.append("text")
.attr("x", 10)
.attr("y", 40);
cost.append("tspan")
.style("font-weight", "bold")
.text("Local Minimum: ");
cost.append("tspan")
.text(formatMin(costs[costs.length - 1]));
}
function computeCentroids() {
var centroids = clusters.map(cluster => {
var d = d3.selectAll(".cluster_" + cluster)
.data(),
n = d.length;
var x_sum = d3.sum(d, d => d.x),
y_sum = d3.sum(d, d => d.y);
return { x:(x_sum / n), y:(y_sum / n) };
});
return centroids;
}
function addHull() {
clusters.forEach(cluster => {
// parse cluster data
var d_cluster = d3.selectAll(".cluster_" + cluster)
.data()
.map((datum) => [x(datum.x), y(datum.y)]);
// path given data points for cluster
var d_path = d3.polygonHull(d_cluster);
var color = d3.schemeCategory20[+cluster];
// ref: https://bl.ocks.org/mbostock/4341699
d3.select("#hull_" + cluster)
.attr("id", "hull_" + cluster)
.transition()
.duration(250)
.attr("d", d_path === null ? null : "M" + d_path.join("L") + "Z")
.attr("fill", color)
.style("stroke", color);
});
}
function computeCost() {
var dists = d3.selectAll("circle")
.data()
.map(d => d._dist);
return d3.sum(dists);
}
function assignCluster(centroids) {
d3.selectAll("circle")
.each(function(d) {
// distances of data point from all centroids
var dists = computeDistances(centroids, d);
// min. distance defines cluster number
var dist_min = d3.min(dists);
var cluster_num = dists.findIndex(dist => dist == dist_min);
var color = d3.schemeCategory20[cluster_num];
// stash min. distance to compute cost
d._dist = dist_min;
// assign data point to cluster of minimum distance
d3.select(this)
.attr("fill", d3.color(color).brighter(0.5))
.attr("class", "pt cluster_" + cluster_num);
});
}
function computeDistances(centroids, d_pt) {
var dists = centroids.map(centroid => {
var dist = Math.sqrt(Math.pow(d_pt.x - centroid.x, 2) + Math.pow(d_pt.y - centroid.y, 2));
return dist;
})
return dists;
}
function setScaleDomains(d) {
x.domain(d3.extent(d, d => d.x));
y.domain(d3.extent(d, d => d.y));
}
function plotCircles(d) {
circles.selectAll("circle")
.data(d)
.enter()
.append("circle")
.attr("class", "pt")
.attr("r", 5)
.attr("cx", (d) => x(d.x))
.attr("cy", (d) => y(d.y));
}
</script>
</body>
x y
868777 586944
367020 431868
841726 800729
176787 455252
693716 476453
473080 838594
362249 399055
539611 398504
449467 594608
446119 601343
372307 207143
800231 232016
557729 241086
799944 816622
579218 185160
644438 720614
190767 706219
796502 510572
805108 796673
561853 312878
803357 779049
298298 412540
352868 389859
603094 315792
868895 604801
256255 743704
545045 424260
129344 161493
401212 788368
828956 617518
210204 525598
239399 487248
152867 264083
677057 153715
402472 181925
713592 135254
193996 488095
215615 565032
683156 477909
443267 354086
579813 431520
254983 731658
690480 168208
151352 235918
513319 442815
833899 769899
441050 601420
833920 273791
370731 217636
598498 733367
172246 511422
231574 696781
747199 581068
664594 695034
806535 822224
415598 205574
596539 376915
811794 796546
195166 438443
600938 632401
180427 312249
396190 147633
392049 421698
239740 424128
727112 497222
454855 338314
321709 148954
233496 791806
845753 636607
672748 143913
667783 714977
551504 163718
850080 301694
774427 788519
830057 593423
435454 582389
157863 234771
186264 695907
624439 284325
711441 151028
367014 393820
802930 232758
656341 159600
142200 219871
384609 54012
504986 872830
808797 782400
659403 713194
96693 226847
599399 491415
197688 484429
495770 314954
680746 200342
486998 186575
520952 823921
818350 787409
712504 156892
452060 618631
728129 500290
124465 141723
681528 158990
366133 381852
836090 553666
709537 529162
459889 378512
145002 258062
287834 722534
535151 429278
55608 239370
347695 244892
367885 392222
241344 757443
116999 471084
305482 426633
429488 612140
793835 803740
220177 221674
262124 479518
620515 179428
505796 823492
72590 266630
797349 262013
642584 700876
720486 163061
769904 494189
570399 302552
839617 656945
310836 453367
788340 235197
216910 714147
138455 477812
318948 427185
685914 146709
779163 791549
214167 475347
577939 713682
525885 408633
644682 714745
862496 778936
93554 238027
486881 821631
639407 453570
237599 737826
160746 461333
526382 240215
598696 426412
643031 711746
715169 485793
805067 246062
175763 259085
450073 177505
293378 730822
446833 618978
188413 435252
354488 384371
362438 399028
199017 249218
740262 454955
297392 837410
78660 284471
674937 133330
397307 123704
610174 773555
739547 489215
416238 192639
840280 643770
382235 404889
850054 775827
690616 104291
679826 139538
587211 212643
650187 695637
448826 537561
616652 307558
327673 129969
438100 647695
849098 786555
666089 834004
679675 145741
803229 246622
837750 634132
845361 796494
58050 223469
562761 451747
208417 813898
182053 420973
564558 251534
504920 856370
342396 391351
422092 641107
815014 790524
511578 477934
841353 810185
376199 179596
641164 670236
641684 622495
195311 426719
250965 638991
589897 941206
588052 698231
412031 164238
581758 242967
185554 434810
729190 580460
452764 940358
546863 446509
353389 370521
582510 232791
373227 166478
534359 410864
802856 239801
159080 210722
426060 229744
370049 391721
750522 621023
385673 169869
676848 196504
198303 478619
343924 394943
543510 406511
856835 289341
771727 481735
839817 657140
167454 212223
806771 696582
173988 455711
140450 251264
462641 873096
404216 111800
822815 616988
157309 236777
516087 229278
474946 400305
203563 488698
770665 821201
631587 656092
242177 789274
277460 489953
844586 802273
821453 779300
823326 281490
544848 453980
503244 848895
366903 161264
761897 442656
762598 815926
484034 788380
523254 437878
373259 425038
219520 412301
203424 155150
234863 467401
400394 380796
389161 136652
614768 433212
167088 231235
444738 611875
806003 241718
812780 635276
832359 781195
583750 440376
898849 767378
311758 252287
826452 774195
219609 734441
488127 73846
764197 149751
587867 711394
126009 263298
197194 481989
596805 276457
432770 626516
203104 552398
612380 276270
815736 251749
231852 300137
149811 261597
281780 775943
172444 256387
595184 437458
590610 363074
812051 780503
705772 693959
556204 478664
414871 690586
812456 609760
510223 792220
461991 549808
681298 260543
612291 131433
427786 615834
250998 756247
272708 722671
247122 715890
642076 201050
590381 824899
808472 241370
844405 710620
212929 476235
807493 241644
691442 511116
301060 715198
594137 703143
439176 594910
134197 229036
687052 186012
782521 782691
686639 180824
808035 798648
502156 829132
806678 241501
445690 614204
842837 647272
473518 431029
517539 835591
235700 742331
515997 873751
848376 221062
420275 394734
545421 190964
394944 157785
514165 906736
750835 484965
142031 499068
167164 191976
447370 572513
542553 222643
560227 441363
665597 154475
805167 195569
380383 436027
739447 497012
178163 282559
180686 500459
387305 387873
892746 564842
702306 508607
199114 469623
515828 956802
557961 864098
414615 681241
639214 723166
239976 833889
683348 177770
749620 446114
484075 839356
182210 807220
219148 671608
796737 235100
148258 485289
735419 521089
538071 476262
484168 387405
737036 156973
706391 175549
238601 769529
638563 721974
533127 916539
896024 206022
744128 491509
506023 816501
409604 719216
637681 686532
211181 778052
449808 623716
917544 625573
303315 750320
505676 855482
837660 211926
270830 751681
503413 819127
801854 795090
756376 469701
734376 455402
275338 518305
733684 216569
837545 807762
535593 143445
852753 799139
377224 415288
552401 833620
561535 422869
369117 397361
172229 208129
871946 767622
441676 361920
549320 462207
362468 150201
543279 201456
322898 707331
400677 223791
894943 635233
826740 617100
118884 300452
543185 449059
810776 255655
617516 155138
514390 894483
358783 228332
280484 412725
815671 220031
446989 611213
553779 243274
780635 613287
408082 859669
572815 413163
446211 212565
738184 129882
657706 234370
126749 322037
208270 463405
268582 750393
595870 694998
788649 794537
368932 390922
398572 370806
635196 471927
836176 639066
171329 274604
556325 411879
350796 417352
410652 127303
732311 143631
683193 153585
604791 256206
593205 453290
144536 253350
703914 726785
227939 492292
170563 774573
736648 486939
568522 412767
437877 347481
353798 153195
575185 935592
581344 385622
441417 591083
160305 394216
787165 670018
808336 241768
674203 723060
826119 273742
782953 207370
865073 661021
416055 67530
507531 899483
869697 240782
792217 324106
604067 254704
187608 800179
810615 244983
561290 237997
441101 632597
849885 226831
377351 397813
561214 239273
431366 787771
358829 388192
367991 414300
179395 462021
492219 176220
355387 401760
256102 748634
642061 703080
545646 813736
854434 182273
101403 505387
680868 731424
332667 214384
813142 634900
417370 195374
158466 255685
813657 150704
391415 152720
488198 321077
791155 234010
693853 483179
640146 716376
109749 144848
427578 177762
367537 392135
750750 213230
410952 118990
801185 253763
379846 169598
661592 678312
698637 479409
421081 221014
608865 760275
134601 255747
88506 502797
827016 648159
752795 492722
786184 663836
165098 235353
210080 198044
578789 227708
565035 407936
188602 207706
801229 617139
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment