Skip to content

Instantly share code, notes, and snippets.

@ashenfad
Last active August 29, 2015 13:59
Show Gist options
  • Save ashenfad/10782633 to your computer and use it in GitHub Desktop.
Save ashenfad/10782633 to your computer and use it in GitHub Desktop.
BigML Clusters - Iris

A visualization of three clusters discovered on the Iris dataset.

Each cluster is represented by a ball. The cluster radii are proportional to the population of each cluster.

The y-axis shows the distance of each cluster to the current point (selected by the sliders). The lower a cluster's position, the nearer it is to the current point. The order of the clusters on the x-axis is from nearest to furthest.

The initial point is the median for each field. Selecting a cluster will set the current point equal to the cluster's centroid.

Finally, the colors on each slider represent the closest cluster to the current point for that range.

<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Slider axes: small tick-label font; label text cannot be selected. */
.axis {
font: 10px sans-serif;
-webkit-user-select: none;
-moz-user-select: none;
user-select: none;
}
/* Axis baseline: a thick, translucent, round-capped stroke. The script sets
   inline "stroke: none" / "fill: none" on this element for every slider axis. */
.axis .domain {
fill: none;
stroke: #000;
stroke-opacity: .3;
stroke-width: 10px;
stroke-linecap: round;
}
/* Slider knob: white disc with a thin translucent outline. It ignores pointer
   events, so the mouse reaches whatever lies beneath it. */
.slider .handle {
fill: #fff;
stroke: #000;
stroke-opacity: .5;
stroke-width: 1.25px;
pointer-events: none;
}
</style>
<body>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script>
// Plot geometry: a 960x500 canvas with the margins taken off each side.
var margin = {top: 40, right: 20, bottom: 50, left: 20};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.bottom - margin.top;

// The <svg> element is created at the full canvas size. `svg` refers to the
// inner <g>, shifted by the left/top margin, so all drawing code works in
// plot coordinates.
var svg = d3.select("body")
  .append("svg")
  .attr("width", width + margin.left + margin.right)
  .attr("height", height + margin.top + margin.bottom)
  .append("g")
  .attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// <defs> receives the linearGradient elements that colour the sliders.
var defs = d3.select("svg").append("defs");
d3.json("iris.json", function(error, root) {
// Stop with a meaningful message when the model could not be loaded; without
// this check the failure only shows up as a TypeError on `root.model`.
if (error) {
  throw new Error("Could not load iris.json: " + (error.statusText || error.message || error));
}
// The cluster model and its field descriptions.
var model = root.model;
var fields = model.fields;
// The cluster plot occupies the top half of the drawing area.
var max_y = height / 2;
// Horizontal spacing between cluster slots (one more gap than clusters).
var buffer = width / (model.clusters.length + 1);
// Each slider track is a third of the drawing width.
var slider_size = width / 3;
// Draws one faint horizontal guide line across the cluster plot.
//   y_loc - vertical position of the line in plot coordinates
// The line starts 20px left of the first cluster slot (x = buffer) and ends
// 20px right of the last one (x = width - buffer).
function make_hline(y_loc) {
  var left_edge = buffer - 20;
  var span = width - 2 * buffer + 40;
  var guide = svg.append("rect");
  guide.attr("x", left_edge);
  guide.attr("y", y_loc);
  guide.attr("width", span);
  guide.attr("height", 0.5);
  guide.style("fill", "none");
  guide.style("stroke", "#ccc");
}
// Five evenly spaced guide lines from the top of the cluster plot (0) down to
// its bottom (max_y), one per quarter of the plot height.
for (var quarter = 0; quarter <= 4; quarter++) {
  make_hline(quarter * max_y / 4);
}
// Per-field state. Every array is indexed by the field's position in `keys`:
//   handle_scales - d3 scale from field value to slider x offset (set by make_slider)
//   mins / maxs   - smallest / largest value of the field per the model summary
//   keys          - the field's id in the model
//   point         - the currently selected value; starts at the field's median
//   fnames        - the field's display name
var handle_scales = [];
var mins = [];
var maxs = [];
var keys = [];
var point = [];
var fnames = [];
// Object.keys yields only own properties; the loop variables are declared
// locally so nothing leaks into the global scope.
var field_ids = Object.keys(fields);
for (var i = 0; i < field_ids.length; i++) {
  var id = field_ids[i];
  keys[i] = id;
  point[i] = fields[id].summary.median;
  mins[i] = fields[id].summary.minimum;
  maxs[i] = fields[id].summary.maximum;
  fnames[i] = fields[id].name;
  // make_slider reads point[i], so it must run after the assignments above.
  make_slider(fields[id], i);
}
// Per-field scaling factors taken from the model, in the same order as `keys`.
// Distances are computed on values multiplied by these factors.
var scales = keys.map(function (key) {
  return model.final_field_scales[key];
});
// Per-cluster state, indexed by the cluster's position in model.clusters:
//   centroids        - centre coordinates, ordered like `keys`
//   scaled_centroids - the same coordinates multiplied by the field scales
//   cnames           - the cluster's display name
// max_cluster_count is the largest cluster population; ball radii are sized
// relative to it.
var centroids = [];
var scaled_centroids = [];
var cnames = [];
var max_cluster_count = 0;
// model.clusters is an array, so iterate by index with declared variables.
for (var i = 0; i < model.clusters.length; i++) {
  var cluster = model.clusters[i];
  var centroid = [];
  var scaled_centroid = [];
  cnames[i] = cluster.name;
  max_cluster_count = Math.max(max_cluster_count, cluster.count);
  for (var j = 0; j < keys.length; j++) {
    var val = cluster.center[keys[j]];
    centroid[j] = val;
    scaled_centroid[j] = scales[j] * val;
  }
  centroids[i] = centroid;
  scaled_centroids[i] = scaled_centroid;
}
// Colour the slider tracks for the initial point.
update_ranges();
// The largest distance between any two centroids; it normalises the vertical
// position of the cluster balls.
var max_dist = 0;
centroids.forEach(function (first, first_index) {
  centroids.slice(first_index + 1).forEach(function (second) {
    max_dist = Math.max(max_dist, dist(first, second));
  });
});
// Text label above the plot; update_dists fills in its text and colour.
svg.append("text")
  .attr("class", "nearest")
  .attr("x", buffer - 20)
  .attr("y", -20);
// dists[k]      - distance from the current point to centroid k
// dist_order[k] - rank of centroid k when sorted by that distance (0 = nearest)
var dists = [];
var dist_order = [];
update_dists();
// One ball per cluster. model.clusters is an array, so iterate by index with a
// declared loop variable.
for (var i = 0; i < model.clusters.length; i++) {
  make_balls(model.clusters[i], i);
}
// Creates the circle that represents one cluster.
//   cluster - cluster entry from the model; only `count` is read here
//   index   - position of the cluster; selects its colour, distance and rank
// The radius grows with the cube root of the cluster's share of the largest
// population. Clicking the ball moves the current point onto the cluster's
// centroid and refreshes every dependent view; hovering highlights the ball.
function make_balls(cluster, index) {
  var max_rad = 30;
  var base_color = d3.scale.category10().range()[index];
  var radius = Math.pow((cluster.count / max_cluster_count), 1/3) * max_rad;

  // Applies a radius, stroke and fill to the hovered circle.
  function paint(node, r, stroke, fill) {
    d3.select(node)
      .attr("r", r)
      .style("stroke", stroke)
      .style("fill", fill);
  }

  var ball = svg.append("circle");
  ball.attr("class", "cluster");
  ball.attr("r", radius);
  ball.attr("cx", (dist_order[index] + 1) * buffer);
  ball.attr("cy", (1 - Math.min(dists[index] / max_dist, 1)) * max_y);
  ball.style("stroke", d3.rgb(base_color).darker());
  ball.style("fill", base_color);

  ball.on("click", function () {
    // Copy the centroid so that later slider moves do not alter it.
    point = centroids[index].slice(0);
    update_dists();
    update_ranges();
    update_balls(300);
    update_handles(300);
    update_field_text();
  });
  ball.on("mouseover", function () {
    paint(this, 1.1 * radius, base_color, d3.rgb(base_color).brighter());
  });
  ball.on("mouseout", function () {
    paint(this, radius, d3.rgb(base_color).darker(), base_color);
  });
}
// Recomputes the distance from the current point to every centroid.
// Side effects:
//   dists[k]      - distance to centroid k
//   dist_order[k] - rank of centroid k by ascending distance (0 = nearest)
//   ".nearest"    - text naming the nearest cluster and its distance, drawn in
//                   that cluster's colour
function update_dists() {
  var nearest = -1;
  var ranking = [];
  for (var k = 0; k < centroids.length; k++) {
    var d = dist(point, centroids[k]);
    dists[k] = d;
    ranking.push({d: d, i: k});
    // Strict comparison keeps the first centroid on ties.
    if (nearest === -1 || dists[nearest] > d) {
      nearest = k;
    }
  }
  ranking.sort(function (a, b) {
    return a.d < b.d ? -1 : (a.d > b.d ? 1 : 0);
  });
  ranking.forEach(function (entry, rank) {
    dist_order[entry.i] = rank;
  });
  var label = "Nearest to " + cnames[nearest]
    + " - Distance: " + dists[nearest].toFixed(4);
  d3.select(".nearest")
    .text(label)
    .style("fill", d3.scale.category10().range()[nearest]);
}
// Rebuilds the colour gradient behind every slider so that each stretch of a
// slider shows which cluster is nearest when only that field is changed.
//
// For field i the current point is moved to the field's minimum and the
// nearest centroid is found by comparing distances. The track is then walked
// from minimum to maximum: from position p, owned by cluster c, the next
// colour change is the smallest position beyond p at which the field's line
// crosses the bisecting plane between c and another centroid. Each segment is
// written as two stops of the same colour, so colours switch abruptly instead
// of blending.
function update_ranges() {
  var palette = d3.scale.category10().range();

  // Appends one opaque gradient stop for `cluster` at `value`, expressed as a
  // percentage of the field's [lo, lo + span] range.
  function add_stop(grad, value, lo, span, cluster) {
    grad.append("svg:stop")
      .attr("offset", (100 * (value - lo) / span).toFixed(2) + "%")
      .attr("stop-color", palette[cluster])
      .attr("stop-opacity", 1);
  }

  for (var i = 0; i < scales.length; i++) {
    var grad = d3.select("#g" + i);
    grad.selectAll("stop").remove();
    // Start of the walk: the current point with field i at its minimum.
    var min_pt = point.slice(0);
    min_pt[i] = mins[i];
    var min_index = -1;
    var min_dist;
    for (var j = 0; j < centroids.length; j++) {
      var d = dist(min_pt, centroids[j]);
      if (min_index === -1 || min_dist > d) {
        min_index = j;
        min_dist = d;
      }
    }
    var p = min_pt[i];
    var c = min_index;
    var range = maxs[i] - mins[i];
    // Clusters that already own a segment of this slider. The set has to
    // survive across iterations of the walk, so it is created once per field.
    var used = {};
    used[c] = true;
    while (p < maxs[i]) {
      add_stop(grad, p, mins[i], range, c);
      var nextp = maxs[i];
      var nextc = c;
      for (var k = 0; k < centroids.length; k++) {
        // Skip the current cluster (its bisector with itself is degenerate)
        // and every cluster that already owned an earlier segment.
        if (used[k] === true) { continue; }
        var candidate = vdist(line(i), plane(k, c));
        if (candidate < nextp && candidate > p) {
          nextp = candidate;
          nextc = k;
        }
      }
      // Close the segment in the same colour to get a hard edge.
      add_stop(grad, nextp, mins[i], range, c);
      p = nextp;
      c = nextc;
      used[c] = true;
    }
  }
}
// Moves every cluster ball to its current rank (x) and distance (y).
//   duration - transition time in ms; 50ms are added to it
// The x position is the ball's rank by distance, the y position its distance
// as a fraction of max_dist (capped at 1), measured up from the plot bottom.
function update_balls(duration) {
  var spacing = width / (model.clusters.length + 1);
  var ids = [];
  for (var k = 0; k < centroids.length; k++) {
    ids.push(k);
  }
  function x_of(d) {
    return (dist_order[d] + 1) * spacing;
  }
  function y_of(d) {
    return (1 - Math.min(dists[d] / max_dist, 1)) * max_y;
  }
  d3.selectAll(".cluster")
    .data(ids)
    .transition()
    .duration(duration + 50)
    .attr("cx", x_of)
    .attr("cy", y_of);
}
// Builds the slider for one field: axis, gradient-filled track, text label,
// draggable handle and the brush that turns mouse input into a field value.
//   field - field entry from the model; `summary.minimum`, `summary.maximum`
//           and `name` are read
//   index - position of the field in `point`; also decides the slider's place
//           in the two-column layout
// Reads point[index] for the initial value and stores the value scale in
// handle_scales[index].
function make_slider(field, index) {
var summary = field.summary;
// Two sliders per row with three equal horizontal gaps around them.
var buffer = (width - (2 * slider_size)) / 3;
// Even indices go in the left column, odd indices in the right column.
var x_loc = ((index % 2) + 1) * buffer + (index % 2) * slider_size;
// Rows are 60px apart, the first one 60px below the cluster plot.
var y_loc = max_y + 60 + Math.floor(index / 2) * 60;
// Maps a field value to an x offset along the track; values outside the
// field's range are clamped to the ends.
var x = d3.scale.linear()
.domain([summary.minimum, summary.maximum])
.range([0, slider_size])
.clamp(true);
handle_scales[index] = x;
var brush = d3.svg.brush().x(x);
// On every brush event: work out the new value, move the handle, store the
// value in the current point and refresh all dependent views.
brush.on("brush", function() {
var value = brush.extent()[0];
if (d3.event.sourceEvent) { // not a programmatic event
// The mouse position is relative to the slider group, which is not
// translated, so the track's x_loc is subtracted before inverting.
value = x.invert(d3.mouse(this)[0] - x_loc);
// Collapse the brush to a single value.
brush.extent([value, value]);
}
handle.attr("cx", x(value));
point[index] = value;
update_dists();
update_ranges();
update_balls(0);
update_field_text();
});
// Axis with tick labels only: tick marks have size 0 and the baseline path
// is hidden.
var axis = svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(" + x_loc + "," + y_loc + ")");
axis.call(d3.svg.axis()
.scale(x)
.orient("bottom")
.tickFormat(function(d) { return d; })
.tickSize(0)
.tickPadding(10))
.select(".domain")
.style("stroke", "none")
.style("fill", "none");
// Horizontal gradient with id "g<index>"; update_ranges selects it by that id
// and fills in its colour stops.
var gradient_name = "g" + index;
var gradient = defs.append("linearGradient")
.attr("id", gradient_name)
.attr("field_id", index)
.attr("x1", "0%")
.attr("y1", "0%")
.attr("x2", "100%")
.attr("y2", "0%");
// The visible track: a rounded bar filled with the gradient above.
axis.append("rect")
.attr("class", "range")
.attr("y", -4)
.attr("height", 8)
.attr("width", slider_size)
.attr("field_index", index)
.attr("rx", 4)
.attr("ry", 4)
.style("stroke", "#333")
.style("fill", "url(#" + gradient_name + ")");
// Group that carries the brush behaviour, the label and the handle.
var slider = svg.append("g")
.attr("class", "slider")
.call(brush);
// Label "<field name>: <value>" above the track; update_field_text rewrites it.
slider.append("text")
.attr("class", "field_text")
.attr("transform", "translate(" + x_loc + "," + (y_loc - 13) + ")")
.text(field.name + ": " + point[index].toFixed(2));
// The brush's extent and resize elements are removed; only its background
// rectangle is kept, resized to a 40px-high band around the track.
slider.selectAll(".extent,.resize").remove();
slider.select(".background").attr("height", 40)
.attr("x", x_loc)
.attr("y", y_loc - 20);
// The knob showing the current value; update_handles animates its cx.
var handle = slider.append("circle")
.attr("class", "handle")
.attr("transform", "translate(" + x_loc + "," + (y_loc - 1) + ")")
.attr("r", 8)
.attr("cx", handle_scales[index](point[index]));
// Start the brush collapsed on the field's initial value.
brush.extent([point[index], point[index]]);
}
// Slides every handle to the position of the current point's value.
//   duration - transition time in ms
// Handles are matched to fields by their order in the document.
function update_handles(duration) {
  var ids = [];
  for (var k = 0; k < point.length; k++) {
    ids.push(k);
  }
  function handle_x(d) {
    return handle_scales[d](point[d]);
  }
  d3.selectAll(".handle")
    .data(ids)
    .transition()
    .duration(duration)
    .attr("cx", handle_x);
}
// Rewrites every slider label as "<field name>: <value>", with the value of
// the current point shown to two decimals. Labels are matched to fields by
// their order in the document.
function update_field_text() {
  var ids = [];
  for (var k = 0; k < point.length; k++) {
    ids.push(k);
  }
  function label_of(d) {
    return fnames[d] + ": " + point[d].toFixed(2);
  }
  d3.selectAll(".field_text")
    .data(ids)
    .text(label_of);
}
// Euclidean distance between two unscaled points, with every coordinate
// difference multiplied by the matching field scale first.
//   p1, p2 - arrays of field values ordered like `scales`
// Returns the distance as a number.
function dist(p1, p2) {
  var sum_sq = p1.reduce(function (acc, value, k) {
    var delta = (value - p2[k]) * scales[k];
    return acc + delta * delta;
  }, 0);
  return Math.sqrt(sum_sq);
}
// Returns a new array with every coordinate of `point` multiplied by the
// matching field scale. The argument is left unchanged.
function scale_point(point) {
  return point.map(function (value, k) {
    return scales[k] * value;
  });
}
// Dot product of two vectors, summed over the length of v1.
function dotp(v1, v2) {
  return v1.reduce(function (acc, value, k) {
    return acc + (value * v2[k]);
  }, 0);
}
// Describes the plane halfway between two scaled centroids.
//   cent_id1, cent_id2 - indices into scaled_centroids
// Returns {n, o}: `n` is the vector from the first centroid to the second
// (the plane normal) and `o` is the midpoint between them (a point on the plane).
function plane(cent_id1, cent_id2) {
  var first = scaled_centroids[cent_id1];
  var second = scaled_centroids[cent_id2];
  var normal = first.map(function (value, k) {
    return second[k] - value;
  });
  var midpoint = first.map(function (value, k) {
    return (second[k] + value) / 2;
  });
  return {n: normal, o: midpoint};
}
// Describes the line through the current point along one field's axis, in
// scaled coordinates.
//   field_index - index of the field that varies along the line
// Returns {i, o, v}: `i` is the field index, `o` the scaled current point with
// that field set to 0, and `v` the unit vector along that field.
function line(field_index) {
  var origin = scale_point(point);
  origin[field_index] = 0;
  var direction = [];
  for (var k = 0; k < point.length; k++) {
    direction.push(0);
  }
  direction[field_index] = 1;
  return {i: field_index, o: origin, v: direction};
}
// Finds where a line from line() crosses a plane from plane().
//   line  - {i, o, v} as returned by line()
//   plane - {n, o} as returned by plane()
// Returns the crossing position along the line's field in unscaled units (the
// scaled position divided by that field's scale). When the line never crosses
// the plane the division by zero yields Infinity or NaN.
function vdist(line, plane) {
  var offset = plane.o.map(function (coord, k) {
    return coord - line.o[k];
  });
  var scaled_position = dotp(plane.n, offset) / dotp(line.v, plane.n);
  return scaled_position / scales[line.i];
}
});
</script>
{
"model" : {
"kind" : "kmeans",
"dataset_id" : "1397431904053",
"missing_tokens" : [ "", "N/A", "n/a", "NULL", "null", "-", "#DIV/0", "#REF!", "#NAME?", "NIL", "nil", "NA", "na", "#VALUE!", "#NULL!", "NaN", "#N/A", "#NUM!", "?" ],
"branching-factor" : 4,
"locale" : "en_US",
"k" : 3,
"type" : "unsupervised",
"clusters" : [ {
"name" : "Cluster 1",
"id" : "000000",
"center" : {
"000000" : 6.82859,
"000001" : 3.12908,
"000002" : 5.57868,
"000003" : 2.01276
},
"distance" : {
"standard_deviation" : 0.07897,
"mean" : 0.16652,
"median" : 0.16222,
"minimum" : 0.03489,
"sum_squares" : 1.4543,
"bins" : [ [ 0.03489, 1 ], [ 0.05993, 1 ], [ 0.07899, 1 ], [ 0.08296, 2 ], [ 0.08864, 1 ], [ 0.09131, 1 ], [ 0.09436, 1 ], [ 0.09994, 2 ], [ 0.10177, 1 ], [ 0.10566, 2 ], [ 0.11111, 2 ], [ 0.11572, 1 ], [ 0.12624, 1 ], [ 0.14402, 3 ], [ 0.15868, 1 ], [ 0.16222, 1 ], [ 0.17117, 2 ], [ 0.17617, 1 ], [ 0.18242, 1 ], [ 0.18834, 1 ], [ 0.19064, 1 ], [ 0.19621, 4 ], [ 0.19977, 1 ], [ 0.2083, 1 ], [ 0.21219, 1 ], [ 0.22182, 1 ], [ 0.23188, 1 ], [ 0.25378, 1 ], [ 0.26726, 2 ], [ 0.33677, 1 ], [ 0.36896, 1 ], [ 0.38363, 1 ] ],
"sum" : 7.16051,
"population" : 43,
"maximum" : 0.38363,
"variance" : 0.00624
},
"count" : 43
}, {
"name" : "Cluster 2",
"id" : "000001",
"center" : {
"000000" : 5.8639,
"000001" : 2.69137,
"000002" : 4.43337,
"000003" : 1.43939
},
"distance" : {
"standard_deviation" : 0.0627,
"mean" : 0.16247,
"median" : 0.15076,
"minimum" : 0.06858,
"sum_squares" : 1.72469,
"bins" : [ [ 0.06858, 1 ], [ 0.07267, 1 ], [ 0.07672, 1 ], [ 0.08444, 3 ], [ 0.0905, 1 ], [ 0.09556, 2 ], [ 0.10682, 3 ], [ 0.11371, 1 ], [ 0.1223, 1 ], [ 0.12974, 3 ], [ 0.13382, 3 ], [ 0.13852, 2 ], [ 0.14269, 5 ], [ 0.15277, 5 ], [ 0.15958, 1 ], [ 0.16466, 2 ], [ 0.16861, 1 ], [ 0.17376, 3 ], [ 0.1814, 2 ], [ 0.18824, 1 ], [ 0.19151, 1 ], [ 0.1954, 2 ], [ 0.19887, 1 ], [ 0.21964, 2 ], [ 0.2233, 1 ], [ 0.24078, 2 ], [ 0.24929, 1 ], [ 0.25629, 1 ], [ 0.26517, 1 ], [ 0.29659, 1 ], [ 0.30194, 1 ], [ 0.38164, 1 ] ],
"sum" : 9.26052,
"population" : 57,
"maximum" : 0.38164,
"variance" : 0.00393
},
"count" : 57
}, {
"name" : "Cluster 3",
"id" : "000002",
"center" : {
"000000" : 5.006,
"000001" : 3.428,
"000002" : 1.462,
"000003" : 0.246
},
"distance" : {
"standard_deviation" : 0.10099,
"mean" : 0.15072,
"median" : 0.13393,
"minimum" : 0.01692,
"sum_squares" : 1.63568,
"bins" : [ [ 0.01692, 1 ], [ 0.02701, 1 ], [ 0.03976, 4 ], [ 0.04698, 1 ], [ 0.05116, 1 ], [ 0.05536, 2 ], [ 0.06733, 1 ], [ 0.0745, 1 ], [ 0.08518, 1 ], [ 0.08882, 1 ], [ 0.09307, 3 ], [ 0.0969, 1 ], [ 0.10169, 1 ], [ 0.11685, 1 ], [ 0.12167, 3 ], [ 0.12718, 1 ], [ 0.13393, 2 ], [ 0.14237, 1 ], [ 0.14704, 3 ], [ 0.16085, 2 ], [ 0.16884, 3 ], [ 0.18397, 2 ], [ 0.19028, 2 ], [ 0.22407, 3 ], [ 0.22853, 1 ], [ 0.24727, 1 ], [ 0.26337, 1 ], [ 0.29205, 1 ], [ 0.30355, 1 ], [ 0.34761, 1 ], [ 0.44439, 1 ], [ 0.4947, 1 ] ],
"sum" : 7.53624,
"population" : 50,
"maximum" : 0.4947,
"variance" : 0.0102
},
"count" : 50
} ],
"fields" : {
"000000" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 0.82807,
"mean" : 5.84333,
"median" : 5.77889,
"minimum" : 4.3,
"sum_squares" : 5223.85,
"missing_count" : 0,
"bins" : [ [ 4.3, 1 ], [ 4.425, 4 ], [ 4.6, 4 ], [ 4.77143, 7 ], [ 4.9625, 16 ], [ 5.1, 9 ], [ 5.2, 4 ], [ 5.3, 1 ], [ 5.4, 6 ], [ 5.5, 7 ], [ 5.6, 6 ], [ 5.7, 8 ], [ 5.8, 7 ], [ 5.9, 3 ], [ 6, 6 ], [ 6.1, 6 ], [ 6.2, 4 ], [ 6.3, 9 ], [ 6.4, 7 ], [ 6.5, 5 ], [ 6.6, 2 ], [ 6.7, 8 ], [ 6.8, 3 ], [ 6.9, 4 ], [ 7, 1 ], [ 7.1, 1 ], [ 7.2, 3 ], [ 7.3, 1 ], [ 7.4, 1 ], [ 7.6, 1 ], [ 7.7, 4 ], [ 7.9, 1 ] ],
"sum" : 876.5,
"population" : 150,
"maximum" : 7.9,
"variance" : 0.68569,
"splits" : [ 4.51526, 4.67252, 4.81113, 4.89582, 4.96139, 5.01131, 5.05992, 5.11148, 5.18177, 5.35681, 5.44129, 5.5108, 5.58255, 5.65532, 5.71658, 5.77889, 5.85381, 5.97078, 6.05104, 6.13074, 6.23023, 6.29578, 6.35078, 6.41459, 6.49383, 6.63013, 6.70719, 6.79218, 6.92597, 7.20423, 7.64746 ]
},
"datatype" : "double",
"order" : 0,
"optype" : "numeric",
"name" : "sepal length",
"column_number" : 0
},
"000001" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 0.43587,
"counts" : [ [ 2, 1 ], [ 2.2, 3 ], [ 2.3, 4 ], [ 2.4, 3 ], [ 2.5, 8 ], [ 2.6, 5 ], [ 2.7, 9 ], [ 2.8, 14 ], [ 2.9, 10 ], [ 3, 26 ], [ 3.1, 11 ], [ 3.2, 13 ], [ 3.3, 6 ], [ 3.4, 12 ], [ 3.5, 6 ], [ 3.6, 4 ], [ 3.7, 3 ], [ 3.8, 6 ], [ 3.9, 2 ], [ 4, 1 ], [ 4.1, 1 ], [ 4.2, 1 ], [ 4.4, 1 ] ],
"mean" : 3.05733,
"median" : 3.02044,
"minimum" : 2,
"sum_squares" : 1430.4,
"missing_count" : 0,
"sum" : 458.6,
"population" : 150,
"maximum" : 4.4,
"variance" : 0.18998
},
"datatype" : "double",
"order" : 1,
"optype" : "numeric",
"name" : "sepal width",
"column_number" : 1
},
"000002" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 1.7653,
"mean" : 3.758,
"median" : 4.34142,
"minimum" : 1,
"sum_squares" : 2582.71,
"missing_count" : 0,
"bins" : [ [ 1, 1 ], [ 1.16667, 3 ], [ 1.3, 7 ], [ 1.4, 13 ], [ 1.5, 13 ], [ 1.6, 7 ], [ 1.7, 4 ], [ 1.9, 2 ], [ 3, 1 ], [ 3.3, 2 ], [ 3.5, 2 ], [ 3.6, 1 ], [ 3.75, 2 ], [ 3.9, 3 ], [ 4.0375, 8 ], [ 4.23333, 6 ], [ 4.46667, 12 ], [ 4.6, 3 ], [ 4.74444, 9 ], [ 4.94444, 9 ], [ 5.1, 8 ], [ 5.25, 4 ], [ 5.46, 5 ], [ 5.6, 6 ], [ 5.75, 6 ], [ 5.95, 4 ], [ 6.1, 3 ], [ 6.3, 1 ], [ 6.4, 1 ], [ 6.6, 1 ], [ 6.7, 2 ], [ 6.9, 1 ] ],
"sum" : 563.7,
"population" : 150,
"maximum" : 6.9,
"variance" : 3.11628,
"splits" : [ 1.25138, 1.32426, 1.37171, 1.40962, 1.44567, 1.48173, 1.51859, 1.56301, 1.6255, 1.74645, 3.23033, 3.675, 3.94203, 4.0469, 4.18243, 4.34142, 4.45309, 4.51823, 4.61771, 4.72566, 4.83445, 4.93363, 5.03807, 5.1064, 5.20938, 5.43979, 5.5744, 5.6646, 5.81496, 6.02913, 6.38125 ]
},
"datatype" : "double",
"order" : 2,
"optype" : "numeric",
"name" : "petal length",
"column_number" : 2
},
"000003" : {
"preferred" : true,
"summary" : {
"standard_deviation" : 0.76224,
"counts" : [ [ 0.1, 5 ], [ 0.2, 29 ], [ 0.3, 7 ], [ 0.4, 7 ], [ 0.5, 1 ], [ 0.6, 1 ], [ 1, 7 ], [ 1.1, 3 ], [ 1.2, 5 ], [ 1.3, 13 ], [ 1.4, 8 ], [ 1.5, 12 ], [ 1.6, 4 ], [ 1.7, 2 ], [ 1.8, 12 ], [ 1.9, 5 ], [ 2, 6 ], [ 2.1, 6 ], [ 2.2, 3 ], [ 2.3, 8 ], [ 2.4, 3 ], [ 2.5, 3 ] ],
"mean" : 1.19933,
"median" : 1.32848,
"minimum" : 0.1,
"sum_squares" : 302.33,
"missing_count" : 0,
"sum" : 179.9,
"population" : 150,
"maximum" : 2.5,
"variance" : 0.58101
},
"datatype" : "double",
"order" : 3,
"optype" : "numeric",
"name" : "petal width",
"column_number" : 3
}
},
"max-workers" : 1,
"final_field_scales" : {
"000000" : 0.22445403384096205,
"000001" : 0.4264199229189562,
"000002" : 0.10528728930079047,
"000003" : 0.24383875393929133
},
"excluded_input_fields" : [ "species" ],
"input_fields" : [ "000000", "000001", "000002", "000003" ],
"seed" : "kmeans-seed"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment