Skip to content

Instantly share code, notes, and snippets.

@Kcnarf
Last active December 3, 2018 08:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Kcnarf/3a72616a5f9b3478570cf3ee001bb89c to your computer and use it in GitHub Desktop.
Save Kcnarf/3a72616a5f9b3478570cf3ee001bb89c to your computer and use it in GitHub Desktop.
KDE to Violin plot
license: gpl-3.0

This block shows how to produce the contour of a Violin viz using a Kernel Density Estimation.

==original README==

Kernel density estimation is a method of estimating the probability distribution of a random variable based on a random sample. In contrast to a histogram, kernel density estimation produces a smooth estimate. The smoothness can be tuned via the kernel’s bandwidth parameter. With the correct choice of bandwidth, important features of the distribution can be seen, while an incorrect choice results in undersmoothing or oversmoothing and obscured features.

This example shows a histogram and a kernel density estimation for times between eruptions of Old Faithful Geyser in Yellowstone National Park, taken from R’s faithful dataset. The data follow a bimodal distribution; short eruptions are followed by a wait time averaging about 55 minutes, and long eruptions by a wait time averaging about 80 minutes. In recent years, wait times have been increasing, possibly due to the effects of earthquakes on the geyser’s geohydrology.

This example is based on a Protovis version by John Firebaugh.

[79,54,74,62,85,55,88,85,51,85,54,84,78,47,83,52,62,84,52,79,51,47,78,69,74,83,55,76,78,79,73,77,66,80,74,52,48,80,59,90,80,58,84,58,73,83,64,53,82,59,75,90,54,80,54,83,71,64,77,81,59,84,48,82,60,92,78,78,65,73,82,56,79,71,62,76,60,78,76,83,75,82,70,65,73,88,76,80,48,86,60,90,50,78,63,72,84,75,51,82,62,88,49,83,81,47,84,52,86,81,75,59,89,79,59,81,50,85,59,87,53,69,77,56,88,81,45,82,55,90,45,83,56,89,46,82,51,86,53,79,81,60,82,77,76,59,80,49,96,53,77,77,65,81,71,70,81,93,53,89,45,86,58,78,66,76,63,88,52,93,49,57,77,68,81,81,73,50,85,74,55,77,83,83,51,78,84,46,83,55,81,57,76,84,77,81,87,77,51,78,60,82,91,53,78,46,77,84,49,83,71,80,49,75,64,76,53,94,55,76,50,82,54,75,78,79,78,78,70,79,70,54,86,50,90,54,54,77,79,64,75,47,86,63,85,82,57,82,67,74,54,83,73,73,88,80,71,83,56,79,78,84,58,83,43,60,75,81,46,90,46,74]
<!DOCTYPE html>
<meta charset="utf-8">
<title>Kernel Density Estimation to Violin Plot</title>
<style>
/* Base font for the page; axis tick labels inherit it. */
body {
font: 10px sans-serif;
}
/* Large, very light caption drawn by the script with the text "Original chart". */
#original-chart {
font-size: 80px;
fill: #eee;
}
/* Histogram rectangles. */
.bar {
fill: #bbb;
shape-rendering: crispEdges;
}
/* Kernel density curve: stroke only, no fill. */
.line {
fill: none;
stroke: #000;
stroke-width: 1.5px;
}
/* Filled area path that the script morphs between a line and a violin. */
.violin {
fill: grey;
stroke: black;
}
/* White caption the script positions via x(47) / y(-0.0065). */
#violin-text {
font-size: 20px;
fill: white;
}
/* Axis domain path and tick lines. */
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
/* Hide the y axis domain path; only its ticks remain visible. */
.y.axis path {
display: none;
}
</style>
<body>
<script src="https://d3js.org/d3.v3.min.js"></script>
<script>
// Chart geometry and animation timing.
//   margin   - blank space kept around the plotting area, in pixels
//   width    - inner plotting width  (960px outer width minus horizontal margins)
//   height   - inner plotting height (500px outer height minus vertical margins)
//   duration - length of one animation step, in milliseconds
// All four names are declared in a single `var` statement. Previously a
// missing comma after `height` ended the statement early (automatic semicolon
// insertion), which turned `duration` into an undeclared, implicit global.
var margin = {top: 20, right: 30, bottom: 30, left: 40},
    width = 960 - margin.left - margin.right,
    height = 500 - margin.top - margin.bottom,
    duration = 1000;
// Horizontal scale: data values in [30, 110] (the axis is labelled
// "Time between Eruptions (min.)") mapped onto the inner width in pixels.
var x = d3.scale.linear();
x.domain([30, 110]);
x.range([0, width]);

// Vertical scale: values in [0, 0.1] mapped onto the inner height. The range
// is reversed so that larger values sit higher on screen.
var y = d3.scale.linear();
y.domain([0, .1]);
y.range([height, 0]);

// Bottom axis driven by the horizontal scale.
var xAxis = d3.svg.axis();
xAxis.scale(x);
xAxis.orient("bottom");

// Left axis driven by the vertical scale, with ticks formatted as percentages.
var yAxis = d3.svg.axis();
yAxis.scale(y);
yAxis.orient("left");
yAxis.tickFormat(d3.format("%"));
// Every generator below consumes vertices shaped as [value, density].

// Pixel x-position of a vertex.
function vertexX(vertex) {
  return x(vertex[0]);
}

// Builds an area generator whose two edges are y(upper(density)) and
// y(lower(density)); both edges share the same x-position.
function densityArea(upper, lower) {
  var area = d3.svg.area();
  area.x(vertexX);
  area.y0(function(vertex) { return y(upper(vertex[1])); });
  area.y1(function(vertex) { return y(lower(vertex[1])); });
  return area;
}

// Plain density curve.
var line = d3.svg.line();
line.x(vertexX);
line.y(function(vertex) { return y(vertex[1]); });

// Degenerate area: both edges follow the density curve, so the path has no
// thickness and overlays the curve.
var violinAsLine = densityArea(
  function(density) { return density; },
  function(density) { return density; }
);

// Curve mirrored around zero at full density on each side, i.e. twice the
// total thickness of `violin` below.
var violinTooLarge = densityArea(
  function(density) { return density; },
  function(density) { return -density; }
);

// Curve mirrored around zero at half density on each side.
var violin = densityArea(
  function(density) { return density / 2; },
  function(density) { return -density / 2; }
);

// Histogram layout reporting relative frequencies (frequency(false)), with
// bin thresholds taken from the x scale's ticks.
var histogram = d3.layout.histogram();
histogram.frequency(false);
histogram.bins(x.ticks(40));
// Root <svg> element sized to the full outer dimensions (inner size plus margins).
var svgRoot = d3.select("body").append("svg");
svgRoot.attr("width", width + margin.left + margin.right);
svgRoot.attr("height", height + margin.top + margin.bottom);

// `svg` is the inner <g>, shifted by the left/top margins; all chart content
// is appended to it.
var svg = svgRoot.append("g");
svg.attr("transform", "translate(" + margin.left + "," + margin.top + ")");

// Large background caption.
// NOTE(review): "center" is not one of the SVG text-anchor keywords
// (start | middle | end), so this attribute presumably has no effect. Confirm
// before changing it, since the caption's current position depends on it.
var backgroundCaption = svg.append("text");
backgroundCaption.attr("id", "original-chart");
backgroundCaption.attr("dx", width/4);
backgroundCaption.attr("dy", 50);
backgroundCaption.text("Original chart");
backgroundCaption.attr("text-anchor", "center");

// X axis group placed at the bottom edge of the plotting area.
var xAxisGroup = svg.append("g");
xAxisGroup.attr("class", "x axis");
xAxisGroup.attr("transform", "translate(0," + height + ")");
xAxisGroup.call(xAxis);

// Axis title, right-aligned to the end of the axis and raised 6px above it.
var xAxisTitle = xAxisGroup.append("text");
xAxisTitle.attr("class", "label");
xAxisTitle.attr("x", width);
xAxisTitle.attr("y", -6);
xAxisTitle.style("text-anchor", "end");
xAxisTitle.text("Time between Eruptions (min.)");

// Y axis group at the left edge of the plotting area.
var yAxisGroup = svg.append("g");
yAxisGroup.attr("class", "y axis");
yAxisGroup.call(yAxis);
// Loads the sample from faithful.json, draws the histogram, the density curve
// and the (initially flat) violin path, then starts an endless animation that
// morphs the curve into a violin and back.
d3.json("faithful.json", function(error, faithful) {
  if (error) throw error;

  // All three names are declared in one `var` statement. Previously a missing
  // comma after the `kde` initializer ended the statement early (automatic
  // semicolon insertion) and leaked `kdeVerteces` as an implicit global.
  var data = histogram(faithful),
      kde = kernelDensityEstimator(epanechnikovKernel(7), x.ticks(100)),
      kdeVerteces = kde(faithful);

  // Histogram bars. Every bar uses the width of the first bin, minus a 1px gap.
  svg.append("g").classed("bars", true)
    .selectAll(".bar")
      .data(data)
    .enter().insert("rect", ".axis")
      .attr("class", "bar")
      .attr("x", function(d) { return x(d.x) + 1; })
      .attr("y", function(d) { return y(d.y); })
      .attr("width", x(data[0].dx + data[0].x) - x(data[0].x) - 1)
      .attr("height", function(d) { return height - y(d.y); });

  // Density curve.
  svg.append("path")
      .datum(kdeVerteces)
      .attr("class", "line")
      .attr("d", line);

  // Violin path; starts as the zero-thickness area that overlays the curve.
  svg.append("path")
      .datum(kdeVerteces)
      .attr("class", "violin")
      .attr("d", violinAsLine);

  // Caption placed over the violin body.
  svg.append("text")
      .attr("id", "violin-text")
      .attr("dx", x(47))
      .attr("dy", y(-0.0065))
      .text("Here is the Violin");

  // One full animation cycle; it re-invokes itself when the last stage ends.
  (function run () {
    d3.select("body")
      .transition()
        .delay(2000)
        .duration(duration)
        .each(function() {
          // Stage 1: move the chart group up and fade out bars and y axis.
          svg.transition()
              .duration(duration)
              .attr("transform", "translate("+[margin.left,-height/2]+")");
          d3.selectAll(".bars").transition()
              .style("opacity", 0);
          d3.select(".y.axis").transition()
              .style("opacity", 0);
        })
      .transition()
        .duration(duration)
        .each(function() {
          // Stage 2: mirror the curve at full density on each side.
          d3.select(".violin").transition()
              .attr("d", violinTooLarge);
        })
      .transition()
        .duration(4*duration)
        .each(function() {
          // Stage 3: shrink to half density on each side, update the caption
          // once that morph ends, and fade out the density curve.
          d3.select(".violin").transition()
              .duration(duration)
              .attr("d", violin)
              .each("end", function () {
                d3.select("#violin-text")
                    .text("Here is the Violin .................. with adequate area");
              });
          d3.select(".line").transition()
              .duration(duration)
              .style("opacity", 0);
        })
      .transition()
        .duration(duration)
        .each(function() {
          // Stage 4: restore the caption, grow back to the full-density
          // shape and fade the density curve back in.
          d3.select("#violin-text")
              .text("Here is the Violin");
          d3.select(".violin").transition()
              .attr("d", violinTooLarge);
          d3.select(".line").transition()
              .style("opacity", 1);
        })
      .transition()
        .duration(duration)
        .each(function() {
          // Stage 5: collapse the violin back onto the curve.
          d3.select(".violin").transition()
              .attr("d", violinAsLine);
        })
      .transition()
        .duration(duration)
        .each(function() {
          // Stage 6: move the chart group back to its margin offset and fade
          // bars and y axis back in.
          svg.transition()
              .duration(duration)
              .attr("transform", "translate("+[margin.left, margin.top]+")");
          d3.selectAll(".bars").transition()
              .style("opacity", 1);
          d3.select(".y.axis").transition()
              .style("opacity", 1);
        })
        // Start the next cycle when the last stage ends.
        .each("end", run);
  })();
});
/**
 * Creates a kernel density estimator.
 *
 * @param {function(number): number} kernel - kernel function, called with the
 *   difference between an evaluation point and one observation.
 * @param {number[]} x - points at which the density is evaluated.
 * @returns {function(number[]): Array} function that takes a sample and
 *   returns one [point, density] pair per entry of `x`, where density is the
 *   d3.mean of the kernel values over the sample.
 */
function kernelDensityEstimator(kernel, x) {
  // Mean kernel value of all observations relative to a single point.
  function densityAt(point, sample) {
    return d3.mean(sample, function(observation) {
      return kernel(point - observation);
    });
  }

  return function(sample) {
    return x.map(function(point) {
      return [point, densityAt(point, sample)];
    });
  };
}
/**
 * Creates an Epanechnikov kernel with the given bandwidth.
 *
 * @param {number} scale - bandwidth; inputs are divided by it before evaluation.
 * @returns {function(number): number} kernel returning
 *   0.75 * (1 - t^2) / scale with t = u / scale when |t| <= 1, and 0 otherwise.
 */
function epanechnikovKernel(scale) {
  return function(u) {
    var t = u / scale;
    if (Math.abs(t) <= 1) {
      return .75 * (1 - t * t) / scale;
    }
    return 0;
  };
}
</script>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment