Skip to content

Instantly share code, notes, and snippets.

@tingletech
Forked from mbostock/.block
Created November 19, 2012 00:03
Show Gist options
  • Save tingletech/4108269 to your computer and use it in GitHub Desktop.
Save tingletech/4108269 to your computer and use it in GitHub Desktop.
Bayesian Javascript Example

on-line calculator version: http://tingletech.github.com/bayesian-credible-interval/

Based on d3.js box plots example, this page calculates bayesian credible intervals for a discrete random variable pop-up user survey.

The coding logic of how to do this was explained to me in R by Zen on stats.stackexchange.com and I translated this to python and now to javascript.

This uses https://github.com/jstat/jstat/ JavaScript Statistical Libraray to get random samples from the gamma distribution for calculating the dirichlet distribution.

A Data Driven Docuent http://d3js.org is used to display the box plots.

(function() {
// Inspired by http://informationandvisualization.de/blog/box-plot
d3.box = function() {
var width = 1,
height = 1,
duration = 0,
domain = null,
value = Number,
whiskers = boxWhiskers,
quartiles = boxQuartiles,
tickFormat = null;
// For each small multiple…
function box(g) {
g.each(function(d, i) {
d = d.map(value).sort(d3.ascending);
var g = d3.select(this),
n = d.length,
min = d[0],
max = d[n - 1];
// Compute quartiles. Must return exactly 3 elements.
var quartileData = d.quartiles = quartiles(d);
// Compute whiskers. Must return exactly 2 elements, or null.
var whiskerIndices = whiskers && whiskers.call(this, d, i),
whiskerData = whiskerIndices && whiskerIndices.map(function(i) { return d[i]; });
// Compute outliers. If no whiskers are specified, all data are "outliers".
// We compute the outliers as indices, so that we can join across transitions!
var outlierIndices = whiskerIndices
? d3.range(0, whiskerIndices[0]).concat(d3.range(whiskerIndices[1] + 1, n))
: d3.range(n);
// Compute the new x-scale.
var x1 = d3.scale.linear()
.domain(domain && domain.call(this, d, i) || [min, max])
.range([height, 0]);
// Retrieve the old x-scale, if this is an update.
var x0 = this.__chart__ || d3.scale.linear()
.domain([0, Infinity])
.range(x1.range());
// Stash the new scale.
this.__chart__ = x1;
// Note: the box, median, and box tick elements are fixed in number,
// so we only have to handle enter and update. In contrast, the outliers
// and other elements are variable, so we need to exit them! Variable
// elements also fade in and out.
// Update center line: the vertical line spanning the whiskers.
var center = g.selectAll("line.center")
.data(whiskerData ? [whiskerData] : []);
center.enter().insert("line", "rect")
.attr("class", "center")
.attr("x1", width / 2)
.attr("y1", function(d) { return x0(d[0]); })
.attr("x2", width / 2)
.attr("y2", function(d) { return x0(d[1]); })
.style("opacity", 1e-6)
.transition()
.duration(duration)
.style("opacity", 1)
.attr("y1", function(d) { return x1(d[0]); })
.attr("y2", function(d) { return x1(d[1]); });
center.transition()
.duration(duration)
.style("opacity", 1)
.attr("y1", function(d) { return x1(d[0]); })
.attr("y2", function(d) { return x1(d[1]); });
center.exit().transition()
.duration(duration)
.style("opacity", 1e-6)
.attr("y1", function(d) { return x1(d[0]); })
.attr("y2", function(d) { return x1(d[1]); })
.remove();
// Update innerquartile box.
var box = g.selectAll("rect.box")
.data([quartileData]);
box.enter().append("rect")
.attr("class", "box")
.attr("x", 0)
.attr("y", function(d) { return x0(d[2]); })
.attr("width", width)
.attr("height", function(d) { return x0(d[0]) - x0(d[2]); })
.transition()
.duration(duration)
.attr("y", function(d) { return x1(d[2]); })
.attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
box.transition()
.duration(duration)
.attr("y", function(d) { return x1(d[2]); })
.attr("height", function(d) { return x1(d[0]) - x1(d[2]); });
// Update median line.
var medianLine = g.selectAll("line.median")
.data([quartileData[1]]);
medianLine.enter().append("line")
.attr("class", "median")
.attr("x1", 0)
.attr("y1", x0)
.attr("x2", width)
.attr("y2", x0)
.transition()
.duration(duration)
.attr("y1", x1)
.attr("y2", x1);
medianLine.transition()
.duration(duration)
.attr("y1", x1)
.attr("y2", x1);
// Update whiskers.
var whisker = g.selectAll("line.whisker")
.data(whiskerData || []);
whisker.enter().insert("line", "circle, text")
.attr("class", "whisker")
.attr("x1", 0)
.attr("y1", x0)
.attr("x2", width)
.attr("y2", x0)
.style("opacity", 1e-6)
.transition()
.duration(duration)
.attr("y1", x1)
.attr("y2", x1)
.style("opacity", 1);
whisker.transition()
.duration(duration)
.attr("y1", x1)
.attr("y2", x1)
.style("opacity", 1);
whisker.exit().transition()
.duration(duration)
.attr("y1", x1)
.attr("y2", x1)
.style("opacity", 1e-6)
.remove();
// Update outliers.
var outlier = g.selectAll("circle.outlier")
.data(outlierIndices, Number);
outlier.enter().insert("circle", "text")
.attr("class", "outlier")
.attr("r", 5)
.attr("cx", width / 2)
.attr("cy", function(i) { return x0(d[i]); })
.style("opacity", 1e-6)
.transition()
.duration(duration)
.attr("cy", function(i) { return x1(d[i]); })
.style("opacity", 1);
outlier.transition()
.duration(duration)
.attr("cy", function(i) { return x1(d[i]); })
.style("opacity", 1);
outlier.exit().transition()
.duration(duration)
.attr("cy", function(i) { return x1(d[i]); })
.style("opacity", 1e-6)
.remove();
// Compute the tick format.
var format = tickFormat || x1.tickFormat(8);
// Update box ticks.
var boxTick = g.selectAll("text.box")
.data(quartileData);
boxTick.enter().append("text")
.attr("class", "box")
.attr("dy", ".3em")
.attr("dx", function(d, i) { return i & 1 ? 6 : -6 })
.attr("x", function(d, i) { return i & 1 ? width : 0 })
.attr("y", x0)
.attr("text-anchor", function(d, i) { return i & 1 ? "start" : "end"; })
.text(format)
.transition()
.duration(duration)
.attr("y", x1);
boxTick.transition()
.duration(duration)
.text(format)
.attr("y", x1);
// Update whisker ticks. These are handled separately from the box
// ticks because they may or may not exist, and we want don't want
// to join box ticks pre-transition with whisker ticks post-.
var whiskerTick = g.selectAll("text.whisker")
.data(whiskerData || []);
whiskerTick.enter().append("text")
.attr("class", "whisker")
.attr("dy", ".3em")
.attr("dx", 6)
.attr("x", width)
.attr("y", x0)
.text(format)
.style("opacity", 1e-6)
.transition()
.duration(duration)
.attr("y", x1)
.style("opacity", 1);
whiskerTick.transition()
.duration(duration)
.text(format)
.attr("y", x1)
.style("opacity", 1);
whiskerTick.exit().transition()
.duration(duration)
.attr("y", x1)
.style("opacity", 1e-6)
.remove();
});
d3.timer.flush();
}
box.width = function(x) {
if (!arguments.length) return width;
width = x;
return box;
};
box.height = function(x) {
if (!arguments.length) return height;
height = x;
return box;
};
box.tickFormat = function(x) {
if (!arguments.length) return tickFormat;
tickFormat = x;
return box;
};
box.duration = function(x) {
if (!arguments.length) return duration;
duration = x;
return box;
};
box.domain = function(x) {
if (!arguments.length) return domain;
domain = x == null ? x : d3.functor(x);
return box;
};
box.value = function(x) {
if (!arguments.length) return value;
value = x;
return box;
};
box.whiskers = function(x) {
if (!arguments.length) return whiskers;
whiskers = x;
return box;
};
box.quartiles = function(x) {
if (!arguments.length) return quartiles;
quartiles = x;
return box;
};
return box;
};
function boxWhiskers(d) {
return [0, d.length - 1];
}
function boxQuartiles(d) {
return [
d3.quantile(d, .25),
d3.quantile(d, .5),
d3.quantile(d, .75)
];
}
})();
'use strict';
// http://stats.stackexchange.com/questions/39319/bayesian-user-survey-with-a-credible-interval#40403
// https://gist.github.com/3896501
var rdirichlet = function(a){
var y = [];
var sum_y = 0;
// first loop with the gamma sample
for (var i = 0; i < a.length; i++) {
y[i] = jStat.gamma.sample(a[i], a.length, 1);
sum_y = sum_y + y[i];
}
// second loop to normalize
for (var i = 0; i < a.length; i++) {
y[i] = y[i] / sum_y;
}
return y;
};
var monte_carlo = function(observations){
var ITERATIONS = 1000;
var results = [];
var max = 0;
for (var i =0; i < observations.length; i++) {
observations[i] = observations[i] + 1;
results[i] = [];
}
for (var i = 0; i < ITERATIONS; i++) {
var roll = rdirichlet(observations);
for (var j = 0; j < roll.length; j++) {
var v = roll[j];
if (v > max) max = v;
results[j].push(v);
}
}
/*
the return array is an array of arrays
results.length == observations.length
results.[i].length == ITERATIONS
That is because this matches "data" in https://gist.github.com/4061502
*/
return {
'results': results,
'max': max
};
}
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="UTF-8"/>
<title>bayes.js Baysean Javascript Example</title>
<style>
/* css from https://gist.github.com/4061502 */
body {
font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
margin: auto;
position: relative;
width: 960px;
}
button {
position: absolute;
right: 10px;
top: 10px;
}
.box {
font: 10px sans-serif;
}
.box line,
.box rect,
.box circle {
fill: #fff;
stroke: #000;
stroke-width: 1.5px;
}
.box .center {
stroke-dasharray: 3,3;
}
.box .outlier {
fill: none;
stroke: #ccc;
display: none;
}
</style>
</head>
<body>
</body>
<script type="text/javascript" src="http://tingletech.github.com/jstat/jstat.min.js"></script>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script src="box.js"></script>
<script>
'use strict';
// http://stats.stackexchange.com/questions/39319/bayesian-user-survey-with-a-credible-interval#40403
// https://gist.github.com/3896501
var rdirichlet = function(a){
var y = [];
var sum_y = 0;
// first loop with the gamma sample
for (var i = 0; i < a.length; i++) {
y[i] = jStat.gamma.sample(a[i], a.length, 1);
sum_y = sum_y + y[i];
}
// second loop to normalize
for (var i = 0; i < a.length; i++) {
y[i] = y[i] / sum_y;
}
return y;
};
var monte_carlo = function(observations){
var ITERATIONS = 1000;
var results = [];
for (var i =0; i < observations.length; i++) {
observations[i] = observations[i] + 1;
results[i] = [];
}
for (var i = 0; i < ITERATIONS; i++) {
var roll = rdirichlet(observations);
for (var j = 0; j < roll.length; j++) {
results[j].push(roll[j]);
}
}
/*
the return array is an array of arrays
results.length == observations.length
results.[i].length == ITERATIONS
That is because this matches "data" in https://gist.github.com/4061502
*/
return results;
}
// need to get these into the chart
var categories = [
'k-12 teacher or librarian',
'k-12 student',
'college or graduate student',
'faculty or academic researcher',
'archivist or librarian',
'genealogist or family researcher',
'other'
];
var observations = [ 6, 7, 53, 30, 36, 23, 58, 0 ];
// run 1000 simulations
var simulations = 1000;
// set up a standard library array to hold the simulation results
var results = monte_carlo(observations);
//
// from here down is copied from https://gist.github.com/4061502
var margin = {top: 10, right: 50, bottom: 20, left: 50},
width = 120 - margin.left - margin.right,
height = 500 - margin.top - margin.bottom;
var min = Infinity,
max = -Infinity;
var chart = d3.box()
.whiskers(iqr(1.5))
.width(width)
.height(height);
chart.domain([0, 0.4]);
var svg = d3.select("body").selectAll("svg")
.data(results)
.enter().append("svg")
.attr("class", "box")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.bottom + margin.top)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")")
.call(chart);
// Returns a function to compute the interquartile range.
function iqr(k) {
return function(d, i) {
var q1 = d.quartiles[0],
q3 = d.quartiles[2],
iqr = (q3 - q1) * k,
i = -1,
j = d.length;
while (d[++i] < q1 - iqr);
while (d[--j] > q3 + iqr);
return [i, j];
};
}
</script>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment