Skip to content

Instantly share code, notes, and snippets.

@saifuddin778
Last active April 17, 2016 06:58
Show Gist options
  • Save saifuddin778/f8a02e5a5d0602ff206df8b888daaedf to your computer and use it in GitHub Desktop.
Save saifuddin778/f8a02e5a5d0602ff206df8b888daaedf to your computer and use it in GitHub Desktop.
Classification Heatmap

An implementation of the idea of visualizing classification problems (with real-valued attributes) as heatmaps. Each row of the heatmap is an attribute, and the columns correspond to the classes to be classified.

A key benefit of this approach to multidimensional visualization is that it color-codes (and thereby exposes) the key variations of the different attributes (rows) across the classes (columns), helping the viewer decide which attributes deserve the most focus when classifying the data.

For the sake of illustration, the implementation below uses the popular Iris dataset.

<!DOCTYPE html>
<meta charset="utf-8">
<style>
/* Category (class) captions: the script places one above the heatmap per
   category and attaches the mouseover_/mouseout_ handlers to it. */
.category_title {
fill: gray;
font-size: 20px;
font-family: monospace;
text-anchor: middle;
cursor: pointer;
}
/* Per-feature captions drawn inside each category's band. Hidden by default
   (display: none); mouseover_ switches the matching ones to display: block
   and mouseout_ hides them all again. */
.feature_title {
fill: black;
font-size: 15px;
font-style: italic;
font-family: monospace;
text-anchor: middle;
cursor: pointer;
display: none;
}
</style>
<body>
<script src="http://code.jquery.com/jquery-1.8.3.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.5/d3.min.js"></script>
<script>
/**
 * Lists the feature (non-category) column names of one parsed data row.
 *
 * @param {Object} row - A parsed row whose own keys are the column names.
 * @param {number} cat_i - Zero-based position of the category column among the keys.
 * @returns {string[]} Every column name except the one at position cat_i,
 *     in key order. An empty array when row is null or undefined.
 */
function get_features(row, cat_i) {
    if (row == null) {
        return [];
    }
    var keys = Object.keys(row);
    var category_index = Number(cat_i);
    var features = [];
    // Walk ALL columns: stopping at keys.length - 1 silently dropped the last
    // feature whenever the category column was not the final one.
    for (var i = 0; i < keys.length; i++) {
        if (i !== category_index) {
            features.push(keys[i]);
        }
    }
    return features;
}
// Base colour ramp (yellowgreen -> yellow, interpolated in HSL). It is used
// only as a template: every feature gets its own copy with its own domain.
var color_scheme = d3.scale.linear().interpolate(d3.interpolateHsl).range(['yellowgreen', 'yellow']);

// Load the dataset, build one colour scale per feature from that feature's
// min/max, then hand everything to plot_.
d3.csv("https://raw.githubusercontent.com/plotly/datasets/master/iris.csv", function(d) {
    return d;
}, function(error, rows) {
    if (error || !rows || rows.length === 0) {
        // Nothing to draw; report instead of failing later on rows[0].
        console.error("Could not load the dataset", error);
        return;
    }
    var cat_i = 4; //##--INDICATE THIS FOR YOUR DATASET
    var features = get_features(rows[0], cat_i);
    var boundaries = {};
    var colors = {};
    features.forEach(function(feat) {
        // CSV cells arrive as strings; coerce so min/max compare numerically
        // rather than lexicographically.
        var as_number = function(d) {
            return +d[feat];
        };
        var lowest = d3.min(rows, as_number);
        var highest = d3.max(rows, as_number);
        boundaries[feat + '_min'] = lowest;
        boundaries[feat + '_max'] = highest;
        // domain() mutates and returns the scale it is called on, so calling
        // it on the shared color_scheme would leave every feature pointing at
        // one scale carrying the last feature's domain. Copy first.
        colors[feat] = color_scheme.copy().domain([lowest, highest]);
    });
    plot_(rows, boundaries, features, colors, cat_i);
});
/**
 * Draws the classification heatmap into a new <svg> appended to <body>.
 *
 * Each data row becomes one column of rectangles (one rectangle per feature),
 * filled through that feature's colour function. A title is added above each
 * category's horizontal span, plus hidden per-feature labels that the
 * mouseover_/mouseout_ handlers toggle.
 *
 * @param {Object[]} rows - Parsed data rows; expected to be sorted by category
 *     so that each category occupies one contiguous x-range.
 * @param {Object} boundaries - Not read by this function; kept for interface
 *     compatibility.
 * @param {string[]} features - Names of the columns to draw, one heatmap row each.
 * @param {Object} colors - Maps a feature name to a function turning a value
 *     into a fill colour.
 * @param {number} cat_i - Position of the category column among a row's keys.
 */
function plot_(rows, boundaries, features, colors, cat_i) {
    var width = 800;
    var height = 500;
    var svg = d3.select("body").append("svg").attr("width", width).attr("height", height);
    var x_ = d3.scale.linear().domain([0, rows.length]).range([0, width * 0.95]);
    var y_ = d3.scale.linear().domain([0, features.length]).range([height * 0.1, height * 0.95]);
    // NOTE: box_width uses the full width while x_ spans 95% of it, so
    // neighbouring rectangles overlap slightly.
    var box_width = width / rows.length;
    var box_height = y_(1) - y_(0);
    var category_limits = {};
    var min_y = Number.POSITIVE_INFINITY;
    var has_own = Object.prototype.hasOwnProperty;

    for (var i = 0; i < rows.length; i++) {
        var row_data = rows[i];
        var x = x_(i);
        var cat_ = row_data[d3.keys(row_data)[cat_i]];
        //keep track of where a category begins and ends in a sorted dataset
        //on x-axis
        if (!has_own.call(category_limits, cat_)) {
            category_limits[cat_] = {
                min: x,
                max: x,
                feature_limits: {}
            };
        } else {
            if (x < category_limits[cat_].min) {
                category_limits[cat_].min = x;
            }
            if (x > category_limits[cat_].max) {
                category_limits[cat_].max = x;
            }
        }
        for (var j = 0; j < features.length; j++) {
            var feature = features[j];
            var y = y_(j);
            svg.append("rect")
                .attr("x", x)
                .attr("y", y)
                .attr("width", box_width)
                .attr("height", box_height)
                .attr("category", cat_)
                .attr("feature", feature)
                .style("fill", colors[feature](row_data[feature]))
                .style("stroke", "white")
                .style("stroke-width", 0);
            //track whats the top y limit as min of y (since we kept the y-axis inverse)
            if (y < min_y) {
                min_y = y;
            }
            // Remember the top edge of each feature row once per category so
            // the feature label can be centred in it later.
            if (!has_own.call(category_limits[cat_].feature_limits, feature)) {
                category_limits[cat_].feature_limits[feature] = y;
            }
        }
    }

    //now append the category titles
    for (var category in category_limits) {
        var limits = category_limits[category];
        // Centre the title between the first and last column of the category.
        var text_x = (limits.min + limits.max) / 2;
        var text_y = min_y - 10;
        svg.append("text")
            .attr("x", text_x)
            .attr("y", text_y)
            .attr("class", "category_title")
            .text(category)
            .on("mouseover", mouseover_)
            .on("mouseout", mouseout_);
        for (var feature_limit in limits.feature_limits) {
            var feat_y = limits.feature_limits[feature_limit] + (box_height) / 2;
            // The "title" attribute ties the label to its category; the
            // mouseover_ handler filters on it.
            svg.append("text")
                .attr("x", text_x)
                .attr("y", feat_y)
                .attr("title", category)
                .attr("class", "feature_title")
                .text(feature_limit);
        }
    }
    return;
}
/**
 * Mouseout handler for category titles: hides every feature label again
 * and stops the current d3 event from propagating further.
 */
function mouseout_() {
    var feature_labels = d3.selectAll(".feature_title");
    feature_labels.style("display", "none");
    d3.event.stopPropagation();
}
/**
 * Mouseover handler for category titles: reveals the feature labels whose
 * "title" attribute equals the hovered title's text, then stops the current
 * d3 event from propagating further.
 *
 * Invoked by d3 with `this` bound to the hovered <text> element.
 */
function mouseover_() {
    // Read the plain text content rather than innerHTML: innerHTML is
    // HTML-escaped (e.g. "&" becomes "&amp;") and so would not equal the raw
    // "title" attribute for such category names.
    var category = d3.select(this).text();
    d3.selectAll(".feature_title").filter(function() {
        return d3.select(this).attr("title") === category;
    }).style("display", "block");
    d3.event.stopPropagation();
    return;
}
</script>
</body>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment