feyderm/Readme.md

## Readme.md

      
    Raw
  

              Readme.md
            
          
    Data are from the `freetrade` dataset of the Amelia R package.
Missing values are drawn as filled circles and observed values as open circles. Each column of circles is one unique pattern of missing values across the variables of an observation. The bar chart on top shows the percentage of observations that have each pattern, and the bar chart on the right shows the percentage of observations in which each variable is missing.

  
## freetrade_missing_data_by_variable.csv

          
            year
            country
            tariff
            polity
            pop
            gdp.pc
            intresmi
            signed
            fiveop
            usheg

            
              0
              0
              0.339181286549708
              0.0116959064327485
              0
              0
              0.0760233918128655
              0.0175438596491228
              0.105263157894737
              0

## freetrade_missing_data_combos.csv

          
            x.year
            x.country
            x.tariff
            x.polity
            x.pop
            x.gdp.pc
            x.intresmi
            x.signed
            x.fiveop
            x.usheg
            freq
            percent

            
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              96
              0.56140350877193

            
              TRUE
              TRUE
              FALSE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              52
              0.304093567251462

            
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              FALSE
              TRUE
              FALSE
              TRUE
              9
              0.0526315789473684

            
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              FALSE
              TRUE
              5
              0.0292397660818713

            
              TRUE
              TRUE
              FALSE
              TRUE
              TRUE
              TRUE
              FALSE
              TRUE
              FALSE
              TRUE
              4
              0.0233918128654971

            
              TRUE
              TRUE
              FALSE
              TRUE
              TRUE
              TRUE
              TRUE
              FALSE
              TRUE
              TRUE
              2
              0.0116959064327485

            
              TRUE
              TRUE
              TRUE
              FALSE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              2
              0.0116959064327485

            
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              TRUE
              FALSE
              TRUE
              TRUE
              1
              0.00584795321637427

## index.html
<!DOCTYPE html>
<meta charset="utf-8">
<style>

        /* Shared font for every SVG text element. */
        text {
            font-family: sans-serif;
        }

        /* Variable-name labels. The script only assigns them a y position, so
           text-anchor: end makes each label extend leftwards from x = 0 and
           dominant-baseline: middle centres it vertically on that y. */
        text.var_labels {
            font-size: 0.75em;
            dominant-baseline: middle;
            text-anchor: end;
        }

        /* Dot-plot circle for a missing value: solid fill. */
        circle.absent {
            fill: #3f51b5;
        }

        /* Dot-plot circle for an observed value: white with a dark outline. */
        circle.present {
            stroke: #1a1a1a;
            fill: #FFFFFF;
        }

        /* Bars of the per-variable "% missing" chart; same colour as the
           circle.absent dots. */
        .bar_absent {
            stroke: #3f51b5;
            fill: #3f51b5;
        }

        /* Bars of the per-pattern "% of observations" chart. */
        .bar_combo {
            stroke: #1a1a1a;
            fill: #999999;
        }

        /* Applied to both axis groups. */
        .axis {
            opacity: 0.8;
        }

        /* Axis titles appended inside the axis groups.
           NOTE(review): the explicit fill is presumably needed because the
           d3 axis sets fill to none on its container - confirm. */
        .axis_label {
            fill: #000000;
        }

</style>
<body>

    <div id="block"></div>

    <script src="https://d3js.org/d3.v4.min.js"></script>

    <script type="text/javascript">

        // variable labels
        //
        // Display names for the dataset variables. They are matched to the CSV
        // columns by index when the labels are drawn, so the order here must
        // follow the column order of the CSV files.

        const var_labels = ["Year",
                            "Country",
                            "Tariff rate",
                            "Polity IV score",
                            "Population",
                            "GDP",
                            "International reserves",
                            "IMF agreement",
                            "Financial openness",
                            "US hegemony"];

        // dimensions (pixels)

        const svg_dx = 800,
              svg_dy = 700;

        // `margin` is part of this `const` list. It used to follow a `;`, which
        // turned it into an assignment to an undeclared (implicit global)
        // variable - an error in strict mode or in a module.
        const dot_plot_dx = 200,
              dot_plot_dy = 300,
              margin = { top: dot_plot_dy * 0.05,
                         bottom: dot_plot_dy * 0.05,
                         left: dot_plot_dx * 0.10,
                         right: dot_plot_dx * 0.10 };

        // scales
        //
        // Only the ranges are fixed here; every domain is assigned inside the
        // d3.csv() callbacks once the data is available. `cyScale` and
        // `barScale` are shared by both callbacks, and each callback sets the
        // domain it needs before drawing.

        let cxScale = d3.scalePoint()
                        .range([margin.left, dot_plot_dx - margin.right]);

        let cyScale = d3.scalePoint()
                        .range([margin.bottom, dot_plot_dy - margin.top]);

        let barScale = d3.scaleLinear()
                         .range([0, 100]);

        // axes

        const percentFormat = d3.format(".0%");

        // vertical axis of the bar chart above the dot plot
        let bar_combos_axis = d3.axisLeft(barScale)
                                .ticks(5)
                                .tickFormat(percentFormat);

        // horizontal axis of the bar chart to the right of the dot plot
        let bar_element_axis = d3.axisBottom(barScale)
                                 .ticks(4)
                                 .tickFormat(percentFormat);

        // organize layout

        let svg = d3.select("#block")
                    .append("svg")
                    .attr("height", svg_dy)
                    .attr("width", svg_dx);

        // dot plot of missing-data patterns
        let circle_combos = svg.append("g")
                               .attr("transform", "translate(400, 125)");

        // bar chart above the dot plot (same x offset)
        let bar_combos = svg.append("g")
                            .attr("transform", "translate(400, 15)");

        // bar chart to the right of the dot plot (same y offset)
        let bar_elements = svg.append("g")
                              .attr("transform", "translate(600, 125)");

        // Dot plot of the missing-data patterns and the bar chart above it.
        //
        // The callback uses the (error, rows) form. A one-argument callback is
        // handed `null` when the request fails, which would then fail on
        // `rows.columns` with an unrelated TypeError.
        d3.csv("freetrade_missing_data_combos.csv", (error, rows) => {

            if (error) throw error;

            // array of variable names excluding summary stats
            const col_names = rows.columns.filter((col) => col !== "freq" && col !== "percent");

            cyScale.domain(col_names);

            // `rows.columns` is a property of the array, not one of its
            // elements, so `rows` can be bound as-is. The earlier
            // `typeof(datum) != 'array'` filter never removed anything because
            // typeof never yields "array".

            // combos across x-position
            cxScale.domain(d3.range(rows.length));

            let combo = circle_combos.selectAll("g")
                                     .data(rows)
                                     .enter()
                                     .append("g");

            // circle per combo per variable (column name)
            combo.each(function(row, i) {

                d3.select(this)
                  .selectAll("circle")
                  .data(col_names)
                  .enter()
                  .append("circle")
                  .attr("cx", () => cxScale(i))                       // combos spread across x
                  .attr("cy", (col_name) => cyScale(col_name))        // variables spread across y
                  .attr("r", 7)
                  // CSV cells are strings: "TRUE" marks an observed value
                  .attr("class", (col_name) => row[col_name] === "TRUE" ? "present" : "absent");
            });

            // variable labels, matched to the columns by index
            circle_combos.append("g")
                         .selectAll("text")
                         .data(var_labels)
                         .enter()
                         .append("text")
                         .attr("class", "var_labels")
                         .text((label) => label)
                         .attr("y", (label, i) => cyScale(col_names[i]));

            // bar chart of combo percentages; the domain is reversed so that
            // larger values map to smaller y (bars grow upwards)
            barScale.domain([d3.max(rows, (row) => +row.percent), 0]);

            bar_combos.selectAll("rect")
                      .data(rows)
                      .enter()
                      .append("rect")
                      .attr("class", "bar_combo")
                      .attr("x", (row, i) => cxScale(i) - 10)         // centre the 20px bar on the dots
                      .attr("y", (row) => barScale(+row.percent))
                      .attr("height", (row) => d3.max(barScale.range()) - barScale(+row.percent))
                      .attr("width", 20);

            // bar chart axis
            bar_combos.append("g")
                      .attr("id", "bar_combos_axis")
                      .attr("class", "axis")
                      .call(bar_combos_axis);


            // axis label
            d3.select("#bar_combos_axis")
              .append("text")
              .attr("class", "axis_label")
              .text("% of observations")
              .attr("x", 0)
              .attr("y", 0)
              .attr("transform", "translate(-35, 17) rotate(270)");
        });

        // bar chart of variable percentages
        //
        // The callback uses the (error, data) form so that a failed request is
        // reported as such instead of failing later on `d[0]`.
        d3.csv("freetrade_missing_data_by_variable.csv", (error, d) => {

            if (error) throw error;

            // first item in array is object of variable names and % values
            const var_percents = d[0];

            const col_names = Object.keys(var_percents);

            cyScale.domain(col_names);

            // CSV values are strings and d3.max compares in natural order, so
            // they are coerced to numbers; otherwise the "maximum" would be
            // the lexicographically largest string.
            barScale.domain([0, d3.max(col_names, (name) => +var_percents[name])]);

            bar_elements.selectAll("rect")
                        .data(col_names)
                        .enter()
                        .append("rect")
                        .attr("class", "bar_absent")
                        .attr("x", 0)
                        .attr("y", (name) => cyScale(name) - 10)      // centre the 20px bar on the row
                        .attr("height", 20)
                        .attr("width", (name) => barScale(+var_percents[name]));

            // bar chart axis
            bar_elements.append("g")
                        .attr("id", "bar_vars_axis")
                        .attr("class", "axis")
                        .attr("transform", "translate(0, " + dot_plot_dy + ")")
                        .call(bar_element_axis);

            // axis label
            d3.select("#bar_vars_axis")
              .append("text")
              .attr("class", "axis_label")
              .text("% missing")
              .attr("x", 0)
              .attr("y", 0)
              .attr("transform", "translate(47, 30)");
        });
    </script>
</body>

## missing_data_viz.R

library(Amelia)
library(magrittr)
library(plyr)
library(dplyr)

data(freetrade)

# Share of missing values per variable, written as a single-row CSV
# (one column per variable).
n_NA <- plyr::colwise(function(column) sum(is.na(column)))
write.csv(n_NA(freetrade) / nrow(freetrade),
          "freetrade_missing_data_by_variable.csv",
          row.names = FALSE)

# Frequency (freq) and share of all observations (percent) of every distinct
# present/missing pattern, most common pattern first. TRUE marks an observed
# value. Wrapped in local() so the intermediate objects stay out of the
# workspace.
local({
    observed <- !is.na(freetrade)
    patterns <- plyr::count(observed)
    patterns <- dplyr::mutate(patterns, percent = freq / sum(freq))
    patterns <- dplyr::arrange(patterns, dplyr::desc(percent))
    write.csv(patterns, "freetrade_missing_data_combos.csv", row.names = FALSE)
})


## preview.png

      
    Raw
  

              preview.png
            
          
## thumbnail.png

      
    Raw
  

              thumbnail.png
	year	country	tariff	polity	pop	gdp.pc	intresmi	signed	fiveop	usheg
	0	0	0.339181286549708	0.0116959064327485	0	0	0.0760233918128655	0.0175438596491228	0.105263157894737	0
x.year	x.country	x.tariff	x.polity	x.pop	x.gdp.pc	x.intresmi	x.signed	x.fiveop	x.usheg	freq	percent
TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	96	0.56140350877193
TRUE	TRUE	FALSE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	52	0.304093567251462
TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	FALSE	TRUE	FALSE	TRUE	9	0.0526315789473684
TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	FALSE	TRUE	5	0.0292397660818713
TRUE	TRUE	FALSE	TRUE	TRUE	TRUE	FALSE	TRUE	FALSE	TRUE	4	0.0233918128654971
TRUE	TRUE	FALSE	TRUE	TRUE	TRUE	TRUE	FALSE	TRUE	TRUE	2	0.0116959064327485
TRUE	TRUE	TRUE	FALSE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	2	0.0116959064327485
TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	TRUE	FALSE	TRUE	TRUE	1	0.00584795321637427
	<!DOCTYPE html>
	<meta charset="utf-8">
	<style>

	/* Shared font for every SVG text element. */
	text {
	font-family: sans-serif;
	}

	/* Variable-name labels. The script only assigns them a y position, so
	   text-anchor: end makes each label extend leftwards from x = 0 and
	   dominant-baseline: middle centres it vertically on that y. */
	text.var_labels {
	font-size: 0.75em;
	dominant-baseline: middle;
	text-anchor: end;
	}

	/* Dot-plot circle for a missing value: solid fill. */
	circle.absent {
	fill: #3f51b5;
	}

	/* Dot-plot circle for an observed value: white with a dark outline. */
	circle.present {
	stroke: #1a1a1a;
	fill: #FFFFFF;
	}

	/* Bars of the per-variable "% missing" chart; same colour as the
	   circle.absent dots. */
	.bar_absent {
	stroke: #3f51b5;
	fill: #3f51b5;
	}

	/* Bars of the per-pattern "% of observations" chart. */
	.bar_combo {
	stroke: #1a1a1a;
	fill: #999999;
	}

	/* Applied to both axis groups. */
	.axis {
	opacity: 0.8;
	}

	/* Axis titles appended inside the axis groups.
	   NOTE(review): the explicit fill is presumably needed because the
	   d3 axis sets fill to none on its container - confirm. */
	.axis_label {
	fill: #000000;
	}

	</style>
	<body>

	<div id="block"></div>

	<script src="https://d3js.org/d3.v4.min.js"></script>

	<script type="text/javascript">

	// variable labels
	//
	// Display names for the dataset variables. They are matched to the CSV
	// columns by index when the labels are drawn, so the order here must
	// follow the column order of the CSV files.

	const var_labels = ["Year",
	                    "Country",
	                    "Tariff rate",
	                    "Polity IV score",
	                    "Population",
	                    "GDP",
	                    "International reserves",
	                    "IMF agreement",
	                    "Financial openness",
	                    "US hegemony"];

	// dimensions (pixels)

	const svg_dx = 800,
	      svg_dy = 700;

	// `margin` is part of this `const` list. It used to follow a `;`, which
	// turned it into an assignment to an undeclared (implicit global)
	// variable - an error in strict mode or in a module.
	const dot_plot_dx = 200,
	      dot_plot_dy = 300,
	      margin = { top: dot_plot_dy * 0.05,
	                 bottom: dot_plot_dy * 0.05,
	                 left: dot_plot_dx * 0.10,
	                 right: dot_plot_dx * 0.10 };

	// scales
	//
	// Only the ranges are fixed here; every domain is assigned inside the
	// d3.csv() callbacks once the data is available. `cyScale` and
	// `barScale` are shared by both callbacks, and each callback sets the
	// domain it needs before drawing.

	let cxScale = d3.scalePoint()
	                .range([margin.left, dot_plot_dx - margin.right]);

	let cyScale = d3.scalePoint()
	                .range([margin.bottom, dot_plot_dy - margin.top]);

	let barScale = d3.scaleLinear()
	                 .range([0, 100]);

	// axes

	const percentFormat = d3.format(".0%");

	// vertical axis of the bar chart above the dot plot
	let bar_combos_axis = d3.axisLeft(barScale)
	                        .ticks(5)
	                        .tickFormat(percentFormat);

	// horizontal axis of the bar chart to the right of the dot plot
	let bar_element_axis = d3.axisBottom(barScale)
	                         .ticks(4)
	                         .tickFormat(percentFormat);

	// organize layout

	let svg = d3.select("#block")
	            .append("svg")
	            .attr("height", svg_dy)
	            .attr("width", svg_dx);

	// dot plot of missing-data patterns
	let circle_combos = svg.append("g")
	                       .attr("transform", "translate(400, 125)");

	// bar chart above the dot plot (same x offset)
	let bar_combos = svg.append("g")
	                    .attr("transform", "translate(400, 15)");

	// bar chart to the right of the dot plot (same y offset)
	let bar_elements = svg.append("g")
	                      .attr("transform", "translate(600, 125)");

	// Dot plot of the missing-data patterns and the bar chart above it.
	//
	// The callback uses the (error, rows) form. A one-argument callback is
	// handed `null` when the request fails, which would then fail on
	// `rows.columns` with an unrelated TypeError.
	d3.csv("freetrade_missing_data_combos.csv", (error, rows) => {

	    if (error) throw error;

	    // array of variable names excluding summary stats
	    const col_names = rows.columns.filter((col) => col !== "freq" && col !== "percent");

	    cyScale.domain(col_names);

	    // `rows.columns` is a property of the array, not one of its
	    // elements, so `rows` can be bound as-is. The earlier
	    // `typeof(datum) != 'array'` filter never removed anything because
	    // typeof never yields "array".

	    // combos across x-position
	    cxScale.domain(d3.range(rows.length));

	    let combo = circle_combos.selectAll("g")
	                             .data(rows)
	                             .enter()
	                             .append("g");

	    // circle per combo per variable (column name)
	    combo.each(function(row, i) {

	        d3.select(this)
	          .selectAll("circle")
	          .data(col_names)
	          .enter()
	          .append("circle")
	          .attr("cx", () => cxScale(i)) // combos spread across x
	          .attr("cy", (col_name) => cyScale(col_name)) // variables spread across y
	          .attr("r", 7)
	          // CSV cells are strings: "TRUE" marks an observed value
	          .attr("class", (col_name) => row[col_name] === "TRUE" ? "present" : "absent");
	    });

	    // variable labels, matched to the columns by index
	    circle_combos.append("g")
	                 .selectAll("text")
	                 .data(var_labels)
	                 .enter()
	                 .append("text")
	                 .attr("class", "var_labels")
	                 .text((label) => label)
	                 .attr("y", (label, i) => cyScale(col_names[i]));

	    // bar chart of combo percentages; the domain is reversed so that
	    // larger values map to smaller y (bars grow upwards)
	    barScale.domain([d3.max(rows, (row) => +row.percent), 0]);

	    bar_combos.selectAll("rect")
	              .data(rows)
	              .enter()
	              .append("rect")
	              .attr("class", "bar_combo")
	              .attr("x", (row, i) => cxScale(i) - 10) // centre the 20px bar on the dots
	              .attr("y", (row) => barScale(+row.percent))
	              .attr("height", (row) => d3.max(barScale.range()) - barScale(+row.percent))
	              .attr("width", 20);

	    // bar chart axis
	    bar_combos.append("g")
	              .attr("id", "bar_combos_axis")
	              .attr("class", "axis")
	              .call(bar_combos_axis);


	    // axis label
	    d3.select("#bar_combos_axis")
	      .append("text")
	      .attr("class", "axis_label")
	      .text("% of observations")
	      .attr("x", 0)
	      .attr("y", 0)
	      .attr("transform", "translate(-35, 17) rotate(270)");
	});

	// bar chart of variable percentages
	//
	// The callback uses the (error, data) form so that a failed request is
	// reported as such instead of failing later on `d[0]`.
	d3.csv("freetrade_missing_data_by_variable.csv", (error, d) => {

	    if (error) throw error;

	    // first item in array is object of variable names and % values
	    const var_percents = d[0];

	    const col_names = Object.keys(var_percents);

	    cyScale.domain(col_names);

	    // CSV values are strings and d3.max compares in natural order, so
	    // they are coerced to numbers; otherwise the "maximum" would be
	    // the lexicographically largest string.
	    barScale.domain([0, d3.max(col_names, (name) => +var_percents[name])]);

	    bar_elements.selectAll("rect")
	                .data(col_names)
	                .enter()
	                .append("rect")
	                .attr("class", "bar_absent")
	                .attr("x", 0)
	                .attr("y", (name) => cyScale(name) - 10) // centre the 20px bar on the row
	                .attr("height", 20)
	                .attr("width", (name) => barScale(+var_percents[name]));

	    // bar chart axis
	    bar_elements.append("g")
	                .attr("id", "bar_vars_axis")
	                .attr("class", "axis")
	                .attr("transform", "translate(0, " + dot_plot_dy + ")")
	                .call(bar_element_axis);

	    // axis label
	    d3.select("#bar_vars_axis")
	      .append("text")
	      .attr("class", "axis_label")
	      .text("% missing")
	      .attr("x", 0)
	      .attr("y", 0)
	      .attr("transform", "translate(47, 30)");
	});
	</script>
	</body>

	library(Amelia)
	library(magrittr)
	library(plyr)
	library(dplyr)

	data(freetrade)

	# Share of missing values per variable, written as a single-row CSV
	# (one column per variable).
	n_NA <- plyr::colwise(function(column) sum(is.na(column)))
	write.csv(n_NA(freetrade) / nrow(freetrade),
	          "freetrade_missing_data_by_variable.csv",
	          row.names = FALSE)

	# Frequency (freq) and share of all observations (percent) of every distinct
	# present/missing pattern, most common pattern first. TRUE marks an observed
	# value. Wrapped in local() so the intermediate objects stay out of the
	# workspace.
	local({
	    observed <- !is.na(freetrade)
	    patterns <- plyr::count(observed)
	    patterns <- dplyr::mutate(patterns, percent = freq / sum(freq))
	    patterns <- dplyr::arrange(patterns, dplyr::desc(percent))
	    write.csv(patterns, "freetrade_missing_data_combos.csv", row.names = FALSE)
	})