Last active
November 5, 2016 17:47
-
-
Save tomhopper/d326df1404a1a9773f9f to your computer and use it in GitHub Desktop.
Examples of aligning against a common baseline, using Cleveland-style dot plots
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Response to a post at Storytelling with Data: | |
# \url{http://www.storytellingwithdata.com/blog/orytellingwithdata.com/2015/07/align-against-common-baseline.html} | |
# Demonstrates | |
# * Cleveland-style dot plots (improvement over pie and bar charts) | |
# * Sorting categorical data by a numerical variable with more than one grouping variable | |
# * Highlighting differences between groups graphically | |
library(ggplot2)
library(scales)
# Create the base data | |
# Base data: deaths per 100,000 arrests, one row per (group, cause) pair.
# Rows 1-6 are the African-American group and rows 7-12 the White group,
# with the causes listed in the same order within each group.
causes <- c("Homicide", "Intoxication", "Accident", "Unknown", "Suicide",
            "Natural causes")
df <- data.frame(
  group = rep(c("African-American", "White"), each = length(causes)),
  cause = rep(causes, times = 2),
  value = c(3.4, 0.8, 0.4, 0.4, 0.3, 0.3, 1.8, 0.3, 0.1, 0.1, 0.5, 0.2)
)

# Sort the causes by the African-American values only, so both groups share
# one category order. order() is stable, so tied values keep their original
# relative order.
temp_df <- df[df$group == "African-American", ]
levels_order <- as.character(temp_df$cause)[order(temp_df$value,
                                                  decreasing = FALSE)]

# Ascending levels put the largest value last; after coord_flip() in the plot
# that level is drawn at the top of the axis.
df$cause <- factor(df$cause, levels = levels_order)
# Create a dotplot version of the improved bar chart | |
# Cleveland-style dot plot: one row per cause, one point per group, so both
# groups are read against the same baseline.
ggplot(df) +
  # Group is encoded twice (colour and shape) so it stays readable without
  # colour.
  geom_point(aes(x = cause, y = value, colour = group, shape = group)) +
  # pch 16 = filled circle, pch 4 = cross; shapes are assigned in the order of
  # the group levels.
  scale_shape_manual(values = c(16, 4)) +
  ylab("Deaths per 100,000 arrests by race in the U.S., 2003 - 09") +
  ggtitle("Being Arrested is Deadlier for African-Americans") +
  # Flip so the causes run down the vertical axis and values run horizontally.
  coord_flip() +
  theme_minimal() +
  theme(
    # The cause names are self-explanatory, so the axis title is dropped.
    axis.title.y = element_blank(),
    # Semi-transparent white box keeps the legend legible over the grid lines.
    legend.background = element_rect(fill = alpha("white", 0.75),
                                     colour = "white"),
    # Anchor the legend's bottom-right corner to the plot's bottom-right corner.
    # NOTE(review): a numeric legend.position is deprecated as of ggplot2 3.5.0
    # in favour of legend.position = "inside" + legend.position.inside; kept
    # here so the script still runs on older ggplot2 versions.
    legend.position = c(1, 0),
    legend.justification = c(1, 0)
  )
#ggsave(filename = "~/Dropbox/Analyses/Align_Against_Common_Baseline/common_baseline_dot_plot.png", width = 6, height = 3, units = "in") | |
# Since the story is about the difference, create a new data frame with percent difference
# using (a - b) / b. This gives equal weight when a > b and when b > a (as opposed to,
# e.g., a / b, which crams all b > a between 0 and 1 while all a > b range from 1 to Inf).
# Relative difference in death rate per cause: (African-American - White) / White.
# The two groups are paired by cause name rather than by row position, so the
# result does not depend on how the rows of df happen to be ordered.
aa_rows <- df[df$group == "African-American", ]
white_rows <- df[df$group == "White", ]
pair_idx <- match(as.character(aa_rows$cause), as.character(white_rows$cause))
# Every African-American cause needs a White counterpart to compare against.
stopifnot(!anyNA(pair_idx))
white_rows <- white_rows[pair_idx, ]

df_diff <- data.frame(
  cause = aa_rows$cause,
  value = (aa_rows$value - white_rows$value) / white_rows$value
)

# Order the causes by the size of the difference. The ordering is taken from
# df_diff itself: order(df_diff$value) is a permutation of df_diff's rows, and
# applying it to df would only work by coincidence of row layout.
levels_order <- as.character(df_diff$cause)[order(df_diff$value,
                                                  decreasing = FALSE)]
df_diff$cause <- factor(df_diff$cause, levels = levels_order)
# Plot the differences, again using a Cleveland-style dot plot.
# Some work is needed on the colour gradient to ensure negative differences stand out from positive ones--the
# transition from green to red is not abrupt enough.
# Dot plot of the relative difference per cause, with a reference line at zero
# (no difference between the groups).
ggplot(df_diff) +
  # Colour follows the value itself so the sign of the difference is visible.
  geom_point(aes(x = cause, y = value, colour = value)) +
  # Values are proportions; show them as percentages.
  scale_y_continuous(labels = percent) +
  # Diverging scale centred on the default midpoint of 0: green below, red
  # above. The legend is suppressed because position already encodes the value.
  scale_color_gradient2(high = muted("red"), mid = "grey50",
                        low = muted("green"), guide = "none") +
  geom_hline(yintercept = 0, colour = "grey50") +
  ggtitle("Being Arrested is Deadlier for African Americans") +
  ylab("Percent increase in deaths per arrest for African- vs. White-Americans") +
  # Flip so the causes run down the vertical axis and values run horizontally.
  coord_flip() +
  theme_minimal() +
  # The cause names are self-explanatory, so the axis title is dropped.
  theme(axis.title.y = element_blank())
#ggsave(filename = "~/Dropbox/Analyses/Align_Against_Common_Baseline/common_baseline_diff_plot.png", width = 6, height = 3, units = "in") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment