Skip to content

Instantly share code, notes, and snippets.

@ikashnitsky
Last active November 8, 2019 20:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ikashnitsky/2800295e304b4858be553432de4a0d11 to your computer and use it in GitHub Desktop.
Save ikashnitsky/2800295e304b4858be553432de4a0d11 to your computer and use it in GitHub Desktop.
Simple rules and tricks to improve dataviz communication | slides: https://ikashnitsky.github.io/share/1911-prc20-dataviz/slides.html#/
#===============================================================================
# 2019-11-08 PRC dataviz workshop
# Simple rules and tricks to improve dataviz communication
# slides: https://ikashnitsky.github.io/share/1911-prc20-dataviz/slides.html#/
# Ilya Kashnitsky, ilya.kashnitsky@gmail.com
#===============================================================================
library(tidyverse)
library(magrittr)
# pipes
# https://twitter.com/andrewheiss/status/1173743447171354624
# https://twitter.com/dmi3k/status/1191824875842879489
# RULE 0 -- DO VISUALIZE YOUR DATA ---------------------------------------
# https://twitter.com/JustinMatejka/status/859075295059562498
# RULE 1 -- text should be horizontal -------------------------------------
# Initial post that I read on the topic
# http://www.b-eye-network.com/view/2468
# Post by Griffith Feeney
# http://demographer.com/dsitl/08-cleveland-dot-plots
# example of a nice usage here
# https://twitter.com/jburnmurdoch/status/1163429037613703175
# and here
# https://barcanumbers.wordpress.com/2018/12/06/who-are-the-best-finishers-in-contemporary-football
# My post
# https://ikashnitsky.github.io/2019/dotplot/
# We are going to improve Figure 2 from
# Janssen, F., & de Beer, J. (2019). The timing of the transition from mortality compression to mortality delay in Europe, Japan and the United States. Genus, 75(1), 10.
# https://doi.org/10/ggbtpx
jb <- tibble::tribble(
~period, ~region, ~sex, ~delay, ~compression,
"1950-1979", "JP", "m", "6.35", "9.28",
"1950-1979", "JP", "f", "6.61", "10.91",
"1950-1979", "US", "m", "2.76", "1.84",
"1950-1979", "US", "f", "4.93", "1.76",
"1950-1979", "NEur", "m", "0.15", "3.90",
"1950-1979", "NEur", "f", "3.78", "3.18",
"1950-1979", "WEur", "m", "1.33", "3.77",
"1950-1979", "WEur", "f", "4.15", "3.27",
"1950-1979", "SEur", "m", "1.28", "8.85",
"1950-1979", "SEur", "f", "3.68", "8.36",
"1950-1979", "EEur", "m", "-1.21", "4.20",
"1950-1979", "EEur", "f", "1.35", "4.38",
"1980-2014", "JP", "m", "6.70", "0.47",
"1980-2014", "JP", "f", "7.70", "0.28",
"1980-2014", "US", "m", "8.17", "-1.23",
"1980-2014", "US", "f", "3.56", "0.47",
"1980-2014", "NEur", "m", "8.65", "-0.32",
"1980-2014", "NEur", "f", "5.25", "0.69",
"1980-2014", "WEur", "m", "8.61", "0.36",
"1980-2014", "WEur", "f", "5.71", "0.80",
"1980-2014", "SEur", "m", "7.36", "1.78",
"1980-2014", "SEur", "f", "6.13", "1.54",
"1980-2014", "EEur", "m", "0.35", "4.93",
"1980-2014", "EEur", "f", "4.38", "0.86"
) %>%
# pivot_ family of functions is the new way to do reshaping
# https://www.tidyverse.org/blog/2019/09/tidyr-1-0-0/
# also, have a look at this tutorial:
# https://twitter.com/yutannihilat_en/status/1172767005784457218
pivot_longer(names_to = "component", cols = delay:compression) %>%
mutate(value = value %>% as.numeric()) %>%
mutate_if(is_character, as_factor)
# replicate the figure
jb %>%
mutate(region_sex = paste(region, sex, sep = "_") %>%
as_factor()) %>%
ggplot(aes(region_sex, value, fill = component))+
geom_col()+
facet_grid(~period)
# almost -- delay should be mapped first
# fix the ordering of stacked bar
jb %>%
mutate(region_sex = paste(region, sex, sep = "_") %>%
as_factor()) %>%
ggplot(aes(region_sex, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
facet_grid(~period)
# flip the coordinates
jb %>%
mutate(region_sex = paste(region, sex, sep = "_") %>%
as_factor() %>%
fct_rev()) %>% # ggplot goes bottom-up on y axis
ggplot(aes(region_sex, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
coord_flip()+
facet_grid(~period)
# the legend eats up space
jb %>%
mutate(region_sex = paste(region, sex, sep = "_") %>%
as_factor() %>%
fct_rev()) %>%
ggplot(aes(region_sex, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
coord_flip()+
facet_grid(~period)+
theme(legend.position = "bottom") # here!
# more faceting options to explore data dimensions
# focus on region comparison
jb %>%
mutate(region = region %>% fct_rev()) %>%
ggplot(aes(region, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
coord_flip()+
facet_grid(sex~period)+ # <-
theme(legend.position = "bottom")
# focus on period comparison
jb %>%
mutate(region = region %>% fct_rev()) %>%
ggplot(aes(period, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
coord_flip()+
facet_grid(region~sex)+ # <-
theme(legend.position = "bottom")
# focus on sex comparison -- as do the authors
jb %>%
mutate(region = region %>% fct_rev()) %>%
ggplot(aes(sex, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
coord_flip()+
facet_grid(region~period)+ # <-
theme(legend.position = "bottom")
# unlike mapping over y axis, faceting goes top-bottom
jb %>%
# mutate(region = region %>% fct_rev()) %>% # we don't need this one
ggplot(aes(sex, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
coord_flip()+
facet_grid(region~period)+ # <-
theme(legend.position = "bottom")
# change facet labels -- they are called strips in ggplot
jb %>%
ggplot(aes(sex, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
coord_flip()+
facet_grid(region~period)+
theme(
legend.position = "bottom",
strip.text.y = element_text(angle = 0) # <-
)
# we now have enough space to write out full names !!
jb_names <- jb %>%
mutate(
region = region %>% as_factor() %>%
lvls_revalue(
c(
"Japan", "Unites States", "Northern Europe",
"Western Europe", "Southern Europe", "Eastern Europe"
)
),
sex = sex %>% as_factor() %>% lvls_revalue(c("Males", "Females"))
)
jb_names %>%
ggplot(aes(sex, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
coord_flip()+
facet_grid(region~period)+
theme(
legend.position = "bottom",
strip.text.y = element_text(angle = 0)
)
# final touch -- some themeing
# get a nice font, I love Roboto family developed by Google
# Roboto Condensed is nice because it's narrow but still very readable
library(hrbrthemes); import_roboto_condensed()
jb_names %>%
ggplot(aes(sex, value, fill = component))+
geom_col(position = position_stack(reverse = TRUE))+
geom_hline(yintercept = 0)+ # add vetical line at 0
coord_flip()+
facet_grid(region~period)+
theme_minimal(base_family = font_rc)+ # change theme & nice font
theme(
legend.position = "bottom",
strip.text.y = element_text(angle = 0)
)+
scale_fill_manual(values = c("black", "gray"))+
labs(
title = "Increase in life expectancy at birth",
subtitle = "Decomposition into the effects of compression and delay",
fill = NULL,
x = NULL, # attention! we used coor_flip
y = "Increase in life expectancy at birth over the period"
)
# RULE 2 -- as large as possible text for presentations -------------------
gg_jb <- last_plot()
ggsave(
filename = "out.png"
)
# control width and height, in ggplot they are given in inches
ggsave(
filename = "out.png",
width = 7, height = 5
)
# for presentation we need to increase all the text elements
ggsave("out.png", width = 4, height = 3)
# not nice
# let's increase the text size in the plot
gg_jb +
theme_minimal(base_size = 16)
# oups, now we've lost additional theme options
gg_jb +
theme_minimal(base_family = font_rc, base_size = 16)+
theme(
legend.position = "bottom",
strip.text.y = element_text(angle = 0)
)
ggsave("out.png", width = 7, height = 5)
# we can change specific elements separately in theme()
gg_jb +
theme_minimal(base_family = font_rc, base_size = 16)+
theme(
legend.position = "bottom",
strip.text.y = element_text(angle = 0, size = 20, face = 2, hjust = 0)
)
ggsave("out.png", width = 7, height = 5)
# let's change the title
gg_jb +
theme_minimal(base_family = font_rc, base_size = 16)+
theme(
legend.position = "bottom",
strip.text.y = element_text(angle = 0, size = 20, face = 2, hjust = 0),
plot.title = element_text(size = 30, family = "Roboto Slab")
)
ggsave("out.png", width = 7, height = 5)
# too big, decrease a bit
gg_jb +
theme_minimal(base_family = font_rc, base_size = 16)+
theme(
legend.position = "bottom",
strip.text.y = element_text(angle = 0, size = 20, face = 2, hjust = 0),
plot.title = element_text(size = 24, family = "Roboto Slab")
)
ggsave("out.png", width = 7, height = 5)
# if we need a smaller size file, we can reduce dpi parameter
# digits per inch, the default is 300
ggsave("out.png", width = 7, height = 5, dpi = 100)
# for better PNG quality and optimization use Cairo graphic device
ggsave("out.png", width = 7, height = 5, dpi = 100, type = "cairo")
# for the best quality use PDF export
ggsave("out.pdf", width = 7, height = 5)
# oups, problems with exporting fonts, again use Cairo graphics
ggsave("out.pdf", width = 7, height = 5, device = cairo_pdf)
# BONUS: own ggsave functions -----------------------------------------
# one can define own convenience functions for better ggsave options
ggsave_png <- function(
gg = ggplot2::last_plot(),
path = "out.png",
ar = 0.5625,
w = 7,
h = w*ar,
type = "cairo",
dpi = 300
){
ggsave(filename = path, plot = gg, width = w, height = h, type = type, dpi = dpi)
}
ggsave_pdf <- function(
gg = ggplot2::last_plot(),
path = "out.pdf",
ar = 0.5625, # this default is to match 16/9 screens
w = 7,
h = w*ar,
device = cairo_pdf
){
ggsave(filename = path, plot = gg, width = w, height = h, device = device)
}
# test the functions
gg_jb +
theme_minimal(base_family = font_rc, base_size = 16)+
theme(
legend.position = "bottom",
strip.text.y = element_text(angle = 0, size = 20, face = 2, hjust = 0),
plot.title = element_text(size = 24, family = "Roboto Slab")
)
out <- last_plot()
out %>% ggsave_png()
out %>% ggsave_png(ar = 1, dpi = 100)
out %>% ggsave_pdf(w = 7, h = 5)
# RULE 3 -- mind colors, especially regarding colorblind friendliness --------
# colors in ggplot
# Parameter "color" changes the color of lines and points
# Parameter "fill" changes the color of shapes (see the violin example)
# The way you override ggplot"s defaults is to use functions
# scale_color_[...] or scale_fill_[...] (use TAB to see options)
# I really recommend viridis colors as your daily basis
# NOTE: with viridis you need to know if you variable is continuous or categorical
# The video on viridis https://youtu.be/xAoljeRJ3lU
# colorblind friendliness
# https://www.toptal.com/designers/colorfilter?orig_uri=https://infographic.statista.com/normal/chartoftheday_13680_the_legal_status_of_abortion_worldwide_n.jpg&process_type=protan
# paletteer !!!
library(paletteer)
# https://github.com/EmilHvitfeldt/r-color-palettes
# low level manipulations with color palettes
library(prismatic)
# https://github.com/EmilHvitfeldt/prismatic
# random minimal tileplot
crossing(
x = LETTERS %>% extract(1:4),
y = 5:8
) %>%
mutate(z = runif(16)) %>%
ggplot(aes(x, y, fill = z))+
geom_tile()
gg <- ggplot2::last_plot()
# the standard safe choice to go should be viridis
gg +
scale_fill_viridis_c(
"RANDOM",
# begin = .2, end = .9, # try limiting parts of the scale
option = "B"
)
# all the R palettes are conveniently available with ggplot2 via paletteer
gg + scale_fill_paletteer_c("grDevices", "rainbow") # bad old rainbow
gg + scale_fill_paletteer_c(scico, lajolla) # note you can either quote or not
# now let's produce a similar plot with categorical colors
crossing(
x = LETTERS %>% extract(1:4),
y = 5:8
) %>%
mutate(
z = runif(16) %>%
cut_width(1/4)
) %>%
ggplot(aes(x, y, fill = z))+
geom_tile()
ggd <- last_plot()
# for a discrete scale we select a discrete palette
ggd + scale_fill_paletteer_d(awtools, ppalette)
# the good old RColorBrewer
ggd + scale_fill_paletteer_d(RColorBrewer, Pastel1)
# one of the classical colorblind friendly palettes
ggd + scale_fill_paletteer_d(RColorBrewer, Accent)
# if unsure check colorblind and print friendliness using prismatic
RColorBrewer::brewer.pal(8, "Accent") %>% color() # WOW! look at the console!
# how a re-green colorblind person will see the palette
RColorBrewer::brewer.pal(8, "Accent") %>%
color() %>% clr_deutan()
# a base plotting method for the color objects
RColorBrewer::brewer.pal(8, "Accent") %>%
color() %>% clr_deutan() %>% plot
# print priendliness
RColorBrewer::brewer.pal(8, "Accent") %>%
color() %>% clr_grayscale() %>% plot # it seems, up to 6 distinct colors
# Genger colors
# https://blog.datawrapper.de/gendercolor/
# https://twitter.com/d_alburez/status/1184120385899581440
# themes
# check out:
# https://evamaerey.github.io/little_flipbooks_library/taming_themes_in_ggplot/taming_ggplot_themes.html
library(ggthemes)
gg + theme_minimal()
gg + theme_bw()
gg + theme_light()
gg + theme_excel() # ugly, isn"t it?
gg + theme_few() # one of my favorites
gg + theme_economist()
gg + theme_wsj()
gg + theme_fivethirtyeight() # I love this one
gg + theme_solarized()
gg + theme_dark()
# ... feel free to test them all))
esquisse::esquisser()
# turn all themes dark
# https://github.com/nsgrantham/ggdark
library(ggdark)
gg_jb +
dark_theme_minimal(base_family = font_rc, base_size = 16)+
scale_fill_viridis_d(begin = .4)+
theme(
legend.position = "bottom",
strip.text.y = element_text(angle = 0, size = 20, face = 2, hjust = 0),
plot.title = element_text(size = 24, family = "Roboto Slab")
)
# scale _ identity
# https://twitter.com/ikashnitsky/status/937786580231696384
n <- 100
tibble(x = runif(n),
y = runif(n),
size = runif(n, min = 4, max = 20)) %>%
ggplot(aes(x, y, size = size)) +
geom_point(color = "white", pch = 42) +
scale_size_identity() +
coord_cartesian(c(0, 1), c(0, 1)) +
theme_void() +
theme(
panel.background = element_rect(fill = "black"),
plot.background = element_rect(fill = "black")
)
# generate bubbles of random color and size
n <- sample(20:50, 3)
tibble(
x = runif(n),
y = runif(n),
size = runif(n, min = 3, max = 20),
color = rgb(runif(n), runif(n), runif(n))
) %>%
ggplot(aes(x, y, size = size, color = color)) +
geom_point() +
scale_color_identity() +
scale_size_identity() +
coord_cartesian(c(0, 1), c(0, 1)) +
theme_void()
# RULE 4 -- highlight what's important for the story -------------------------
# a great example here
# https://barcanumbers.wordpress.com/2018/12/06/who-are-the-best-finishers-in-contemporary-football
library(gapminder)
gapminder %>%
ungroup() %>%
ggplot(aes(x = year, y = lifeExp, color = continent, group = country)) +
geom_path() +
theme_minimal(base_family = "mono")
# assume we are talking about South Africa
gapminder %>%
ungroup() %>%
ggplot(aes(x = year, y = lifeExp, color = continent, group = country)) +
geom_path() +
scale_color_grey()+
geom_path(data = . %>% filter(country == "South Africa"),
color = "red", size = 2)+
annotate("text", x = 1990, y = 65, label = "South Africa",
size = 7, color = "red", fontface = 2, family = "mono")+
theme_minimal(base_family = "mono")
# BUNUS -- animation ------------------------------------------------------
library(gganimate)
# one animation worth thousands (not words) arguments
# https://twitter.com/mikeleeco/status/876792944396730368
# the power of moving charts
# https://twitter.com/jburnmurdoch/status/1107552367795412992?lang=en
# https://www.ft.com/video/83703ffe-cd5c-4591-9b4f-a3c087aa6d19
gapminder %>%
select(1:4) %>%
group_by(continent, year) %>%
summarise(avg_e0 = lifeExp %>% mean) %>%
ungroup() %>%
ggplot(aes(x = year, y = avg_e0,
color = continent)) +
geom_path() +
theme_minimal(base_family = "mono")
p <- ggplot2::last_plot() # note gganimate re-writes last_plot()
ani <- p +
geom_point()+
transition_reveal(year)+
ease_aes("cubic-in-out")
# define the number of data points
nfr <- gapminder %>% pull(year) %>% unique() %>% length()
animate(
ani,
nframes = nfr * 3,
width = 500, height = 400,
res = 100,
start_pause = 3, end_pause = 10
)
anim_save("test-anim.gif")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment