Skip to content

Instantly share code, notes, and snippets.

@trinker
Created February 16, 2018 05:51
Show Gist options
  • Save trinker/0260a9dfdd9531f9b90d9fad2f7b4b12 to your computer and use it in GitHub Desktop.
Save trinker/0260a9dfdd9531f9b90d9fad2f7b4b12 to your computer and use it in GitHub Desktop.
Likert ggplot2 Odd Number of Responses
###############################
## Plotting Likert Type Data ##
###############################
##------------------------------------------------------------------------
## Note: Plotting horizontal stacked bar plots in ggplot2 with Likert type
## data is a non-trivial task. Stacking is not well defined for mixed
## negative/positive values on a bar. This requires splitting the data
## set into two different parts (positive/negative), plotting each side
## separately, and filling the colors manually. This script adds complexity
## for neutral scales.
##------------------------------------------------------------------------
##=============
## Dependencies
##=============
pacman::p_load(wakefield, tidyverse, numform)
##=========================
## generate random data set
##=========================
lvls <- c(
"Strongly Agree", "Agree", "Somewhat Agree",
"Neutral", "Somewhat Disagree", "Disagree", "Strongly Disagree"
)
set.seed(10)
dat <- wakefield::r_data_frame(200,
is_first_gen = r_sample_binary(x = 0:1),
`Q1. It´s not known, if climate change is real` = likert_7(),
`Q2. In my opinion, the risks of climate change are exaggerated by activists` = likert_7(),
`Q3. Climate change is not as dangerous as it is claimed` = likert_7(),
`Q4. I'm convinced that we can handle climate change` = likert_7(),
`Q5. I'm not ok with this` = likert_7()
) %>%
tidyr::gather(Question, Response, -is_first_gen) %>%
dplyr::mutate(
Response = factor(sample(Response), levels = rev(lvls)),
Level = as.integer(Response)
)
##===================
## Summarize the Data
##===================
## Summarize the data into counts and proportion by group var (question)
sdat <- dat %>%
dplyr::count(Question, Response, Level, is_first_gen) %>%
dplyr::ungroup() %>%
dplyr::group_by(Question, is_first_gen) %>%
dplyr::mutate(
prop = n/sum(n),
Status = case_when(is_first_gen == 0 ~'Peers', TRUE ~ 'First Generation') %>%
factor(levels = c('Peers', 'First Generation'))
) %>%
ungroup()
##===============================
## Make the levels and colors key
##===============================
high <- '#03A89E' #'darkred'
low <- '#CD7F32' #'blue'
mid <- 'grey90'
levels_key <- data_frame(
Response = factor(levels(sdat[['Response']]), levels = levels(sdat[['Response']])),
Level = as.integer(Response)
) %>%
arrange(Level)
levels_key$type <- c(
rep('negative', floor(nrow(levels_key)/2)),
rep('neutral', nrow(levels_key) %% 2),
rep('positive', floor(nrow(levels_key)/2))
)
half_n_levels <- floor(nrow(levels_key)/2) + 1
levels_key$colors <- c(
colorRampPalette(c(low, mid))(half_n_levels)[-half_n_levels],
rep(mid, nrow(levels_key) %% 2),
colorRampPalette(c(mid, high))(half_n_levels)[-1]
)
levels_key$colors <- factor(levels_key$colors, levels = levels_key$colors)
## Add the levels info back onto the Response Key
sdat <- sdat %>%
dplyr::left_join(levels_key, by = c('Response', 'Level'))
## Split the data apart into negative and positive response types
positive <- sdat %>%
dplyr::filter(type %in% c('positive', 'neutral')) %>%
mutate(
colors = factor(colors, levels = rev(levels(colors))),
prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop)
)
negative <- sdat %>%
dplyr::filter(type %in% c('neutral', 'negative')) %>%
dplyr::mutate(
prop = case_when(type == 'neutral' ~ prop/2, TRUE ~ prop),
prop = -1 * prop
)
## calculate negative & positive responses data
prop_dat_prime <- sdat %>%
#dplyr::filter(type != 'neutral') %>%
group_by(Question, Status, type) %>%
summarize(
n = sum(n),
) %>%
ungroup() %>%
group_by(Question, Status) %>%
mutate(
prop = n/sum(n),
) %>%
ungroup() %>%
group_by(type) %>%
mutate(
label = numform::f_prop2percent(prop, 0)
) %>%
split(.$type)
prop_dat <- lapply(c('positive', 'negative'), function(x){
y <- left_join(
prop_dat_prime[[x]],
prop_dat_prime[['neutral']] %>%
ungroup() %>%
mutate(prop2 = prop/2) %>%
select(-c(type, label, n, prop)),
by = c('Question', 'Status')
) %>%
mutate(prop = prop + prop2) %>%
select(-prop2)
})
prop_dat <- bind_rows(prop_dat) %>%
group_by(Question, Status, type, label) %>%
summarize(
n = sum(n),
) %>%
group_by(Question, Status) %>%
mutate(
prop = n/sum(n),
proploc = case_when(type == 'negative' ~ -1 * prop, TRUE ~ prop)
) %>%
ungroup() %>%
group_by(type) %>%
mutate(
textloc = 1.13 * max(prop) * sign(proploc)
) %>%
split(.$type)
prop_dat$neutral <- prop_dat_prime$neutral %>%
mutate(textloc = 0)
##=====================================
## Plot the horizontal stacked bar plot
##=====================================
ggplot() +
geom_bar(
data = positive,
aes(x = Status, y = prop, fill = colors),
position = "stack",
stat = "identity"
) +
geom_bar(
data = negative,
aes(x = Status, y = prop, fill = colors),
position = "stack",
stat = "identity"
) +
coord_flip() +
geom_hline(yintercept = 0, color = 'white', size = 1) +
guides(fill = guide_legend(reverse = TRUE)) +
scale_fill_identity(
labels = levels_key$Response,
breaks = levels_key$colors,
guide = "legend",
name = ''
) +
facet_wrap(~ Question, ncol = 1) +
geom_text(
data = prop_dat$negative,
aes(label = label, x = Status, y = textloc),
hjust = 0, color = 'grey60'
) +
geom_text(
data = prop_dat$positive,
aes(label = label, x = Status, y = textloc),
hjust = 1, color = 'grey60'
) +
geom_text(
data = prop_dat$neutral,
aes(label = label, x = Status, y = textloc),
hjust = .5, color = 'grey60'
) +
scale_y_continuous(expand = c(0, 0), limits = 1.02 * range(bind_rows(prop_dat)$textloc)) +
theme_bw() +
theme(
panel.grid = element_blank(),
axis.text.x = element_blank(),
axis.ticks = element_blank(),
strip.background = element_blank(),
strip.text = element_text(hjust = 0, face = 'bold', size = 11),
panel.border = element_rect(color = 'gray90', linetype = "dashed", fill = NA),
plot.title = element_text(color = '#734A12'),
plot.subtitle = element_text(color = '#734A12'),
plot.caption = element_text(color = 'grey40')
) +
labs(
x = NULL, y = NULL,
title = 'Question Answer by First Generation Status',
subtitle = 'Some other long winded explanation that might make one seem smarter',
caption = 'Note: Captions seem more academic'
)
@trinker
Copy link
Author

trinker commented Feb 16, 2018

image

@trinker
Copy link
Author

trinker commented Feb 16, 2018

Note that this only works for odd number of categories. For an even number see: https://gist.github.com/trinker/06548977f08d9dd92f47bd8a3c958d17

Probably not the best way to handle neutral. Likely shouls be split out like in the likert package so there's a reference point or the negative/positive portions as seen here:

image

Code based on: http://rnotr.com/likert/ggplot/barometer/likert-plots/

@jukicivan
Copy link

This is awesome! Thanks for the code!

I was trying to replicate this with my own data which doesn't have as much "negative type" responses. As a consequence, the "zero" or neutral part is pushed heavily to the left and the plot itself doesn't look as nice as here. Colors are overlapping percentages etc. While I like the plot, I would still try to replicate your figure (i.e., having percentages on the sides and in the middle which would facilitate readability). I would appreciate if you could comment on this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment