Skip to content

Instantly share code, notes, and snippets.

@clauswilke
Last active May 12, 2020 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save clauswilke/b176a046320edd94c62499515a153626 to your computer and use it in GitHub Desktop.
Save clauswilke/b176a046320edd94c62499515a153626 to your computer and use it in GitHub Desktop.
Tidyverse approach to calculating and plotting correlation matrices
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
library(colorspace)
# data to analyze
data <- select(MASS::fgl, -type, -RI, -Si)
# cluster
cr <- cor(as.matrix(data))
clust <- hclust(as.dist(1-cr), method="average")
levels <- clust$labels[clust$order]
# turn data into nested table
data_nested <- data %>%
mutate(i = 1:n()) %>%
gather(variable, value, -i) %>%
group_by(variable) %>%
nest() %>%
mutate(variable = factor(variable, levels = levels)) %>%
arrange(variable)
# calculate correlation data
# this may look overly complicated but the point here is to get not just the matrix
# but also the p value, statistic, parameter, etc.
m <- t(combn(data_nested$variable, 2))
cor_data <- tibble(X1 = m[,1], X2 = m[,2]) %>%
left_join(rename(data_nested, X1 = variable, data1 = data)) %>%
left_join(rename(data_nested, X2 = variable, data2 = data)) %>%
mutate(
cor = map2(data1, data2, ~cor.test(.x$value, .y$value)),
estimate = map_dbl(cor, ~.x$estimate),
p.value = map_dbl(cor, ~.x$p.value),
statistic = map_dbl(cor, ~.x$statistic),
parameter = map_dbl(cor, ~.x$parameter),
se = sqrt((1 - estimate^2)/parameter)
) %>%
select(-data1, -data2, -cor)
# plot
ggplot(cor_data, aes(X1, X2, fill = estimate)) +
geom_tile(color = "white", size = 1) +
scale_x_discrete(position = "top", name = NULL, expand = c(0, 0)) +
scale_y_discrete(name = NULL, expand = c(0, 0)) +
scale_fill_continuous_carto(
palette = "Earth", rev = TRUE,
limits = c(-.5, .5), breaks = c(-.5, 0, .5),
name = "correlation",
guide = guide_colorbar(
direction = "horizontal",
label.position = "bottom",
title.position = "top",
barwidth = grid::unit(140, "pt"),
barheight = grid::unit(17.5, "pt")
)
) +
coord_fixed() +
theme_classic() +
theme(
axis.line = element_blank(),
axis.ticks = element_blank(),
axis.ticks.length = grid::unit(3, "pt"),
legend.position = c(.97, .0),
legend.justification = c(1, 0),
legend.title.align = 0.5
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment