Skip to content

Instantly share code, notes, and snippets.

@cmparlettpelleriti
Last active April 19, 2024 10:45
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cmparlettpelleriti/e893bf86da03e15f0639c7e994649396 to your computer and use it in GitHub Desktop.
Save cmparlettpelleriti/e893bf86da03e15f0639c7e994649396 to your computer and use it in GitHub Desktop.
Show students the relationship between Eigendecomp of Cor/Cov and the % variance explained for PCs
library(tidyverse)
library(MASS)
library(patchwork)
cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
# generate data with given cor matrix
a <- 0.9
s1 <- matrix(c(1,a,
a,1), ncol = 2)
# data
df <- data.frame(mvrnorm(n = 1000, mu = c(0,0), Sigma = s1))
# find eigenvalues
pca1 <- eigen(cor(df))$values
# print matrix, eigenvals, and % var
print("Correlation Matrix: ")
print(s1)
print("Eigenvalues: ")
print(pca1)
print("% Variance Explained: ")
print(pca1/sum(pca1))
#---eigen----
# unit square
df <- data.frame(x = c(0,0,1,1),
y = c(0,1,0,1))
# plot unit squre
p1 <- ggplot(df, aes(x = x, y = y)) +
coord_fixed() +
geom_rect(data=df,
mapping=aes(xmin=0, xmax=1,
ymin=0, ymax=1),
color= cbPalette[2],
fill = cbPalette[2],
alpha=0.05) +
geom_point(size = 3) +
theme_minimal() +
theme(panel.grid.minor = element_blank()) +
lims(x = c(-0.10,3), y = c(-0.10,3)) +
labs(x = "", y = "",
title = "Unit Square") +
annotate(geom = "text", x = -0.10, y = -0.10, label = "(0,0)") +
annotate(geom = "text", x = 1.10, y = 1.10, label = "(1,1)") +
annotate(geom = "text", x = -0.10, y = 1.10, label = "(0,1)") +
annotate(geom = "text", x = 1.10, y = -0.10, label = "(1,0)")
# multiplied------------
# sanity check of multiplications
multis <- list(s1 %*% c(0,0), s1 %*% c(0,1), s1 %*% c(1,1),s1 %*% c(1,0))
print("Four Corners Transformed: ")
print(multis)
# new paralellogram df
df <- data.frame(x = c(0,a,(1 + a),1),
y = c(0,1,(1 + a),a))
# plot paralellogram
p2 <- ggplot(df, aes(x = x, y = y)) +
geom_rect(data=df,
mapping=aes(xmin=0, xmax=1,
ymin=0, ymax=1),
color= cbPalette[1],
fill = cbPalette[1],
alpha=0.05,
linetype = "dashed") +
coord_fixed() +
geom_polygon(
color= cbPalette[2],
fill = cbPalette[2],
alpha=0.2) +
geom_point(size = 3) +
theme_minimal() +
theme(panel.grid.minor = element_blank()) +
lims(x = c(-0.10,3), y = c(-0.10,3)) +
labs(x = "", y = "",
title = paste0("Transformed Paralellogram ", "r = ",a )) +
annotate(geom = "text", x = -0.10, y = -0.10, label = "(0,0)") +
annotate(geom = "text", x = 1 + a + 0.1, y = 1 + a + 0.1, label = paste0("(", 1 + a, ",", 1 + a, ")")) +
annotate(geom = "text", x = a - 0.1, y = 1.10, label = paste0("(", a, ",", 1 ,")")) +
annotate(geom = "text", x = 1.10, y = a- 0.1, label = paste0("(", 1 + a, ",", a, ")"))
# plot both together
p1 + p2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment