Last active
October 8, 2019 21:56
-
-
Save turgeonmaxime/7dff8371c44b750a12090ab110812d7b to your computer and use it in GitHub Desktop.
Animation of geometry of PCA
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(gganimate) | |
library(mvtnorm) | |
library(rootSolve) | |
# Generate data | |
Sigma <- matrix(c(1, 0.5, | |
0.5, 1), | |
ncol = 2) | |
X <- rmvnorm(100, sigma = Sigma) | |
Pn <- solve(cov(X)) | |
# Create circle | |
n <- 100 | |
theta_vect <- seq(0, 2*pi, length.out = n) | |
circle <- data.frame(X = cos(theta_vect), | |
Y = sin(theta_vect)) | |
# Generate ellipses | |
evalues <- eigen(Pn, only.values = TRUE)$values | |
radii <- seq(sqrt(evalues[1]), sqrt(evalues[2]), | |
length.out = 50) | |
transf_mat <- solve(chol(Pn)) | |
data_ellipse <- purrr::map_df(radii, | |
function(rho) { | |
ellipse <- rho * cbind(cos(theta_vect), | |
sin(theta_vect)) %*% t(transf_mat) | |
data.frame( | |
rho = rho, | |
X = ellipse[,1], | |
Y = ellipse[,2] | |
) | |
}) | |
# Find intersection point between circle and ellipse | |
model <- function(vect, rho = 1) { | |
c(vect %*% Pn %*% vect - rho, | |
sum(vect^2) - 1) | |
} | |
data_vect <- purrr::map_df(radii, | |
function(rho) { | |
roots <- multiroot(model, start = c(1, 0), | |
rho = rho^2) | |
data.frame( | |
rho = rho, | |
X = roots$root[1], | |
Y = roots$root[2] | |
) | |
}) | |
# Compute PCs | |
pca <- prcomp(X) | |
data_pca <- data.frame( | |
X = pca$rotation[1,], | |
Y = pca$rotation[2,], | |
label = c("PC1", "PC2") | |
) | |
# Create animation | |
animation <- data_ellipse %>% | |
ggplot(aes(X, Y)) + | |
geom_path(data = circle) + | |
geom_path() + | |
geom_segment(data = data_vect, | |
aes(xend = X, yend = Y), | |
x = 0, y = 0) + | |
geom_segment(data = data_pca, | |
aes(xend = X, yend = Y, | |
colour = label), | |
x = 0, y = 0) + | |
scale_colour_discrete(name = "") + | |
theme_minimal() + | |
transition_states(rho) + | |
theme(legend.position = 'top') | |
animate(animation, nframes = 200, | |
fps = 50, rewind = TRUE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
We can see how the principal components can be obtained by looking at the intersection between the unit circle and the family of ellipses defined by the inverse covariance matrix. When maximising the variance, we are minimising the Mahalanobis distance, and therefore we are looking for the intersection between the circle and the smallest ellipse.