Created
March 27, 2017 09:52
-
-
Save AndreCAndersen/4e3e6116e07678d1cfdb7603d6220987 to your computer and use it in GitHub Desktop.
Practical Feature Reduction Using SVD in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build an n x n Hilbert-style matrix whose (i, j) entry is 1 / (i + j - 1).
#
# Args:
#   n: Non-negative integer giving the matrix dimension.
# Returns:
#   An n x n numeric matrix (a 0 x 0 matrix when n == 0).
hilbert <- function(n) {
  # seq_len() is safe for n == 0, where 1:n would wrongly yield c(1, 0).
  i <- seq_len(n)
  1 / outer(i - 1, i, "+")
}
# Parameters ----
X_all_size <- 50    # Number of rows in the dummy data set
n <- 6              # Original number of features
k <- 2              # Reduced number of features
split_ratio <- 0.8  # Fraction of rows used for training

# Generate some dummy data: the first n columns of a Hilbert-style matrix.
X_all <- hilbert(X_all_size)[, 1:n]

# Split data into training and test sets
idx <- sample(seq_len(nrow(X_all)), size = floor(split_ratio * nrow(X_all)))
X <- X_all[idx, ]   # Training set
Z <- X_all[-idx, ]  # Test set
# Center every feature on the training-set mean. The same centering vector
# x_bar is applied to the test set Z as well: when predicting one
# observation at a time in production you never have access to the full
# test set, so Z cannot supply its own means.
x_bar <- colMeans(X)     # Per-feature average of the training set
X <- sweep(X, 2, x_bar)  # sweep() subtracts by default (FUN = "-")
Z <- sweep(Z, 2, x_bar)

# Singular value decomposition of the centered training set
s <- svd(X)
# Truncate the SVD, keeping only the k leading singular directions
# (reduces the feature count from n = 6 to k = 2).
U_r <- s$u[, 1:k]
D_r <- diag(s$d[1:k])
V_r <- s$v[, 1:k]

# Project training and test sets into the reduced k-dimensional space.
# For the training set, U_r %*% D_r equals X %*% V_r.
X_r <- U_r %*% D_r
Z_r <- Z %*% V_r
# The reduced feature sets X_r and Z_r would now feed a model:
#   Y_hat = f(Z_r; X_r)  # Outputs predictions

# Finally, you can retransform back to rank-k approximations of the
# original (centered) training set X and test set Z.
# tcrossprod(A, B) computes A %*% t(B).
X_hat <- tcrossprod(X_r, V_r)  # == U_r %*% D_r %*% t(V_r)
Z_hat <- tcrossprod(Z_r, V_r)
# This last reconstruction step isn't necessary for prediction.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment