@AndreCAndersen
Created March 27, 2017 09:52
Practical Feature Reduction Using SVD in R
# Helper: builds an n x n Hilbert matrix (H[i, j] = 1 / (i + j - 1)), used here as dummy data
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
X_all_size <- 50
n <- 6           # Original number of features
k <- 2           # Reduced number of features
split_ratio <- 0.8
# Generate some dummy data
X_all <- hilbert(X_all_size)[,1:n]
# Split data into training and test sets
idx <- sample(seq_len(nrow(X_all)), size = floor(split_ratio * nrow(X_all)))
X <- X_all[idx, ]  # Training set
Z <- X_all[-idx, ] # Test set
# Center the features using the average of features in our training dataset X.
x_bar <- colMeans(X) # Average of each feature
X <- sweep(X, MARGIN=2, x_bar, FUN="-") # Centering features of X using average of X.
Z <- sweep(Z, MARGIN=2, x_bar, FUN="-") # Centering features of Z using average of X.
# It's important to center using the training-set means (x_bar from X), never
# statistics of the test set Z. At prediction time you typically see one
# observation at a time, so test-set statistics aren't available anyway.
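A sketch of that one-by-one case (the toy matrix sizes and the single held-out row below are illustrative assumptions, not part of the gist): a new observation is centered with the stored training means before anything else happens.

```r
# Illustrative sketch: center a single unseen observation using only the
# training means x_bar. No test-set statistics are available or needed.
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
X_train <- hilbert(10)[, 1:4]   # toy training features
x_bar <- colMeans(X_train)      # learned once, from training data only
z_new <- hilbert(12)[11, 1:4]   # one new observation, a plain vector
z_centered <- z_new - x_bar     # element-wise subtraction of training means
```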
# Perform singular value decomposition
s <- svd(X)
# Reduce number of features from n=6 to k=2
U_r <- s$u[,1:k]
D_r <- diag(s$d[1:k])
V_r <- s$v[,1:k]
# Transform to feature-reduced versions of the training set X and test set Z
X_r <- U_r %*% D_r
Z_r <- Z %*% V_r
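Note that projecting the training rows with V_r gives exactly the same coordinates: since X = U D t(V), it follows that X %*% V_r equals U_r %*% D_r up to floating-point error. A quick self-contained check (the toy dimensions here are arbitrary):

```r
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
X <- scale(hilbert(20)[, 1:6], scale = FALSE)  # centered toy training set
s <- svd(X)
k <- 2
U_r <- s$u[, 1:k]; D_r <- diag(s$d[1:k]); V_r <- s$v[, 1:k]
# Both routes to the reduced training features agree numerically.
max_diff <- max(abs(X %*% V_r - U_r %*% D_r))
```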
# The reduced feature sets X_r and Z_r can now be fed to any model:
# Y_hat = f(Z_r; X_r) # Outputs predictions
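As one hypothetical choice of f (an assumption for illustration, not part of the original gist): an ordinary least-squares fit on the reduced training features, applied to the reduced test features. The response y_all below is made up.

```r
# Hypothetical end-to-end sketch: y_all and the lm() model are assumptions.
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
X_all <- hilbert(50)[, 1:6]
y_all <- rowSums(X_all)                 # made-up response for illustration
idx <- 1:40                             # deterministic split for the sketch
x_bar <- colMeans(X_all[idx, ])
X <- sweep(X_all[idx, ], 2, x_bar)      # center with training means
Z <- sweep(X_all[-idx, ], 2, x_bar)
s <- svd(X)
k <- 2
V_r <- s$v[, 1:k]
X_r <- s$u[, 1:k] %*% diag(s$d[1:k])
Z_r <- Z %*% V_r
# Fit on reduced training features, predict on reduced test features.
fit <- lm(y ~ ., data = data.frame(y = y_all[idx], X_r))
Y_hat <- predict(fit, newdata = data.frame(Z_r))
```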
# Finally, you can transform back to rank-k approximations of the original
# training set X and test set Z
X_hat <- X_r %*% t(V_r) # == U_r %*% D_r %*% t(V_r)
Z_hat <- Z_r %*% t(V_r)
# This last reconstruction step isn't necessary for prediction; it just shows
# what the reduced representation retains.
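If you do reconstruct, a useful sanity check (a sketch with arbitrary toy dimensions): by the Eckart–Young theorem, the Frobenius-norm error of the rank-k reconstruction equals the root sum of squares of the discarded singular values.

```r
hilbert <- function(n) { i <- 1:n; 1 / outer(i - 1, i, "+") }
X <- scale(hilbert(20)[, 1:6], scale = FALSE)
s <- svd(X)
k <- 2
X_hat <- s$u[, 1:k] %*% diag(s$d[1:k]) %*% t(s$v[, 1:k])
recon_err <- sqrt(sum((X - X_hat)^2))   # Frobenius norm of X - X_hat
tail_energy <- sqrt(sum(s$d[-(1:k)]^2)) # energy in discarded singular values
```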