Skip to content

Instantly share code, notes, and snippets.

@joelkr
Last active August 29, 2015 14:02
Show Gist options
  • Save joelkr/57c52f5e4afe67ca7b47 to your computer and use it in GitHub Desktop.
Save joelkr/57c52f5e4afe67ca7b47 to your computer and use it in GitHub Desktop.
Testing Collaborative Filtering on the Cardiac Arrhythmia Data
# Build a small, human-sized test problem out of the full cardiac data set.
# Dimensions of the rectangle of data we carve out:
npatients <- 4   # number of patients (columns, after transposing)
nreadings <- 8   # number of readings per patient (rows, after transposing)
# Still not converging within 500 trials; perhaps too many unknowns.
# (nleads <- 10 was also tried.)
nleads <- 12     # number of latent features ("leads")
load("cardiacSetup.rda")  # provides cardiac_num_train
# Carve out the test chunk and orient it readings x patients, following the
# machine-learning-course convention (data x data-source, e.g. movies x users).
# That seemed like it might explain optim() zeroing the second matrix Theta,
# but it is probably linearly dependent columns being zeroed, maybe.
Y <- t(as.matrix(cardiac_num_train[seq_len(npatients), seq_len(nreadings)]))
# Fixed seed so the same entries are zeroed out on every run.
set.seed(4321)
# Flatten, knock out roughly half the entries at random, then reshape.
# (There may be a more MATLAB-like way to do this.)
y <- as.vector(Y)
y[runif(length(y)) > 0.5] <- 0
Yn <- matrix(y, nreadings, npatients)
# R is the observation mask: 1 where a reading was kept, 0 where removed.
R <- (Yn != 0) + 0
# Normalize the masked data (normalizeY comes from the project setup files).
yl <- normalizeY(Yn, R)
# X and Theta are initialized with random data.
# Each reading recorded was created by some mixture of the various leads.
# NOTE: byrow is a no-op for an i.i.d. random fill, but it is kept (and
# spelled out as TRUE rather than the reassignable shorthand T) so the
# exact placement of the RNG draws is unchanged.
X <- matrix(rnorm(nreadings * nleads), nrow = nreadings, ncol = nleads, byrow = TRUE)
# Each patient had a particular set of lead readings.
Theta <- matrix(rnorm(npatients * nleads), nrow = npatients, ncol = nleads, byrow = TRUE)
# Add a y-intercept (bias) column to both factor matrices.
X <- cbind(1, X)
Theta <- cbind(1, Theta)
source("CollaborativeFiltering.R")  # defines coFilterCost() and coFilterGrad()
# Flatten both factor matrices into the single parameter vector optim() expects.
params <- c(as.vector(X), as.vector(Theta))
# optim() with the default (Nelder-Mead) method ran but returned convergence
# code 1 rather than 0; method = "CG" was also tried.  method = "BFGS" seems
# to converge more reliably.
# pn: fit against the normalized data (row means subtracted).  That
# normalization caused optim() to zero out all values of Theta as returned in
# pn$par, so nothing further could be done with it.
pn <- optim(par = params, fn = coFilterCost, gr = coFilterGrad, method = "BFGS",
            npatients = npatients, nleads = nleads, nreadings = nreadings,
            Y = yl[[1]], R = R, lambda = 0)
# p: fit against the raw masked data Yn.
p <- optim(par = params, fn = coFilterCost, gr = coFilterGrad, method = "BFGS",
           npatients = npatients, nleads = nleads, nreadings = nreadings,
           Y = Yn, R = R, lambda = 0)
# convergence == 1 means the iteration limit was reached without convergence.
# Unpack the flat parameter vector back into the two factor matrices; each
# carries an extra intercept column, hence nleads + 1.
sx <- nreadings * (nleads + 1)
st <- npatients * (nleads + 1)
Xp <- matrix(p$par[1:sx], nreadings, nleads + 1)
Thetap <- matrix(p$par[(sx + 1):(sx + st)], npatients, nleads + 1)
# Reconstruct the full readings-x-patients matrix from the learned factors.
Yp <- Xp %*% t(Thetap)
cat("Convergence:\n")
cat(p$convergence)
cat("\n")
cat("MSE:\n")
# BUG FIX: Y is nreadings x npatients (it was transposed above), so the mean
# squared error divides by nreadings * npatients; the original divided by
# nreadings * nleads, which is the latent dimension, not a data dimension.
cat(mean((Yp - Y)^2))
cat("\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment