Skip to content

Instantly share code, notes, and snippets.

@joelkr
Created June 22, 2014 23:55
Show Gist options
  • Save joelkr/25c143d21cf986c7b3ed to your computer and use it in GitHub Desktop.
Save joelkr/25c143d21cf986c7b3ed to your computer and use it in GitHub Desktop.
R Functions for Collaborative Filtering Cardiac Arrhythmia Data
# Cost and gradient functions for optim(). These are a port of Coursera
# Machine Learning course topic Collaborative Filtering from MATLAB
# to R.
# Cost Function
coFilterCost <- function (params, npatients, nleads, nreadings,Y, R, lambda)
{
  # Regularized collaborative-filtering cost for optim(), ported from the
  # Coursera Machine Learning MATLAB exercise and applied to cardiac
  # arrhythmia data.
  #
  # params:    X and Theta flattened into a single numeric vector, because
  #            optim() optimizes over one parameter vector
  # npatients: number of patients (columns of Y)
  # nleads:    number of cardiograph leads; an intercept ("lead 0") column
  #            is added internally, giving nleads + 1 columns
  # nreadings: number of numeric reading fields (rows of Y)
  # Y:         observed patient data, nreadings x npatients
  # R:         indicator matrix, 1 where Y holds an observation, 0 elsewhere
  # lambda:    regularization strength
  #
  # Returns the scalar cost J.

  # Widen by one column for the intercept term.
  k <- nleads + 1
  x_len <- nreadings * k
  t_len <- npatients * k

  # Unflatten the parameter vector back into the two factor matrices.
  X <- matrix(params[seq_len(x_len)], nreadings, k)
  Theta <- matrix(params[(x_len + 1):(x_len + t_len)], npatients, k)

  # Residuals at observed entries only; Theta is transposed so the
  # prediction X %*% t(Theta) lines up with Y (nreadings x npatients).
  resid <- R * (X %*% t(Theta) - Y)

  # Half the sum of squared errors, plus an L2 penalty on every parameter
  # to keep the factor values from growing without bound.
  sum(resid^2) / 2.0 + (lambda / 2.0) * (sum(Theta^2) + sum(X^2))
}
coFilterGrad <- function(params, npatients, nleads, nreadings, Y, R, lambda) {
  # Gradient of coFilterCost with respect to params, for optim().
  #
  # params:    X and Theta flattened into a single numeric vector
  # npatients: number of patients (columns of Y)
  # nleads:    number of cardiograph leads; an intercept ("lead 0") column
  #            is added internally, giving nleads + 1 columns
  # nreadings: number of numeric reading fields (rows of Y)
  # Y:         observed patient data, nreadings x npatients
  # R:         indicator matrix, 1 where Y holds an observation, 0 elsewhere
  # lambda:    regularization strength
  #
  # Returns the gradient flattened in the same order as params:
  # all of X_grad first, then all of Theta_grad.

  # Add intercept ("lead 0") column, mirroring coFilterCost.
  nleads <- nleads + 1
  sx <- nreadings * nleads
  st <- npatients * nleads
  X <- matrix(params[1:sx], nreadings, nleads)
  Theta <- matrix(params[(sx + 1):(sx + st)], npatients, nleads)

  # Residuals at observed entries only.
  S <- R * (X %*% t(Theta) - Y)

  # Unregularized gradients of the half-SSE term, plus the L2 penalty.
  X_grad <- S %*% Theta + lambda * X
  Theta_grad <- t(S) %*% X + lambda * Theta

  # Flatten to match the layout optim() expects (same order as params).
  # BUG FIX: the original flattened as.vector(Theta) — the parameters —
  # instead of as.vector(Theta_grad), handing optim() a wrong gradient
  # for the entire Theta half of the vector.
  c(as.vector(X_grad), as.vector(Theta_grad))
}
normalizeY <- function(Y,R) {
  # Mean-center each row (reading) of Y using only the entries flagged as
  # observed in R; unobserved entries stay 0 in the result.
  #
  # NOTE(original author's caveats, preserved): averaging a quantity that
  # changes through time is doubtful — some readings may be voltages sampled
  # at intervals. Dividing by the row max appeared to make optim() zero out
  # Theta (possibly linear dependence from scaling by a constant); subtracting
  # the average also zeroes Theta in $par. Some normalization of Y is needed,
  # but the right choice is still open.
  #
  # Y: observed data, nreadings x npatients
  # R: indicator matrix, 1 where Y holds an observation, 0 elsewhere
  #
  # Returns list(Ynorm, nrm): the centered matrix and the per-row means.
  dims <- dim(Y)
  Ynorm <- matrix(0, dims[1], dims[2])
  nrm <- vector(length = dims[1])
  # Y is nreadings x npatients, so the per-feature average runs across rows.
  for (row in seq_len(dims[1])) {
    observed <- which(R[row, ] == 1)
    # A row with no observations gets mean 0 (and stays all zero below,
    # since assigning into an empty index set is a no-op).
    nrm[row] <- if (length(observed) == 0) 0 else mean(Y[row, observed])
    Ynorm[row, observed] <- Y[row, observed] - nrm[row]
  }
  list(Ynorm, nrm)
}
restoreY <- function(Ynorm, nrm) {
  # Undo normalizeY's row-centering by adding each row's stored mean back
  # to every entry of that row (including previously-unobserved entries,
  # which therefore come back as the row mean).
  #
  # Ynorm: centered matrix, nreadings x npatients
  # nrm:   per-row means as returned by normalizeY
  #
  # Returns the de-normalized matrix.
  dims <- dim(Ynorm)
  restored <- matrix(0, dims[1], dims[2])
  for (row in seq_len(dims[1])) {
    restored[row, ] <- Ynorm[row, ] + nrm[row]
  }
  restored
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment