Skip to content

Instantly share code, notes, and snippets.

@joelkr
Created June 22, 2014 23:55
Show Gist options
  • Save joelkr/25c143d21cf986c7b3ed to your computer and use it in GitHub Desktop.
Save joelkr/25c143d21cf986c7b3ed to your computer and use it in GitHub Desktop.
R Functions for Collaborative Filtering Cardiac Arrhythmia Data
# Cost and gradient functions for optim(). These are a port of Coursera
# Machine Learning course topic Collaborative Filtering from MATLAB
# to R.
# Cost Function
coFilterCost <- function (params, npatients, nleads, nreadings,Y, R, lambda)
{
  # Regularized collaborative-filtering cost for optim(), ported from the
  # Coursera Machine Learning MATLAB exercise and applied to cardiac
  # arrhythmia data.
  #
  # params:    X and Theta flattened into a single numeric vector, because
  #            optim() optimizes over one parameter vector
  # npatients: number of patients (columns of Y)
  # nleads:    number of cardiograph leads; an intercept ("lead 0") column
  #            is added internally, giving nleads + 1 columns
  # nreadings: number of numeric reading fields (rows of Y)
  # Y:         observed patient data, nreadings x npatients
  # R:         indicator matrix, 1 where Y holds an observation, 0 elsewhere
  # lambda:    regularization strength
  #
  # Returns the scalar cost J.

  # Widen by one column for the intercept term.
  k <- nleads + 1
  x_len <- nreadings * k
  t_len <- npatients * k

  # Unflatten the parameter vector back into the two factor matrices.
  X <- matrix(params[seq_len(x_len)], nreadings, k)
  Theta <- matrix(params[(x_len + 1):(x_len + t_len)], npatients, k)

  # Residuals at observed entries only; Theta is transposed so the
  # prediction X %*% t(Theta) lines up with Y (nreadings x npatients).
  resid <- R * (X %*% t(Theta) - Y)

  # Half the sum of squared errors, plus an L2 penalty on every parameter
  # to keep the factor values from growing without bound.
  sum(resid^2) / 2.0 + (lambda / 2.0) * (sum(Theta^2) + sum(X^2))
}
coFilterGrad <- function(params, npatients, nleads, nreadings, Y, R, lambda) {
  # Gradient of coFilterCost with respect to params, for optim().
  #
  # params:    X and Theta flattened into a single numeric vector
  # npatients: number of patients (columns of Y)
  # nleads:    number of cardiograph leads; an intercept ("lead 0") column
  #            is added internally, giving nleads + 1 columns
  # nreadings: number of numeric reading fields (rows of Y)
  # Y:         observed patient data, nreadings x npatients
  # R:         indicator matrix, 1 where Y holds an observation, 0 elsewhere
  # lambda:    regularization strength
  #
  # Returns the gradient flattened in the same order as params:
  # all of X_grad first, then all of Theta_grad.

  # Add intercept ("lead 0") column, mirroring coFilterCost.
  nleads <- nleads + 1
  sx <- nreadings * nleads
  st <- npatients * nleads
  X <- matrix(params[1:sx], nreadings, nleads)
  Theta <- matrix(params[(sx + 1):(sx + st)], npatients, nleads)

  # Residuals at observed entries only.
  S <- R * (X %*% t(Theta) - Y)

  # Unregularized gradients of the half-SSE term, plus the L2 penalty.
  X_grad <- S %*% Theta + lambda * X
  Theta_grad <- t(S) %*% X + lambda * Theta

  # Flatten to match the layout optim() expects (same order as params).
  # BUG FIX: the original flattened as.vector(Theta) — the parameters —
  # instead of as.vector(Theta_grad), handing optim() a wrong gradient
  # for the entire Theta half of the vector.
  c(as.vector(X_grad), as.vector(Theta_grad))
}
normalizeY <- function(Y,R) {
  # Mean-center each row (reading) of Y using only the entries flagged as
  # observed in R; unobserved entries stay 0 in the result.
  #
  # NOTE(original author's caveats, preserved): averaging a quantity that
  # changes through time is doubtful — some readings may be voltages sampled
  # at intervals. Dividing by the row max appeared to make optim() zero out
  # Theta (possibly linear dependence from scaling by a constant); subtracting
  # the average also zeroes Theta in $par. Some normalization of Y is needed,
  # but the right choice is still open.
  #
  # Y: observed data, nreadings x npatients
  # R: indicator matrix, 1 where Y holds an observation, 0 elsewhere
  #
  # Returns list(Ynorm, nrm): the centered matrix and the per-row means.
  dims <- dim(Y)
  Ynorm <- matrix(0, dims[1], dims[2])
  nrm <- vector(length = dims[1])
  # Y is nreadings x npatients, so the per-feature average runs across rows.
  for (row in seq_len(dims[1])) {
    observed <- which(R[row, ] == 1)
    # A row with no observations gets mean 0 (and stays all zero below,
    # since assigning into an empty index set is a no-op).
    nrm[row] <- if (length(observed) == 0) 0 else mean(Y[row, observed])
    Ynorm[row, observed] <- Y[row, observed] - nrm[row]
  }
  list(Ynorm, nrm)
}
restoreY <- function(Ynorm, nrm) {
  # Undo normalizeY's row-centering by adding each row's stored mean back
  # to every entry of that row (including previously-unobserved entries,
  # which therefore come back as the row mean).
  #
  # Ynorm: centered matrix, nreadings x npatients
  # nrm:   per-row means as returned by normalizeY
  #
  # Returns the de-normalized matrix.
  dims <- dim(Ynorm)
  restored <- matrix(0, dims[1], dims[2])
  for (row in seq_len(dims[1])) {
    restored[row, ] <- Ynorm[row, ] + nrm[row]
  }
  restored
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment