Skip to content

Instantly share code, notes, and snippets.

@DavidArenburg
Last active August 29, 2015 14:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DavidArenburg/cca995ec1709c0d4735d to your computer and use it in GitHub Desktop.
Save DavidArenburg/cca995ec1709c0d4735d to your computer and use it in GitHub Desktop.
Vectorized Var, SD, Cov and Cor functions

These functions are meant to replace *apply loops when working row wise

Load the Gist

library(devtools)
source_gist("cca995ec1709c0d4735d")

Some use cases

set.seed(123)
(M <- matrix(sample(25), ncol = 5))
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    8    1   15   13    5
# [2,]   19   11    7    3   12
# [3,]   10   17    9   21   24
# [4,]   20   23   14   18    4
# [5,]   22   25    2    6   16

set.seed(124)
(M2 <- matrix(sample(25), ncol = 5))
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    3    6   19    1    4
# [2,]   10   23   15   20   25
# [3,]   12   22   24   18    8
# [4,]    9   16   11    7   14
# [5,]    5   21   17   13    2

### -- Var per row -- ###
MatVar(M, 1) # Or RowVar(M)
## [1] 32.8 35.8 43.7 54.2 99.2
apply(M, 1, var)
## [1] 32.8 35.8 43.7 54.2 99.2

### -- Var per column (not very afficient because uses `t` (needs some more thinking) -- ###
MatVar(M, 2)
## [1] 40.2 94.8 28.3 58.7 68.2
apply(M, 2, var)
## [1] 40.2 94.8 28.3 58.7 68.2

### -- SD per row -- ###
RowSD(M)
## [1] 5.727128 5.983310 6.610598 7.362065 9.959920
apply(M, 1, sd)
## [1] 5.727128 5.983310 6.610598 7.362065 9.959920

### -- Covariance between two matrices per row -- ###
RowCov(M, M2)
## [1]  17.70 -14.80 -20.20  -4.15 -11.15
sapply(seq_len(nrow(M)), function(i) cov(M[i, ], M2[i, ]))
## [1]  17.70 -14.80 -20.20  -4.15 -11.15

### -- Correlation between two matrices per row -- ###
RowCor(M, M2)
## [1]  0.4314969 -0.4050100 -0.4545079 -0.1545691 -0.1401550
sapply(seq_len(nrow(M)), function(i) cor(M[i, ], M2[i, ]))
## [1]  0.4314969 -0.4050100 -0.4545079 -0.1545691 -0.1401550
## This is a set of vectorized row wise operation I've gathered from my answers on SO for the last year
#@ Gets a matrix and computes variance by row (dim == 1) or by column (dim == 2)
MatVar <- function(x, dim = 1) {
if(dim == 1){
rowSums((x - rowMeans(x))^2)/(dim(x)[2] - 1)
} else if (dim == 2) {
rowSums((t(x) - colMeans(x))^2)/(dim(x)[1] - 1)
} else stop("Please enter valid dimention")
}
MatSD <- function(x, dim = 1) {
if(dim == 1){
sqrt(rowSums((x - rowMeans(x))^2)/(dim(x)[2] - 1))
} else if (dim == 2) {
sqrt(rowSums((t(x) - colMeans(x))^2)/(dim(x)[1] - 1))
} else stop("Please enter valid dimention")
}
#@ Simplified case of the above. Runs rowwise variance
RowVar <- function(x) {
rowSums((x - rowMeans(x))^2)/(dim(x)[2] - 1)
}
#@ Runs rowwise SD
RowSD <- function(x) {
sqrt(rowSums((x - rowMeans(x))^2)/(dim(x)[2] - 1))
}
#@ Runs rowwise Covariance
RowCov <- function(x, y){
rowSums((x - rowMeans(x))*(y - rowMeans(y)))/(dim(x)[2] - 1)
}
#@ Runs rowwise correlation (using the above functions)
RowCor <- function(A, B) RowCov(A, B)/(RowSD(A) * RowSD(B))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment