-
-
Save darkseed/4c02ca4b9995148b1acb to your computer and use it in GitHub Desktop.
Calculating pearson for Breeze vectors
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import breeze.linalg._ | |
import breeze.stats._ | |
import scala.math.sqrt | |
/**
 * Pearson product-moment correlation coefficient of two sparse vectors.
 *
 * Evaluates the sample formula
 * `(a.b - n*mean(a)*mean(b)) / (sqrt(a.a - n*mean(a)^2) * sqrt(b.b - n*mean(b)^2))`
 * from dot products and means only; see
 * http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#For_a_sample
 *
 * Intended to be efficient for sparse input, scaling with the number of
 * active entries (O(activeSize)) rather than with the full length.
 *
 * Note: the parameters are deliberately typed as `SparseVector` (not the
 * more general `Vector`) so that the Breeze implicits are linked correctly.
 *
 * @param a first vector
 * @param b second vector; must have the same length as `a`
 * @return the correlation coefficient; not a finite number when the
 *         denominator evaluates to zero (e.g. a constant vector)
 * @throws IllegalArgumentException if the two vectors differ in length
 */
def pearson(a: SparseVector[Double], b: SparseVector[Double]): Double = {
  val size = a.length
  if (size != b.length) {
    throw new IllegalArgumentException("Vectors not of the same length.")
  }

  val meanA = mean(a)
  val meanB = mean(b)

  // Numerator: sum of products corrected by the product of the means.
  val numerator = a.dot(b) - size * meanA * meanB
  // Per-vector scale terms: square roots of the mean-corrected sums of squares.
  val scaleA = sqrt(a.dot(a) - size * meanA * meanA)
  val scaleB = sqrt(b.dot(b) - size * meanB * meanB)

  numerator / (scaleA * scaleB)
}
/**
 * Pearson product-moment correlation coefficient for any pair of vectors.
 *
 * When both arguments are sparse vectors the call is forwarded to the
 * `SparseVector` overload; otherwise the coefficient is computed here on the
 * general `Vector` type (the original author notes this scales in O(length)).
 *
 * Evaluates the sample formula
 * `(a.b - n*mean(a)*mean(b)) / (sqrt(a.a - n*mean(a)^2) * sqrt(b.b - n*mean(b)^2))`;
 * see
 * http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#For_a_sample
 *
 * @param a first vector
 * @param b second vector; must have the same length as `a`
 * @return the correlation coefficient; not a finite number when the
 *         denominator evaluates to zero (e.g. a constant vector)
 * @throws IllegalArgumentException if the two vectors differ in length
 */
def pearson(a: Vector[Double], b: Vector[Double]): Double = (a, b) match {
  // Delegate to the efficient method if possible. The typed patterns give
  // `sa`/`sb` the static type SparseVector[Double], so overload resolution
  // picks the sparse variant instead of recursing into this method.
  case (sa: SparseVector[Double], sb: SparseVector[Double]) =>
    pearson(sa, sb)

  case _ =>
    if (a.length != b.length)
      throw new IllegalArgumentException("Vectors not of the same length.")
    val n = a.length
    val dot = a.dot(b)
    val adot = a.dot(a)
    val bdot = b.dot(b)
    val amean = mean(a)
    val bmean = mean(b)
    (dot - n * amean * bmean) / (sqrt(adot - n * amean * amean) * sqrt(bdot - n * bmean * bmean))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment