Skip to content

Instantly share code, notes, and snippets.

@darkseed
Forked from tbertelsen/pearson.scala
Last active September 18, 2015 09:42
Show Gist options
  • Save darkseed/4c02ca4b9995148b1acb to your computer and use it in GitHub Desktop.
Save darkseed/4c02ca4b9995148b1acb to your computer and use it in GitHub Desktop.
Calculating pearson for Breeze vectors
import breeze.linalg._
import breeze.stats._
import scala.math.sqrt
/**
 * Pearson product-moment correlation coefficient of two sparse vectors.
 *
 * The value is assembled from three dot products and the two means, so it is
 * efficient for sparse input: the cost is meant to scale in O(activeSize).
 *
 * @param a first sample vector
 * @param b second sample vector; must have the same length as `a`
 * @return the sample correlation coefficient of `a` and `b`
 * @throws IllegalArgumentException if the two vectors differ in length
 */
// The parameters must be typed as SparseVector so the implicits are linked correctly.
def pearson(a: SparseVector[Double], b: SparseVector[Double]): Double = {
  if (a.length != b.length) {
    throw new IllegalArgumentException("Vectors not of the same length.")
  }

  val size = a.length
  val meanA = mean(a)
  val meanB = mean(b)

  // Sample formula, see Wikipedia:
  // http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#For_a_sample
  val numerator = a.dot(b) - size * meanA * meanB
  val rootSumSqA = sqrt(a.dot(a) - size * meanA * meanA)
  val rootSumSqB = sqrt(b.dot(b) - size * meanB * meanB)

  numerator / (rootSumSqA * rootSumSqB)
}
/**
 * Pearson product-moment correlation coefficient that works for all vectors.
 * Scales in O(length).
 *
 * When both arguments are sparse vectors the computation is delegated to the
 * `SparseVector` overload; any other combination is handled here.
 *
 * @param a first sample vector
 * @param b second sample vector; must have the same length as `a`
 * @return the sample correlation coefficient of `a` and `b`
 * @throws IllegalArgumentException if the two vectors differ in length
 */
def pearson(a: Vector[Double], b: Vector[Double]): Double = (a, b) match {
  // Delegate to the efficient method if possible. The typed patterns replace the
  // isInstanceOf/asInstanceOf pair (which lacked SparseVector's type argument)
  // and make the early `return` unnecessary.
  case (sparseA: SparseVector[Double], sparseB: SparseVector[Double]) =>
    pearson(sparseA, sparseB)

  case _ =>
    if (a.length != b.length)
      throw new IllegalArgumentException("Vectors not of the same length.")
    val n = a.length
    val dot = a.dot(b)
    val adot = a.dot(a)
    val bdot = b.dot(b)
    val amean = mean(a)
    val bmean = mean(b)
    // See Wikipedia http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient#For_a_sample
    (dot - n * amean * bmean) / (sqrt(adot - n * amean * amean) * sqrt(bdot - n * bmean * bmean))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment