Skip to content

Instantly share code, notes, and snippets.

@tyrcho
Last active November 25, 2020 21:37
Show Gist options
  • Save tyrcho/5884241 to your computer and use it in GitHub Desktop.
Save tyrcho/5884241 to your computer and use it in GitHub Desktop.
Principal Component Analysis with Breeze (Scala)
// For an unknown reason, this works when copied and pasted into Ammonite but not when run with `amm pca.sc`
import $ivy.`org.scalanlp::breeze-natives:0.13.2`
import $ivy.`org.scalanlp::breeze-viz:0.13.2`
import $ivy.`org.scalanlp::breeze:0.13.2`
import breeze.linalg._
import breeze.linalg.svd._
import breeze.plot._
import scala.util.Random._
// Number of columns (features) in the generated data set.
val dimensions: Int = 50
// Number of rows (samples) in the generated data set.
val values: Int = 200
/**
 * Arithmetic mean of a vector: the sum of the values produced by
 * `valuesIterator` divided by `v.size`.
 *
 * @param v the vector to average
 * @return the mean; for a vector of size 0 the division is 0.0 / 0, i.e. NaN
 */
def mean(v: Vector[Double]): Double = {
  val total = v.valuesIterator.sum
  val count = v.size
  total / count
}
/**
 * Centres every column of a matrix on zero.
 *
 * Works on a copy, so the matrix passed in is left untouched.
 *
 * @param m input matrix
 * @return a copy of `m` in which each column has had its own mean subtracted
 */
def zeroMean(m: DenseMatrix[Double]): DenseMatrix[Double] = {
  val centered = m.copy
  (0 until m.cols).foreach { j =>
    // The column slice writes through to `centered`, so the in-place
    // subtraction below updates the copy directly.
    val column = centered(::, j)
    column -= mean(column)
  }
  centered
}
/**
 * Filters the mean-centred data through its leading singular directions.
 *
 * The data is centred column-wise with `zeroMean`, the transposed matrix is
 * decomposed with `svd`, and the first `components` rows of the third factor
 * of that decomposition are used to build a projection matrix that is applied
 * to the centred data. The result therefore has the same shape as `data`
 * (it is the filtered data, not a reduced `components`-column score matrix).
 *
 * @param data       input matrix; per the sample script, one row per sample
 *                   and one column per dimension
 * @param components number of leading rows of the decomposition factor to keep
 * @return the centred data multiplied by the projection built from those rows
 */
def pca(data: DenseMatrix[Double], components: Int): DenseMatrix[Double] = {
  val centered = zeroMean(data)
  // Only the third factor of the decomposition is needed.
  // NOTE(review): Breeze documents this factor as V^T (rows are right singular
  // vectors of the decomposed matrix) - confirm for the Breeze version in use.
  val SVD(_, _, rightVectors) = svd(centered.t)
  // Keep the top `components` rows.
  val basis = rightVectors(0 until components, ::)
  // basis^T * basis is the projection applied to the centred data.
  (basis.t * basis) * centered
}
//val data= csvread(new File("data.csv"), skipLines = 1).t
// val data = DenseMatrix(
// (2.0, 4.0, 5.1),
// (1.0, 2.5, 3.5),
// (8.0, 3.0, 6.4),
// (8.0, 5.0, 6.5),
// (4.3, 4.5, 6.4))
/**
 * Builds a random `values` x `dimensions` matrix whose rows form two groups.
 *
 * For each column, an (offset, spread) pair is drawn for each group: the
 * first group's offset is centred on 2, the second group's on -2. Rows
 * `0 until values / 2` are filled with `offset1 + spread1 * nextDouble`, and
 * the remaining rows with `offset2 + spread2 * nextDouble`.
 *
 * @return the freshly generated matrix
 */
def generateData: DenseMatrix[Double] = {
  val samples = DenseMatrix.zeros[Double](values, dimensions)
  val half = values / 2
  (0 until dimensions).foreach { col =>
    // The four per-column draws happen in a fixed order (group 1 offset and
    // spread, then group 2) before any cell is filled, so a seeded generator
    // yields the same matrix.
    val offsetA = 2 + 2 * (nextDouble - 0.5)
    val spreadA = nextDouble * 2
    val offsetB = -2 + 2 * (nextDouble - 0.5)
    val spreadB = nextDouble * 2
    (0 until values).foreach { row =>
      // First half of the rows belongs to group A, the rest to group B.
      val sample =
        if (row < half) offsetA + spreadA * nextDouble
        else offsetB + spreadB * nextDouble
      samples(row, col) = sample
    }
  }
  samples
}
// Generate the synthetic data set and filter it through `pca`, keeping 2 components.
val data = generateData
val pcaRes = pca(data, 2)
// Alternative: Breeze ships its own implementation since
// https://github.com/scalanlp/breeze/commit/913e1229cb3572b43062ee1f756858ac793bb8b8#diff-64378cd6c871f77715faf60c46568a8e
// val pca = princomp(data)
// val pcaRes = pca.scores
println("result pca \n" + pcaRes)
// One figure per scatter plot: columns 0 and 3 of the raw data, and
// columns 0 and 1 of the filtered result.
val f1 = Figure("data")
val f2 = Figure("pca")
// Every point is drawn with the same size, whatever its index.
val pointSize: Int => Double = _ => 0.1
f1.subplot(0) += scatter(data(::, 0), data(::, 3), pointSize)
f2.subplot(0) += scatter(pcaRes(::, 0), pcaRes(::, 1), pointSize)
@frgomes
Copy link

frgomes commented Jan 25, 2018

As per Breeze 1.0-RC2, line 22 should be:

val SVD(_, _, v) = svd(d.t)

@tyrcho
Copy link
Author

tyrcho commented Jul 27, 2019

@frgomes thanks ! updated with ammonite

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment