Skip to content

Instantly share code, notes, and snippets.

@MartinBodocky
Last active October 4, 2016 01:21
Show Gist options
  • Save MartinBodocky/90a18cc4cba0c575f7aa to your computer and use it in GitHub Desktop.
Save MartinBodocky/90a18cc4cba0c575f7aa to your computer and use it in GitHub Desktop.
PCA with Accord.Net and FSharp.Charting
// Inspired by http://arxiv.org/abs/1210.7463
// reference accord framework
#r "../packages/Accord.3.0.2/lib/net45/Accord.dll"
#r "../packages/Accord.Controls.3.0.2/lib/net45/Accord.Controls.dll"
#r "../packages/Accord.IO.3.0.2/lib/net45/Accord.IO.dll"
#r "../packages/Accord.Math.3.0.2/lib/net45/Accord.Math.dll"
#r "../packages/Accord.Statistics.3.0.2/lib/net45/Accord.Statistics.dll"
//reference deelde with fsharp charting
#r "../packages/Deedle.1.2.4/lib/net40/Deedle.dll"
#r "../packages/FSharp.Charting.0.90.12/lib/net40/FSharp.Charting.dll"
#I "../packages/FSharp.Charting.0.90.12"
#load "FSharp.Charting.fsx"
open System
open Accord
open Accord.Controls
open Accord.Math
open Accord.Math.Comparers
open Accord.Math.Decompositions
open Accord.Statistics
open Accord.Statistics.Analysis
open FSharp.Charting
let X : double array = [| 1.0; 2.0; 4.0; 6.0; 12.0; 15.0; 25.0; 45.0; 68.0; 67.0; 65.0; 98.0 |]
let xSum = X.Sum() / (float X.Length)
let xMean = X.Mean()
let x1 : double array = [| 0.0; 8.0; 12.0; 20.0 |]
let x2 : double array = [| 8.0; 9.0; 11.0; 12.0 |]
let mean1 = x1.Mean()
let mean2 = x2.Mean()
let stdDev1 = x1.StandardDeviation()
let stdDev2 = x2.StandardDeviation()
printfn "%A - %G" mean1 stdDev1
printfn "%A - %G" mean2 stdDev2
// covariance measure of relationship between two variables
let cov = x1.Covariance(x2)
let data : double [] [] =
[| [| 9.0; 39.0 |]
[| 15.0; 56.0 |]
[| 25.0; 93.0 |]
[| 14.0; 61.0 |]
[| 10.0; 50.0 |]
[| 18.0; 75.0 |]
[| 0.0; 32.0 |]
[| 16.0; 85.0 |]
[| 5.0; 42.0 |]
[| 19.0; 70.0 |]
[| 16.0; 66.0 |]
[| 20.0; 80.0 |] |]
let convarianceMatrix = data.Covariance()
printfn "Convariance matrix %A" (convarianceMatrix.ToString(" 000.00"))
Chart.Point(data |> Array.map (fun arr -> (arr.[0], arr.[1])))
// playing with matrixes
let A : double [] [] =
[| [| 2.0; 3.0 |]
[| 2.0; 1.0 |] |]
// create vector
let u : double [] = [| 1.0; 3.0 |]
// Multiply them
let Au = A.Multiply(u)
//Eigenvectors, Eigenvalues and the Eigendecomposition
let M =
array2D [| [| 3.0; 2.0; 4.0 |]
[| 2.0; 0.0; 2.0 |]
[| 4.0; 2.0; 3.0 |] |]
// create an Eigenvale composition
let evd = new EigenvalueDecomposition(M)
// store the eigenvalues and eigenvectors
let delta = evd.RealEigenvalues
let V = evd.Eigenvectors
// Reconstruct
let R = V.MultiplyByDiagonal(delta).MultiplyByTranspose(V)
printfn "Delta %A , Vector %A" delta V
printfn "%A" R
(* PCA is just another name for projecting the data into the first
orthogonal vectors obtained by Eigendecomposing its covariance matrix *)
let data2 =
[| [| 2.5; 2.4 |]
[| 0.5; 0.7 |]
[| 2.2; 2.9 |]
[| 1.9; 2.2 |]
[| 3.1; 3.0 |]
[| 2.3; 2.7 |]
[| 2.0; 1.6 |]
[| 1.0; 1.1 |]
[| 1.5; 1.6 |]
[| 1.1; 0.9 |] |]
Chart.Point(data2 |> Array.map (fun arr -> (arr.[0], arr.[1])))
// Substract mean
let mean = data2.Mean()
let dataAdjust = array2D(data2.Subtract(mean))
// calculate the covariance matrix
let covAdjustData = dataAdjust.Covariance()
// calculate Eigenvectors pf covariance matrix
let evdAdjustData = new EigenvalueDecomposition(covAdjustData)
let eigenValues = evdAdjustData.RealEigenvalues;
let eigenVectors = evdAdjustData.Eigenvectors;
// Sort eigenvalues and values in descending order
let eigenVectorsSorted = Matrix.Sort(eigenValues, eigenVectors,new GeneralComparer(ComparerDirection.Descending, true))
(* corresponding pairs are defined that each index from eigen values
corresponding to index column/vector from eigen vectors *)
// multiply adjust data with eigen vectors
let finalData = dataAdjust.Multiply(eigenVectors)
printfn "%A" (finalData.ToString(" 0.0000000000; -0.0000000000;"))
(*
SVD - Singular Value Decomposition
*)
let dataSVD =
[| [| 2.5; 2.4 |]
[| 0.5; 0.7 |]
[| 2.2; 2.9 |]
[| 1.9; 2.2 |]
[| 3.1; 3.0 |]
[| 2.3; 2.7 |]
[| 2.0; 1.6 |]
[| 1.0; 1.1 |]
[| 1.5; 1.6 |]
[| 1.1; 0.9 |] |]
Chart.Point(dataSVD |> Array.map (fun arr -> (arr.[0], arr.[1])))
// Substract mean
let meanSVD = dataSVD.Mean()
let dataAdjustSVD = array2D(dataSVD.Subtract(meanSVD))
// We are skipping calculation of covariance matrix and execute SVD
let svd = new SingularValueDecomposition(dataAdjustSVD)
let singularValues = svd.Diagonal
let svdEigenVectors = svd.RightSingularVectors
// Calculate the eigen values as the square of the singular values
let svdEigenValuesTempStep = singularValues.ElementwisePower(2.0)
// because of SVD we need divide eigen values by n - 1 to get the same as from EigenValueDecomposition
let svdEigenValues = svdEigenValuesTempStep.Divide(float (dataSVD.GetLength(0)) - 1.0)
printfn "Feature vector %A" (svdEigenVectors.ToString(" +0.0000000000; -0.0000000000;"))
// create final data for SVD
let svdFinalData = dataAdjustSVD.Multiply(svdEigenVectors)
printfn "%A" (svdFinalData.ToString(" 0.0000000000; -0.0000000000;"))
(* Now we can see how to use built-in PCA fuctionality at once *)
let dataPCA =
[| [| 2.5; 2.4 |]
[| 0.5; 0.7 |]
[| 2.2; 2.9 |]
[| 1.9; 2.2 |]
[| 3.1; 3.0 |]
[| 2.3; 2.7 |]
[| 2.0; 1.6 |]
[| 1.0; 1.1 |]
[| 1.5; 1.6 |]
[| 1.1; 0.9 |] |]
let pca = new PrincipalComponentAnalysis(dataPCA)
// Also we can set to use the analysis by correlation, which is more indicated when analysing data with high different measurement units
pca.Method = AnalysisMethod.Standardize
// and just compute
pca.Compute();
// transform data
let pcaFinalData = pca.Transform(dataPCA)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment