Skip to content

Instantly share code, notes, and snippets.

@sudipto80
Created January 22, 2016 19:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sudipto80/3b99f6bbe9b21b76386d to your computer and use it in GitHub Desktop.
Save sudipto80/3b99f6bbe9b21b76386d to your computer and use it in GitHub Desktop.
Linear Regression
#load "...\packages\MathNet.Numerics.FSharp.3.10.0\MathNet.Numerics.fsx"
open MathNet.Numerics.LinearAlgebra
open System.IO
// A small sample vector.
let velocities = vector [ 23.0; 4.0; 5.0; 2.0 ]

//let y = matrix [[1.;3.]
//                [1.;5.]
//                [1.;4.]]

// Read the CSV file and turn every line into an array of floats
// (one array per line, one float per comma-separated field).
// Please change the file path so that it points to the file on your local disk.
let rows =
    let parseRow (line: string) =
        line.Split(',') |> Array.map float
    File.ReadAllLines("C:\\mpg.csv") |> Array.map parseRow

// Dense matrix built from the parsed rows.
let mpgData = DenseMatrix.ofRowArrays rows
//let myMat = matrix [[1. ;2.; 3.]
// [4. ;5.; 2.]
// [7. ;0.8; 9.]]
//
//let myMat' = myMat.Inverse()
//let myMat = matrix [[1.;2.;3.]
// [4.;5.;2.]
// [7.;0.8;9.]]
//
//let myMat' = myMat.Inverse()
//let myMat = matrix [[1. ;2.; 3.]
// [4. ;5.; 2.]
// [7. ;0.8; 9.]]
//
//let myMatTrace = myMat.Trace()
// A 3x3 example matrix used for the decompositions below.
let myMat =
    matrix [ [ 1.0; 2.0; 3.0 ]
             [ 4.0; 5.0; 2.0 ]
             [ 7.0; 0.8; 9.0 ] ]

// QR decomposition of the example matrix.
let qr = myMat.QR()

// Singular value decomposition of the same matrix.
let svdR = myMat.Svd(true)

// The singular values of the matrix, as a vector.
let s = svdR.S

// The transposed right singular vectors
// (transpose of V, an n-by-n unitary matrix).
let v' = svdR.VT

// The left singular vectors (U, an m-by-m unitary matrix).
let u = svdR.U

// The singular values laid out as a diagonal Matrix<T>.
let w = svdR.W

// Multiplying the three factors back together regenerates the original matrix.
let myMatSVD = u * w * v'
printfn "%A" myMatSVD
////Represent the number of Disk-I/Os
//let x = [14;16;27;42;39;50;83]
////Represent the time processor takes
//let y = [02;05;07;09;10;13;20]
//Locate the file "FsPlotBootstrap.fsx" and provide that path here
#load "..\packages\FsPlot.0.6.6\FsPlotBootstrap.fsx"
open System
open FsPlot.Data
open FsPlot.Highcharts.Charting
// Number of disk I/Os for each observation.
let x = [ 14; 16; 27; 42; 39; 50; 83 ]
// Processor time for each observation.
let y = [ 2; 5; 7; 9; 10; 13; 20 ]
// A second series of values; it is not used by the chart below.
let y' = [ 3; 4; 5; 7; 23; 21; 34 ]

// Points on the fitted line b0 + b1 * x, one per x value.
let regressionPairs =
    // Nothing above this point in the script defines b0 and b1, so the snippet
    // could not be evaluated top-to-bottom. The coefficients are therefore fitted
    // here, locally, from the same data (for these observations they come out at
    // roughly b0 = -0.00828 and b1 = 0.24376).
    let toFloats (values: int list) = values |> List.map float |> List.toArray
    let (b0, b1) =
        MathNet.Numerics.LinearRegression.SimpleRegression.Fit(toFloats x, toFloats y)
    x |> List.map (fun xElem -> (xElem, b0 + b1 * float xElem))

// The observed (x, y) points.
let pairs = List.zip x y
let scatter = Series.Scatter pairs
let regressionLine = Series.Line regressionPairs

// Scatter of the observations combined with the fitted line.
let chart =
    [ scatter; regressionLine ]
    |> Chart.Combine
    |> Chart.WithNames [ "Actual data"; "Regression Line" ]
    |> Chart.WithTitle "Processor Time and Disk I/O"
    |> Chart.WithLegend true
open MathNet.Numerics.LinearRegression
open MathNet.Numerics.Fit
open MathNet.Numerics.LinearAlgebra
// Predictor values as a float array.
let xV = [| 14.0; 16.0; 27.0; 42.0; 39.0; 50.0; 83.0 |]
// Response values as a float array.
let yV = [| 2.0; 5.0; 7.0; 9.0; 10.0; 13.0; 20.0 |]
// Simple linear regression of yV on xV; the two returned values are bound to b0 and b1.
let b0, b1 = SimpleRegression.Fit(xV, yV)
// Builds a list of `count` random integers, each drawn with Next(40, 100)
// from a System.Random instance created for this call.
let genRandomTemps count =
    let generator = System.Random()
    let nextTemp (_: int) = generator.Next(40, 100)
    List.init count nextTemp
// Fifty random temperatures.
let temps = genRandomTemps 50
// Constant used by the formula below.
let t_d = 19

// For every temperature t: 100 - 5 * (t - t_d), converted to float.
let RH_Formula =
    temps
    |> Seq.map (fun temperature -> float (100 - 5 * (temperature - t_d)))
    |> Seq.toArray

// The temperatures themselves as floats.
let temp_Array = temps |> Seq.map float |> Seq.toArray

// (temperature, formula value) pairs.
let from_Formula = Array.zip temp_Array RH_Formula

// Simple linear regression over the pairs.
let (rhB0, rhB1) = SimpleRegression.Fit(from_Formula)

// Points on the fitted line rhB0 + rhB1 * t, one per temperature.
let regressionPairs = [| for t in temp_Array -> (t, rhB0 + rhB1 * t) |]

let formulaSpots = Series.Scatter from_Formula
let regressionLine = Series.Line regressionPairs

// Scatter of the formula values combined with the fitted line.
let chart =
    [ formulaSpots; regressionLine ]
    |> Chart.Combine
    |> Chart.WithNames [ "Actual data"; "Regression Line" ]
    |> Chart.WithTitle "Predicting Relative Humidity"
    |> Chart.WithLegend true
//
// The predictor variables.
let X =
    matrix [ [ 1.0; 2.0; 3.0 ]
             [ 4.0; 5.0; 2.0 ]
             [ 7.0; 0.8; 9.0 ] ]

// The predictor values of a new observation.
let X_unseen = vector [ 4.0; 5.0; 1.89 ]

// The values recorded for the observations already seen.
let Y =
    matrix [ [ 3.0 ]
             [ 4.0 ]
             [ 5.0 ] ]

// theta = (X^T * X)^-1 * X^T * Y
let theta =
    let xT = X.Transpose()
    let projection = (xT * X).Inverse() * xT
    projection * Y

// The predicted Y for the new set of predictor values.
let predicted_Y = theta.Transpose() * X_unseen
//
// Parse every line of the file into the floats of its first six comma-separated
// fields, then keep the first 350 parsed lines.
let rows =
    let firstSixAsFloats (line: string) =
        let fields = line.Split(',') |> Seq.take 6 |> Seq.toArray
        Array.map float fields
    let parsed = File.ReadAllLines("C:\\mpgdata.csv") |> Array.map firstSixAsFloats
    parsed |> Seq.take 350 |> Seq.toArray

// A matrix holding all the numeric columns for the 350 rows.
let created1 = DenseMatrix.ofRowArrays rows

// Column 0 holds the values of the predicted variable.
let milesPerGallon = created1.Column(0)

// With the predicted column removed, what is left is the matrix
// used to calculate theta.
let created2 = created1.RemoveColumn(0)

// The predicted values under a second name.
let Y_MPG = milesPerGallon

// theta = (X^T * X)^-1 * X^T * y
let Theta_MPG =
    let predictorsT = created2.Transpose()
    (predictorsT * created2).Inverse() * predictorsT * milesPerGallon

// Details of an unknown car.
let unknownCarDetails = vector [ 4.0; 140.0; 90.0; 2264.0; 15.5 ]

// The predicted mpg value of the unknown car.
let predictedMPG = Theta_MPG * unknownCarDetails
//
// Four example observations, one per row.
let m =
    matrix [ [ 2.0; 3.0; 4.0; 5.0 ]
             [ 4.0; 55.0; 2.0; 4.0 ]
             [ 3.0; 4.0; 2.0; 3.0 ]
             [ 2.0; 4.0; 2.0; 1.0 ] ]

// The point the rows are compared against.
let x_n = vector [ 1.0; 2.2; 31.0; 4.1 ]

// The weight matrix: diagonal entry i is the L2 norm of (row i of m) - x_n.
let W =
    DiagonalMatrix.ofDiagArray
        (Array.init 4 (fun i -> m.Row(i).Subtract(x_n).L2Norm()))

// Experiment with several values of tau.
let tau = 1.

// The weight matrix again, with every distance divided by 2 * tau^2.
// NOTE(review): the usual locally-weighted-regression kernel is
// exp(-d^2 / (2 * tau^2)); here the weight is d / (2 * tau^2), which grows with
// the distance d - confirm that this is intended.
let W =
    let scale = 2.0 * tau ** 2.0
    DiagonalMatrix.ofDiagArray
        (Array.init 4 (fun i -> m.Row(i).Subtract(x_n).L2Norm() / scale))
//
// Details of the car whose mpg is to be predicted.
let unknownCarDetails = vector [ 4.0; 140.0; 90.0; 2264.0; 15.5 ]

// One weight per row of created2: the L2 norm of (row - unknownCarDetails)
// divided by 2 * tau^2. The number of weights follows created2.RowCount instead
// of a hard-coded 350, so it stays in step with however many rows were loaded.
// NOTE(review): the usual locally-weighted-regression kernel is
// exp(-d^2 / (2 * tau^2)); this weight grows with the distance d - confirm that
// this is intended.
let values =
    let scale = 2.0 * tau ** 2.0
    let weightOfRow (i: int) =
        created2.Row(i).Subtract(unknownCarDetails).L2Norm() / scale
    Array.init created2.RowCount weightOfRow

// The weights as a diagonal matrix.
let Weights_MPG = DiagonalMatrix.ofDiagArray values

// theta = (X^T * W * X)^-1 * X^T * W * y
let Theta_MPG =
    let predictorsT = created2.Transpose()
    (predictorsT * Weights_MPG * created2).Inverse() * predictorsT * Weights_MPG * milesPerGallon

// The predicted mpg value of the unknown car.
let predictedMPG = Theta_MPG * unknownCarDetails
//---
// (row index, actual mpg) for every recorded value. The bounds follow the data
// (milesPerGallon.Count / created2.RowCount) rather than a hard-coded 349.
let mpgPairs = [| for i in 0 .. milesPerGallon.Count - 1 -> (i, milesPerGallon.At(i)) |]

// (row index, predicted mpg) for every row of created2.
let predictedMPGPairs = [| for i in 0 .. created2.RowCount - 1 -> (i, Theta_MPG * created2.Row(i)) |]

let scatterMPG = Series.Scatter mpgPairs
let linearRegSpline = Series.Line predictedMPGPairs

// Actual values combined with the regression predictions.
let chartMPG =
    [ scatterMPG; linearRegSpline ]
    |> Chart.Combine
    |> Chart.WithNames [ "Actual data"; "Multiple Regression Line" ]
    |> Chart.WithTitle "Miles per gallon prediction using Multiple Linear Regression"
    |> Chart.WithLegend true

// (actual, predicted, actual - predicted) for the first five rows.
// The rows are produced lazily, so only the five that are kept get computed,
// and each prediction is evaluated once.
let mpgResiduals =
    let residualRow (i: int) =
        let actual = milesPerGallon.At(i)
        let predicted = Theta_MPG * created2.Row(i)
        (actual, predicted, actual - predicted)
    Seq.init created2.RowCount residualRow
    |> Seq.take 5
    |> Seq.toArray
////
// Column series of the absolute residual |actual - predicted| for every row.
// The bounds follow the data (created2.RowCount / milesPerGallon.Count) rather
// than a hard-coded 349.
let mpgResidualPairs =
    let residualAt (i: int) = abs (milesPerGallon.At(i) - Theta_MPG * created2.Row(i))
    Series.Column [| for i in 0 .. created2.RowCount - 1 -> (i, residualAt i) |]

// Line series of the predicted mpg for every row.
let predictedMPGPairs =
    Series.Line [| for i in 0 .. created2.RowCount - 1 -> (i, Theta_MPG * created2.Row(i)) |]

// Spline series of the actual mpg values; it is not part of the chart below.
let actualMPGRecords =
    Series.Spline [| for i in 0 .. milesPerGallon.Count - 1 -> (i, milesPerGallon.At(i)) |]

let scatterMPG = Series.Scatter mpgPairs

// Residuals, actual values and predictions in a single chart.
let chartMPGResidue =
    [ mpgResidualPairs; scatterMPG; predictedMPGPairs ]
    |> Chart.Combine
    |> Chart.WithNames [ "Residuals"; "Actual data"; "Multiple Regression Line" ]
    |> Chart.WithTitle "Miles per gallon prediction using Multiple Linear Regression"
    |> Chart.WithLegend true
//
// Details about several houses: "bedrooms", "Area" and "Bathroom" count,
// one house per row.
let houseDetails =
    matrix [ [ 3.5; 4000.0; 3.0 ]
             [ 5.0; 4542.0; 3.0 ]
             [ 3.0; 2545.0; 4.0 ]
             [ 2.0; 1150.0; 2.0 ]
             [ 2.0; 1220.0; 2.0 ]
             [ 1.0; 734.0; 1.0 ] ]

// Multiplier of the identity matrix in the formula below.
let lambda = 11.

// The house whose price is to be predicted.
let newHouseDetails = vector [ 1.0; 750.0; 1.0 ]

// One price per row of houseDetails.
let prices = vector [ 3400.0; 2102.0; 1334.0; 3432.0; 5342.0; 782.0 ]

// Identity matrix with one row/column per column of houseDetails.
let I = DenseMatrix.identity<float> houseDetails.ColumnCount

// theta = (X^T * X + lambda * I)^-1 * X^T * prices
let ridgeRegressionTheta =
    let housesT = houseDetails.Transpose()
    let regularised = housesT * houseDetails + lambda * I
    regularised.Inverse() * housesT * prices

// Predicted price of the new house.
let newHousePredictedPrice = newHouseDetails * ridgeRegressionTheta
////
//Locate these files and provide correct paths to all these files
#r @"...\packages\Accord.2.15.0\lib\net45\Accord.dll"
#r @"...\packages\Accord.Math.2.15.0\lib\net45\Accord.Math.dll"
#r @"...\packages\Accord.Statistics.2.15.0\lib\net45\Accord.Statistics.dll"
#r @"...\packages\MathNet.Numerics.FSharp.3.10.0\lib\net40\MathNet.Numerics.FSharp.dll"
#r @"...\packages\MathNet.Numerics.3.10.0\lib\net40\MathNet.Numerics.dll"
#load "...\packages\MathNet.Numerics.FSharp.3.10.0\MathNet.Numerics.fsx"
open Accord.Statistics
open Accord.Statistics.Models.Regression.Linear
open MathNet.Numerics.LinearRegression
open MathNet.Numerics.LinearAlgebra
open MathNet.Numerics.LinearAlgebra.Double
open MathNet.Numerics
// Input set of values.
let inputs = [| [| 1.; 1.; 1. |]; [| 2.; 1.; 1. |]; [| 3.; 1.; 1. |] |]
// Outputs for the inputs above.
let outputs = [| [| 2.; 3. |]; [| 4.; 6. |]; [| 6.; 9. |] |]

// This is a regression that takes an input variable set of 3 values each
// and projects the result to a two-variable output.
// Thus we need a 3 x 2 regression model.
// (The wrapped comment used to leave a bare `values` token on its own line,
// which is not valid F#.)
let regression = MultivariateLinearRegression(3, 2)
let error = regression.Regress(inputs, outputs)
printfn "%A" regression.Coefficients

// Let's say we have a new set of values as per the given data.
let newInput = DenseMatrix.OfColumns [[2.4;1.2;1.4]]
// Creating a theta matrix from the regression coefficients.
let theta = DenseMatrix.OfArray regression.Coefficients
// Calculating the predicted value for this new input set.
let newOutputs = theta.Transpose() * newInput
// Feature scaling: the average of each of the three columns of houseDetails ...
let avgBedRooms = houseDetails.Column(0) |> Seq.average
let avgArea = houseDetails.Column(1) |> Seq.average
let avgBathRooms = houseDetails.Column(2) |> Seq.average

// ... and the range (largest value minus smallest value) of each column.
let rangeBedRooms =
    let bedRooms = houseDetails.Column(0)
    (bedRooms |> Seq.max) - (bedRooms |> Seq.min)
let rangeArea =
    let area = houseDetails.Column(1)
    (area |> Seq.max) - (area |> Seq.min)
let rangeBathRooms =
    let bathRooms = houseDetails.Column(2)
    (bathRooms |> Seq.max) - (bathRooms |> Seq.min)
//However, you can't do this by hand for a very large matrix.
//So the following code does that programmatically for a matrix of any size.

// Scales, in place, the first avgs.Length entries of a vector: entry i becomes
// (entry i - avgs.[i]) / ranges.[i]. The same vector instance is returned.
// Because entry i is paired with avgs.[i] and ranges.[i], the argument has to
// hold one value per feature (for example a single observation). Despite the
// parameter name, passing a column of the data matrix would mix up the
// statistics of different columns.
// A zero in `ranges` makes the corresponding division a division by zero.
let scaleFeatures (avgs: float []) (ranges: float []) (column : Vector<float>) =
    for i in 0 .. avgs.Length - 1 do
        column.Storage.At(i, (column.Storage.At(i) - avgs.[i]) / ranges.[i])
    column

// The average of every column.
let allAvgs =
    [| for i in 0 .. houseDetails.ColumnCount - 1 -> houseDetails.Column(i) |> Seq.average |]

// The range (largest value minus smallest value) of every column.
let allRanges =
    let rangeOf (values: Vector<float>) = (values |> Seq.max) - (values |> Seq.min)
    [| for i in 0 .. houseDetails.ColumnCount - 1 -> rangeOf (houseDetails.Column(i)) |]

// Every column of houseDetails, in order.
let allColumns = [ for i in 0 .. houseDetails.ColumnCount - 1 -> houseDetails.Column(i) ]

// Scaled column values: every entry of column j becomes
// (entry - allAvgs.[j]) / allRanges.[j].
// Previously each column was handed to scaleFeatures, which paired ROW position i
// with the statistics of COLUMN i and touched only the first ColumnCount entries,
// so columns were scaled with the wrong averages and ranges.
let scaledColumns =
    let scaleColumn (j: int) (column: Vector<float>) =
        column.Subtract(allAvgs.[j]).Divide(allRanges.[j])
    allColumns |> List.mapi scaleColumn

// A matrix built from the scaled columns.
let scaledHouseDetails = DenseMatrix.ofColumns scaledColumns
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment