Last active
October 16, 2023 11:59
-
-
Save jkone27/de477e362f3ee9f36069c0010e0a1f29 to your computer and use it in GitHub Desktop.
Sample test of the ML.NET library for house-price regression prediction
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#r "nuget:Microsoft.ML" | |
#r "nuget:Microsoft.ML.AutoML" | |
#r "nuget:Microsoft.ML.DataView" | |
#r "nuget:Plotly.NET" | |
#r "nuget:FSharp.Data" | |
open Microsoft.ML | |
open Microsoft.ML.Data | |
open Microsoft.ML.Transforms | |
open Microsoft.ML.Trainers | |
open Plotly.NET | |
open Plotly.NET.TraceObjects | |
open Plotly.NET | |
open System | |
open FSharp.Data | |
/// Path of the CSV file that serves as the type-provider sample.
let [<Literal>] trainDataPath = "house-price-train.csv"

/// Path of the second CSV file, loaded separately through the same provider.
let [<Literal>] testDataPath = "house-price-test.csv"

/// CSV type provider whose row type is inferred from the file at trainDataPath.
type PriceCsvProvider = CsvProvider<trainDataPath>
/// Prediction output row: the "Score" column is bound to the Price field.
[<CLIMutable>]
type PricePrediction =
    { [<ColumnName("Score")>]
      Price: float32 }
/// Input row: Price is bound to load column 0 and Year to load column 1.
[<CLIMutable>]
type HousePricePerYear =
    { [<LoadColumn(0)>]
      Price: float32

      [<LoadColumn(1)>]
      Year: float32 }
/// Shared ML.NET context, created with seed 0.
let mlContext : MLContext = MLContext(seed = 0)

/// Contents of the type provider's sample file (trainDataPath).
let trainSample = PriceCsvProvider.GetSample()

/// Contents of the file at testDataPath, parsed with the same provider.
let testSample = PriceCsvProvider.Load(testDataPath)
/// Converts CSV rows into HousePricePerYear records (Price and Year cast to
/// float32) and loads them into a data view through the shared mlContext.
let house_price_data_view (row : PriceCsvProvider.Row seq) =
    let toRecord (csvRow : PriceCsvProvider.Row) : HousePricePerYear =
        { Price = float32 csvRow.Price
          Year = float32 csvRow.Year }

    // Materialise the rows into an array before handing them to ML.NET.
    let records = row |> Seq.map toRecord |> Seq.toArray

    records
    |> mlContext.Data.LoadFromEnumerable<HousePricePerYear>
/// Data view built from the rows of trainSample.
let trainDataView = house_price_data_view trainSample.Rows

// For every column of Preview(1), print the last entry of its Values.
for column in trainDataView.Preview(1).ColumnView do
    let lastValue = Seq.last column.Values
    printfn "%A" lastValue
/// Data view built from the rows of testSample.
let testDataView = house_price_data_view testSample.Rows

// For every column of Preview(1), print the last entry of its Values.
for column in testDataView.Preview(1).ColumnView do
    let lastValue = Seq.last column.Values
    printfn "%A" lastValue
/// Helpers for chaining ML.NET estimators with the F# pipe operator.
/// See https://github.com/CSBiology/FSharpML/tree/master/src/FSharpML
module Estimator =

    /// Returns the estimator typed as IEstimator<ITransformer>; fails with an
    /// error message when the runtime type test does not succeed.
    let downcastEstimator (estimator : IEstimator<'a>) =
        match estimator with
        | :? IEstimator<ITransformer> as general -> general
        | _ -> failwith "The estimator has to be an instance of IEstimator<ITransformer>."

    /// Appends source1 to source2 (source2 comes first in the resulting chain).
    /// The argument order lets a chain be piped in: `chain |> append next`.
    let append (source1 : IEstimator<'a>) (source2 : IEstimator<'b>) =
        let head = downcastEstimator source2
        head.Append(source1)

    /// Folds the estimators, in sequence order, onto an initially empty EstimatorChain.
    let createEstimatorChainOf (estimators : IEstimator<'a> seq) =
        estimators
        |> Seq.fold (fun chain next -> append next chain) (EstimatorChain())

    /// Calls AppendCacheCheckpoint on the pipeline with the given context and
    /// returns the result typed as IEstimator<ITransformer>.
    let appendCacheCheckpoint (mlContext : MLContext) (pipeline: IEstimator<'a>) =
        let checkpointed = pipeline.AppendCacheCheckpoint mlContext
        downcastEstimator checkpointed
/// Helpers for chaining ML.NET transformers with the F# pipe operator.
/// See https://github.com/CSBiology/FSharpML/tree/master/src/FSharpML
module Transformer =

    /// Returns the transformer typed as an IPredictionTransformer; fails with
    /// an error message when the runtime type test does not succeed.
    let downcastTransformer (transformer : ITransformer) =
        match transformer with
        | :? IPredictionTransformer<_> as prediction -> prediction
        | _ -> failwith "The transformer has to be an instance of IPredictionTransformer<IPredictor>."

    /// Appends source1 to source2 (source2 comes first in the resulting chain).
    /// The argument order lets a chain be piped in: `chain |> append next`.
    let append (source1 : ITransformer) (source2 : ITransformer) =
        let head = downcastTransformer source2
        head.Append(source1)

    /// Folds the given transformers, in sequence order, onto an initially
    /// empty TransformerChain.
    let createTransformerChainOf (estimators : ITransformer seq) =
        estimators
        |> Seq.fold (fun chain next -> append next chain) (TransformerChain())
/// Training pipeline: copy Price into "Label", build the "Features" vector
/// from Year, then train a FastTree regression model.
let pipeline =
    new EstimatorChain<ITransformer>()
    // The trainer's target column: Price is copied into "Label".
    |> Estimator.append (mlContext.Transforms.CopyColumns(
        outputColumnName="Label",
        inputColumnName="Price"
    ))
    //|> withChain
    // p.Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName="SomeStringEncoded", inputColumnName="SomeString")) |> ignore
    // append additional encoded features...
    // Price is the value being predicted, so it must NOT be part of the
    // features: including it leaks the label into training and makes the
    // model depend on a value that is unknown at prediction time.
    |> Estimator.append (mlContext.Transforms.Concatenate(
        outputColumnName="Features",
        inputColumnNames=[| "Year" |]
    ))
    // choose the regression algorithm for prediction, fast tree in the example
    |> Estimator.append (mlContext.Regression.Trainers.FastTree())
/// Model obtained by fitting the pipeline on trainDataView.
let model = pipeline.Fit trainDataView

/// Result of applying the fitted model to testDataView.
let predictions = model.Transform testDataView

/// Regression metrics computed from the "Label" and "Score" columns.
let metrics =
    mlContext.Regression.Evaluate(
        predictions,
        labelColumnName = "Label",
        scoreColumnName = "Score"
    )

// Report the goodness of fit of the model.
let metricsReport =
    $"""
METRICS EVALUATE MODEL
RSquared ([0,1] close to 1 better): %.5f{metrics.RSquared}
RMS err (lowest the better): %.5f{metrics.RootMeanSquaredError}
"""

printfn "%s" metricsReport
// try predict a price
/// Prediction engine taking a HousePricePerYear input and producing a
/// PricePrediction, created from the fitted model.
let predictionFunction = mlContext.Model.CreatePredictionEngine<HousePricePerYear, PricePrediction>(model)

/// Predicts the house price for the given year.
/// The request previously hard-coded the year 2100 and ignored `year`, so
/// every call returned the same prediction; the argument is now used.
let predictHousePrice (year : int) =
    let request : HousePricePerYear =
        { Year = float32 year
          // The price is unknown at prediction time, so it is sent as 0.
          Price = 0.0f }

    request
    |> predictionFunction.Predict
// Print one "<year> > <prediction>" line for each year from 2023 through 2100.
[2023..2100]
|> List.iter (fun year ->
    let predicted = predictHousePrice year
    printfn $"{year} > {predicted}")
// EXTRA | |
// try plot ? | |
// (actualData, predictedData) | |
// |> fun (a,p) -> Chart.Line(x = a, y = p) | |
// |> Chart.withTitle("Actual vs. Predicted Prices") | |
// |> Chart.withXAxisStyle("Sample") | |
// |> Chart.withYAxisStyle("Price") | |
// |> Chart.show |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment