Skip to content

Instantly share code, notes, and snippets.

@jkone27
Last active October 16, 2023 11:59
Show Gist options
  • Save jkone27/de477e362f3ee9f36069c0010e0a1f29 to your computer and use it in GitHub Desktop.
Save jkone27/de477e362f3ee9f36069c0010e0a1f29 to your computer and use it in GitHub Desktop.
Sample test of the ML.NET library for house-price regression prediction
#r "nuget:Microsoft.ML"
#r "nuget:Microsoft.ML.AutoML"
#r "nuget:Microsoft.ML.DataView"
#r "nuget:Plotly.NET"
#r "nuget:FSharp.Data"
open Microsoft.ML
open Microsoft.ML.Data
open Microsoft.ML.Transforms
open Microsoft.ML.Trainers
open Plotly.NET
open Plotly.NET.TraceObjects
open Plotly.NET
open System
open FSharp.Data
// CSV inputs. `trainDataPath` is a compile-time literal because it is passed
// to the CsvProvider type provider as a static argument.
let [<Literal>] trainDataPath = "house-price-train.csv"
let [<Literal>] testDataPath = "house-price-test.csv"

/// Typed CSV reader generated from the training file.
type PriceCsvProvider = CsvProvider<trainDataPath>
/// Prediction output row: the "Score" column is mapped onto `Price`.
[<CLIMutable>]
type PricePrediction =
    { [<ColumnName "Score">]
      Price: float32 }
/// Input row: one house price observation for a given year.
[<CLIMutable>]
type HousePricePerYear =
    { /// Column 0 of the source data.
      [<LoadColumn 0>]
      Price: float32
      /// Column 1 of the source data.
      [<LoadColumn 1>]
      Year: float32 }
// ML.NET context shared by the whole script, created with seed 0.
let mlContext = new MLContext(seed = 0)

// Sample data embedded in the provider (built from the training CSV).
let trainSample = PriceCsvProvider.GetSample()

// Test data, read from `testDataPath` through the same provider.
let testSample = PriceCsvProvider.Load(testDataPath)
/// Converts CSV rows into `HousePricePerYear` records (both fields cast to
/// float32) and loads them into a data view through `mlContext`.
let house_price_data_view (row : PriceCsvProvider.Row seq) =
    let records =
        [| for csvRow in row ->
            { Price = float32 csvRow.Price
              Year = float32 csvRow.Year } |]
    mlContext.Data.LoadFromEnumerable<HousePricePerYear>(records)
// Data view over the training rows.
let trainDataView = house_price_data_view trainSample.Rows

// Sanity check: for each column of a one-row preview, print its last value.
for column in trainDataView.Preview(1).ColumnView do
    printfn "%A" (Seq.last column.Values)

// Data view over the test rows.
let testDataView = house_price_data_view testSample.Rows

// Same sanity check for the test data.
for column in testDataView.Preview(1).ColumnView do
    printfn "%A" (Seq.last column.Values)
/// Helpers for composing estimators with F# pipelines.
/// Source: https://github.com/CSBiology/FSharpML/tree/master/src/FSharpML
module Estimator =

    /// Returns `estimator` viewed as an `IEstimator<ITransformer>`.
    /// Fails with an exception when the runtime type test does not succeed.
    let downcastEstimator (estimator : IEstimator<'a>) =
        match estimator with
        | :? IEstimator<ITransformer> as general -> general
        | _ -> failwith "The estimator has to be an instance of IEstimator<ITransformer>."

    /// Appends `source1` to `source2`. The chain comes last so that it can be
    /// piped in: `chain |> Estimator.append next`.
    let append (source1 : IEstimator<'a>) (source2 : IEstimator<'b>) =
        let head = downcastEstimator source2
        head.Append(source1)

    /// Folds `estimators` onto an empty `EstimatorChain`, appending each in
    /// sequence order.
    let createEstimatorChainOf (estimators : IEstimator<'a> seq) =
        estimators
        |> Seq.fold (fun chain next -> append next chain) (EstimatorChain())

    /// Adds a cache checkpoint to `pipeline` and returns the result as an
    /// `IEstimator<ITransformer>`.
    let appendCacheCheckpoint (mlContext : MLContext) (pipeline: IEstimator<'a>) =
        downcastEstimator (pipeline.AppendCacheCheckpoint(mlContext))
/// Helpers for composing transformers with F# pipelines.
/// Source: https://github.com/CSBiology/FSharpML/tree/master/src/FSharpML
module Transformer =

    /// Returns `transformer` viewed as an `IPredictionTransformer<_>`.
    /// Fails with an exception when the runtime type test does not succeed.
    let downcastTransformer (transformer : ITransformer) =
        match transformer with
        | :? IPredictionTransformer<_> as predictor -> predictor
        | _ -> failwith "The transformer has to be an instance of IPredictionTransformer<IPredictor>."

    /// Appends `source1` to `source2`. The chain comes last so that it can be
    /// piped in: `chain |> Transformer.append next`.
    let append (source1 : ITransformer) (source2 : ITransformer) =
        let head = downcastTransformer source2
        head.Append(source1)

    /// Folds the given transformers onto an empty `TransformerChain`,
    /// appending each in sequence order.
    // NOTE(review): the fold seed is a TransformerChain, and `append` runs it
    // through `downcastTransformer` — confirm that type test passes at runtime.
    let createTransformerChainOf (estimators : ITransformer seq) =
        estimators
        |> Seq.fold (fun chain next -> append next chain) (TransformerChain())
/// Training pipeline: expose Price as the label, build the feature vector,
/// then train a FastTree regressor.
let pipeline =
    // The trainer reads its target from "Label", so copy Price there.
    let labelFromPrice =
        mlContext.Transforms.CopyColumns(
            outputColumnName = "Label",
            inputColumnName = "Price")

    // Only real predictors go into "Features". Price is the target (it is
    // copied to "Label" above), so feeding it in as a feature would leak the
    // answer into training and make predictions depend on a price the caller
    // does not know.
    let features =
        mlContext.Transforms.Concatenate(
            outputColumnName = "Features",
            inputColumnNames = [| "Year" |])

    // Regression algorithm used for prediction: FastTree in this example.
    let trainer = mlContext.Regression.Trainers.FastTree()

    new EstimatorChain<ITransformer>()
    |> Estimator.append labelFromPrice
    // Additional encoded features can be appended here, e.g.
    // |> Estimator.append (mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName="SomeStringEncoded", inputColumnName="SomeString"))
    // (and then listed in `inputColumnNames` above).
    |> Estimator.append features
    |> Estimator.append trainer
// Fit the pipeline on the training view.
let model = pipeline.Fit(trainDataView)

// Score the test view with the fitted model.
let predictions = model.Transform(testDataView)

// Regression metrics comparing the "Score" column against "Label".
let metrics =
    mlContext.Regression.Evaluate(
        predictions,
        labelColumnName = "Label",
        scoreColumnName = "Score")

// Report the goodness of fit of the model.
let metricsReport = $"""
METRICS EVALUATE MODEL
RSquared ([0,1] close to 1 better): %.5f{metrics.RSquared}
RMS err (lowest the better): %.5f{metrics.RootMeanSquaredError}
"""

printfn "%s" metricsReport
// Prediction engine over the trained model.
let predictionFunction =
    mlContext.Model.CreatePredictionEngine<HousePricePerYear, PricePrediction>(model)

/// Predicts the house price for the given `year`.
/// Returns the `PricePrediction` produced by `predictionFunction`.
let predictHousePrice (year: int) =
    let request =
        { // Use the requested year; previously this was hard-coded to 2100,
          // so every call returned the same prediction.
          Year = float32 year
          // Price is the quantity being predicted, so the request carries a
          // dummy 0 for it.
          Price = 0.0f }
    predictionFunction.Predict(request)

// Print the predicted price for every year from 2023 through 2100.
for year in 2023 .. 2100 do
    let predicted = predictHousePrice year
    printfn $"{year} > {predicted}"
// EXTRA
// try plot ?
// (actualData, predictedData)
// |> fun (a,p) -> Chart.Line(x = a, y = p)
// |> Chart.withTitle("Actual vs. Predicted Prices")
// |> Chart.withXAxisStyle("Sample")
// |> Chart.withYAxisStyle("Price")
// |> Chart.show
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment