public
Created

Sample solution for the Kaggle Digits machine learning problem. It does a little more than classify: the main function also returns the data in a shape that can be bound to a UI as a 28x28 grid. Excuse the lack of a discriminated union for the Matched / Failed flag at the end; it is a plain string because it gets bound directly onto the UI in my project.

  • Download Gist
machinelearning.fs
F#
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
module MachineLearning
 
open System
open System.IO
 
/// File name of the training data CSV.
/// NOTE(review): not referenced elsewhere in this file; presumably passed to `readlines` by callers.
let public TRAINING_DATA = "digitssample.csv"

/// File name of the CSV holding the digits to be classified.
/// NOTE(review): not referenced elsewhere in this file; presumably passed to `readlines` by callers.
let public LIVE_DATA = "digitscheck.csv"
 
/// One parsed CSV row: the digit label and the remaining values of the row
/// as a flat pixel array (see `readlines`).
type RawScribble =
    {
        /// First value of the CSV row.
        Number: int
        /// All values after the first, in file order.
        Pixels: int[]
    }

/// A digit whose pixels have been regrouped into rows so it can be bound
/// to a grid in a UI (see `calculateScore`).
type Scribble =
    {
        /// The digit label carried over from the `RawScribble`.
        Number: int
        /// Pixel rows; each inner array is one row of the image.
        PixelData: int[][]
    }

/// Outcome of classifying one live item.
type Result =
    {
        /// The nearest training item chosen by the classifier.
        Expected: Scribble
        /// The live item that was classified.
        Actual: Scribble
        /// The distance to the nearest training item, scaled down by 1000.
        Uncertainty: float
        /// "Matched" when both labels agree, otherwise "Failed".
        Matched: string
    }
 
/// Loads a CSV file of labelled digits into an array of `RawScribble`.
///
/// The first line of the file is discarded (treated as a header). Every
/// remaining line is split on ',' and each cell is converted with
/// `Convert.ToInt32`; the first value becomes `Number` and the rest `Pixels`.
/// Exceptions raised by file access or number conversion propagate to the caller.
let readlines filename =
    // Converts one comma-separated line into a record.
    let toScribble (row: string) : RawScribble =
        let cells =
            row.Split(',')
            |> Array.map (fun (cell: string) -> Convert.ToInt32 cell)
        { Number = cells.[0]; Pixels = cells.[1..] }

    let rows = File.ReadAllLines(filename)
    // Skip the header line, then parse the data rows.
    rows.[1..] |> Array.map toScribble
 
/// Euclidean distance between two pixel vectors of equal length.
///
/// Returns the square root of the sum of squared element-wise differences.
/// Two empty arrays give 0.0. Raises `ArgumentException` when the arrays
/// differ in length (from `Array.fold2`).
let distance (first:int[]) (second:int[]) =
    // Accumulate in int64: squaring and summing in 32-bit int wraps silently
    // once a squared difference or the running total exceeds Int32.MaxValue.
    // Folding also avoids allocating a temporary array on every call.
    let sumOfSquares =
        Array.fold2
            (fun (total: int64) p1 p2 ->
                let diff = int64 p1 - int64 p2
                total + diff * diff)
            0L
            first
            second
    sqrt (float sumOfSquares)
/// Nearest-neighbour lookup: scores every item of `trainingSet` by its
/// `distance` to `unknownItem` and returns the closest one.
///
/// Returns a tuple of the chosen training item and its distance.
/// Raises `ArgumentException` (from `Array.minBy`) when `trainingSet` is empty.
let classify trainingSet unknownItem =
    // Pair each candidate with its distance to the unknown pixels.
    let scored =
        trainingSet
        |> Array.map (fun (candidate: RawScribble) ->
            candidate, distance candidate.Pixels unknownItem)
    // Keep the pair with the smallest distance.
    scored |> Array.minBy snd
/// Classifies every item of `liveData` against `trainingData` and scores the run.
///
/// Returns a tuple of:
///   * one `Result` per live item (both matches and failures), with pixel
///     data regrouped into rows of 28 for binding to a grid, and
///   * the percentage (0.0 to 100.0) of live items whose label equals the
///     label of the nearest training item.
///
/// When `liveData` is empty the percentage is NaN (0.0 / 0.0).
/// Exceptions from `classify` (e.g. an empty training set) propagate.
let calculateScore trainingData liveData =
    // Number of pixels in one image row.
    let rowWidth = 28

    // Cuts a flat pixel array into consecutive rows of `rowWidth` values.
    // A trailing partial row is dropped. This replaces building every sliding
    // window and then keeping only every 28th; the output is the same.
    let windowPixels (data: int[]) =
        Array.init (data.Length / rowWidth) (fun row ->
            Array.sub data (row * rowWidth) rowWidth)

    // Classify and build the UI-ready record in a single parallel pass.
    let results =
        liveData
        |> Array.Parallel.map (fun (actual: RawScribble) ->
            let expected, uncertainty = classify trainingData actual.Pixels
            {
                Expected = { Number = expected.Number; PixelData = windowPixels expected.Pixels }
                Actual = { Number = actual.Number; PixelData = windowPixels actual.Pixels }
                // Scale the raw distance down for display.
                Uncertainty = uncertainty / 1000.0
                Matched = if actual.Number = expected.Number then "Matched" else "Failed"
            })

    let matchCount =
        results |> Array.sumBy (fun r -> if r.Matched = "Matched" then 1 else 0)

    results, (float matchCount / float liveData.Length) * 100.0

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.