Created
June 18, 2013 17:22
-
-
Save isaacabraham/5807426 to your computer and use it in GitHub Desktop.
Sample solution for the Kaggle Digits machine learning problem. It does a bit more than strictly necessary, as the main function also returns the data in a shape that can be bound to a UI as a 28x28 grid. Excuse the lack of a discriminated union for the Matched / Failed flag at the end - that value gets bound directly onto the UI in my project.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module MachineLearning | |
open System | |
open System.IO | |
/// File name of the training data set (CSV).
let public TRAINING_DATA = "digitssample.csv"

/// File name of the live data set (CSV) that is checked against the training data.
let public LIVE_DATA = "digitscheck.csv"
/// A digit as parsed from one CSV row: the first column is the number,
/// the remaining columns are the pixel values as one flat array.
type RawScribble =
    { Number : int
      Pixels : int[] }

/// A digit whose flat pixel array has been regrouped into rows
/// (one inner array per row) so it can be shown as a grid.
type Scribble =
    { Number : int
      PixelData : int[][] }

/// The outcome of classifying a single digit.
/// `Matched` holds the string "Matched" or "Failed"; it is a plain string
/// rather than a union because it is bound directly onto a UI.
type Result =
    { Expected : Scribble
      Actual : Scribble
      Uncertainty : float
      Matched : string }
/// Reads a CSV file of digits into an array of `RawScribble` records.
///
/// The first line of the file is treated as a header and skipped. Every other
/// line is split on ',' and each cell is parsed as an integer: the first cell
/// becomes `Number`, the rest become `Pixels`.
///
/// Blank (empty or whitespace-only) lines are ignored rather than failing the
/// whole load, and a file with no data rows yields an empty array.
/// A cell that is not a valid integer still raises `FormatException`.
let readlines filename =
    // Parse one data row in a single pass over its cells.
    let parseLine (line: string) =
        let values =
            line.Split(',')
            |> Array.map (fun (cell: string) -> Convert.ToInt32 cell)
        { Number = values.[0]; Pixels = values.[1..] }

    let lines = File.ReadAllLines(filename)

    // Start at index 1 to skip the header; the range is empty when the file
    // has zero or one line, so no out-of-range slicing can occur.
    [| for index in 1 .. lines.Length - 1 do
         let line = lines.[index]
         if not (String.IsNullOrWhiteSpace line) then
             yield parseLine line |]
/// Euclidean distance between two equally sized integer vectors:
/// the square root of the sum of the squared element-wise differences.
///
/// The differences are squared and accumulated as floats, so large element
/// values cannot silently wrap around the way 32-bit integer arithmetic does.
/// Raises `ArgumentException` when the arrays differ in length.
let distance (first:int[]) (second:int[]) =
    let addSquaredDifference (total: float) (p1: int) (p2: int) =
        // Convert before subtracting so neither the difference nor its square can overflow.
        let delta = float p1 - float p2
        total + delta * delta

    Array.fold2 addSquaredDifference 0.0 first second
    |> sqrt
/// Finds the item of `trainingSet` whose pixels are closest (by `distance`)
/// to `unknownItem`, and returns that item paired with its distance.
/// Raises `ArgumentException` when `trainingSet` is empty.
let classify trainingSet unknownItem =
    // Pair a training item with how far its pixels are from the unknown pixels.
    let withDistance candidate =
        candidate, distance candidate.Pixels unknownItem

    let scored = trainingSet |> Array.map withDistance
    scored |> Array.minBy snd
/// Classifies every item of `liveData` against `trainingData` and returns a pair:
/// the per-item results, and the percentage (0.0 - 100.0) of items whose number
/// equals the number of their closest training item.
///
/// In each `Result`, `Expected` is the closest training item, `Actual` is the
/// live item itself, and both have their flat pixels regrouped into rows of 28
/// so they can be bound to a grid. When `liveData` is empty the percentage is
/// 0.0 rather than NaN.
let calculateScore trainingData liveData =
    // Number of pixels per row of the displayed grid.
    let rowLength = 28
    // Divisor applied to the raw distance before it is stored as Uncertainty.
    // NOTE(review): presumably a display scale - confirm against the UI.
    let uncertaintyScale = 1000.0

    // Cut the flat pixel array into consecutive, non-overlapping rows of
    // `rowLength`; a trailing partial row (if any) is dropped.
    let windowPixels (pixels: int[]) =
        Array.init (pixels.Length / rowLength) (fun row ->
            Array.sub pixels (row * rowLength) rowLength)

    // Classify and shape each live item in a single parallel pass.
    let results =
        liveData
        |> Array.Parallel.map (fun line ->
            let expected, uncertainty = classify trainingData line.Pixels
            { Expected = { Number = expected.Number; PixelData = windowPixels expected.Pixels }
              Actual = { Number = line.Number; PixelData = windowPixels line.Pixels }
              Uncertainty = uncertainty / uncertaintyScale
              Matched = if line.Number = expected.Number then "Matched" else "Failed" })

    let matchedCount =
        results
        |> Array.filter (fun result -> result.Matched = "Matched")
        |> Array.length

    // Guard the division so an empty live set does not produce NaN.
    let percentage =
        if results.Length = 0 then 0.0
        else float matchedCount / float results.Length * 100.0

    results, percentage
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment