Skip to content

Instantly share code, notes, and snippets.

@isaacabraham
Created June 18, 2013 17:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save isaacabraham/5807426 to your computer and use it in GitHub Desktop.
Save isaacabraham/5807426 to your computer and use it in GitHub Desktop.
Sample solution for the Kaggle Digits machine learning problem. It does slightly more than classify: the main function also returns the data in a shape that can be bound to a UI as a 28x28 grid. Excuse the lack of a discriminated union for the Matched / Failed flag at the end; it is bound directly onto the UI in my project.
module MachineLearning
open System
open System.IO
/// File name of the training CSV. Not referenced elsewhere in this module;
/// presumably passed to `readlines` by the caller (confirm against callers).
let public TRAINING_DATA = "digitssample.csv"
/// File name of the CSV holding the items to classify. Not referenced elsewhere
/// in this module; presumably passed to `readlines` by the caller (confirm).
let public LIVE_DATA = "digitscheck.csv"
/// One parsed CSV row: the leading value (`Number`) and the remaining
/// values of the row as a flat array (`Pixels`).
type RawScribble =
    { Number : int
      Pixels : int[] }

/// A scribble whose pixel values have been arranged into rows (`PixelData`).
type Scribble =
    { Number : int
      PixelData : int[][] }

/// Outcome of classifying a single item.
/// `Expected` is built from the nearest training item, `Actual` from the item
/// being classified, `Uncertainty` is the scaled distance between them, and
/// `Matched` is the string "Matched" or "Failed".
type Result =
    { Expected : Scribble
      Actual : Scribble
      Uncertainty : float
      Matched : string }
/// Reads a comma-separated file into an array of `RawScribble`.
/// The first line of the file is skipped. Every remaining line is split on
/// ',' and each field converted with `Convert.ToInt32`; the first value
/// becomes `Number` and the rest become `Pixels`.
/// Conversion failures (e.g. a non-numeric field) propagate as exceptions.
let readlines filename =
    // Turns one text row into a RawScribble.
    let parseRow (row : string) : RawScribble =
        let values = row.Split(',') |> Array.map Convert.ToInt32
        { Number = values.[0]; Pixels = values.[1..] }
    let allLines = File.ReadAllLines(filename)
    // Drop the first line before parsing.
    allLines.[1..] |> Array.map parseRow
/// Euclidean distance between two integer vectors: the square root of the
/// sum of squared element-wise differences.
/// Raises an ArgumentException when the arrays have different lengths.
let distance (first:int[]) (second:int[]) =
    let sumOfSquares =
        Array.fold2
            (fun total a b ->
                let delta = a - b
                total + delta * delta)
            0
            first
            second
    // The sum is accumulated as an int and only converted to float for the root.
    sqrt (float sumOfSquares)
/// Finds the item of `trainingSet` whose `Pixels` are closest (by `distance`)
/// to `unknownItem`, and returns it paired with that distance.
/// When several items are equally close, the first one in the array wins.
/// `Array.minBy` raises on an empty `trainingSet`.
let classify trainingSet unknownItem =
    // Pair a training item with its distance to the unknown pixels.
    let withDistance candidate = candidate, distance candidate.Pixels unknownItem
    trainingSet
    |> Array.map withDistance
    |> Array.minBy snd
/// Classifies every item of `liveData` against `trainingData`.
/// Returns a pair:
///   * one `Result` per live item, matched or not, in the same order as `liveData`;
///   * the percentage (0.0 to 100.0) of live items whose nearest training item
///     carries the same `Number`. An empty `liveData` scores 0.0 rather than NaN.
let calculateScore trainingData liveData =
    // Width of one row of the pixel grid.
    let rowLength = 28
    // Cuts a flat pixel array into consecutive rows of `rowLength` values.
    // A trailing partial row, if any, is dropped. Slicing directly avoids
    // materialising every sliding window only to discard 27 out of 28.
    let windowPixels (data : int[]) =
        Array.init (data.Length / rowLength) (fun row -> Array.sub data (row * rowLength) rowLength)
    let results =
        liveData
        |> Array.Parallel.map (fun (actual : RawScribble) ->
            // `expected` is the nearest training item; `uncertainty` is its distance.
            let expected, uncertainty = classify trainingData actual.Pixels
            { Expected = { Number = expected.Number; PixelData = windowPixels expected.Pixels }
              Actual = { Number = actual.Number; PixelData = windowPixels actual.Pixels }
              // Distance scaled down by 1000 (presumably for display - confirm with the UI binding).
              Uncertainty = uncertainty / 1000.0
              Matched = if actual.Number = expected.Number then "Matched" else "Failed" })
    let matchCount =
        results
        |> Array.filter (fun result -> result.Matched = "Matched")
        |> Array.length
    let score =
        // Guard the division: 0.0 / 0.0 would otherwise produce NaN.
        if Array.isEmpty results then 0.0
        else float matchCount / float results.Length * 100.0
    results, score
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment