public
Last active

Machine Learning Dojo

  • Download Gist
DigitScript.fsx
F#
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
// This F# dojo is directly inspired by the
// Digit Recognizer competition from Kaggle.com:
// http://www.kaggle.com/c/digit-recognizer
// The datasets below are simply shorter versions of
// the training dataset from Kaggle.
// The goal of the dojo will be to
// create a classifier that uses training data
// to recognize hand-written digits, and
// evaluate the quality of our classifier
// by looking at predictions on the validation data.
 
// This file provides some guidance through the problem:
// each section is numbered, and
// solves one piece you will need. Sections contain
// general instructions,
// [ YOUR CODE GOES HERE! ] tags where you should
// make the magic happen, and
// <F# QUICK-STARTER> blocks. These are small
// F# tutorials illustrating aspects of the
// syntax which could come in handy. Run them,
// see what happens, and tweak them to fit your goals!
 
// 0. GETTING READY
// Create a new F# Library project, and
// copy the entire contents of this file
// in "Script.fsx"
 
// <F# QUICK-STARTER>
// With F# Script files (.fsx) and F# Interactive,
// you can "live code" and see what happens.
 
// Try typing let x = 42 in the script file,
// right-click and select "Execute in interactive".
// let "binds" the value on the right to a name.
 
// Try now typing x + 3;; in the F# Interactive window.
// ';;' indicates "execute now whatever I just typed".
 
// Now right-click the following 2 lines and execute:
/// Prints the line "Hello, <name>" to standard output.
let greet (name: string) : unit =
    printfn "Hello, %s" name
// let also binds a name to a function.
// greet is a function with one argument, name.
// You should be able to run this in F# Interactive:
// greet "World";;
// </F# QUICK-STARTER>
 
// Then, load data files from the following location:
// training set of 5,000 examples:
// http://brandewinder.blob.core.windows.net/public/trainingsample.csv
// validation set of 500 examples, to test your model:
// http://brandewinder.blob.core.windows.net/public/validationsample.csv
// 1. GETTING SOME DATA
// First let's read the contents of "trainingsample.csv"
 
// We will need System and System.IO to work with files,
// let's right-click / run in interactive,
// to have these namespaces loaded:
open System
open System.IO
 
// the following might come in handy:
// File.ReadAllLines(path)
// returns an array of strings, one for each line of the file
// Every line of the training CSV, read eagerly from a fixed local path.
let trainingSample =
    "/Users/Onorio_Development/Desktop/trainingsample.csv"
    |> File.ReadAllLines
// 2. EXTRACTING COLUMNS
// Break each line of the file into an array of string,
// separating by commas, using Array.map
 
// <F# QUICK-STARTER>
// Array.map quick-starter:
// Array.map takes an array, and transforms it
// into another array by applying a function to it.
// Example: starting from an array of strings:
let strings = [| "Machine"; "Learning"; "with"; "F#"; "is"; "fun" |]
// Array.map applied directly: the mapping function comes first,
// the array last. Here each string is mapped to its length:
let lengths = Array.map String.length strings
// The same computation written with pipe-forward,
// feeding the array in from the left:
let lengths2 = strings |> Array.map (fun word -> word.Length)
// </F# QUICK-STARTER>
// the following function might help
// A sample comma-separated line, and the fields obtained by splitting it on ','.
let csvToSplit = "1,2,3,4,5"
let splitResult = csvToSplit.Split [| ',' |]
/// Splits every line of `fileString` on commas,
/// producing one array of fields per input line.
let splitByCommas (fileString: string[]) =
    Array.map (fun (line: string) -> line.Split [| ',' |]) fileString
 
// Training lines broken into their comma-separated fields.
let trainingArr = trainingSample |> splitByCommas
// 3. CLEANING UP HEADERS
// Did you notice that the file has a header row? We want to get rid of it.
 
// <F# QUICK-STARTER>
// Array slicing quick starter:
// let's start with an Array of ints:
let someNumbers = [| 0 .. 10 |] // the integers 0 through 10
// a single element is read by its zero-based index:
let first = someNumbers.[0]
// a slice picks out a range of indices:
let twoToFive = someNumbers.[1..4] // indices 1 through 4
let upToThree = someNumbers.[..2] // lower bound omitted: starts at index 0
// </F# QUICK-STARTER>
/// Drops the first element of `arr` (the header row when applied to the
/// parsed CSV lines) and returns the remaining elements.
/// An empty input yields an empty array.
let removeHeaders (arr: 'a[]) : 'a[] =
    if Array.isEmpty arr then [||] else arr.[1..]
// Training rows with the header row removed.
let noheaders = trainingArr |> removeHeaders
/// Always raises: fails with "Screw you" when given `Some _`,
/// and with "Whatever" when given `None`.
let myMath arg =
    match arg with
    | Some _ -> failwith "Screw you"
    | None -> failwith "Whatever"
/// Same behaviour as `myMath`, spelled with an explicit parameter:
/// always raises, and the message depends on whether `x` holds a value.
let myMath2 x =
    let message = if Option.isSome x then "Screw you" else "Whatever"
    failwith message
// 4. CONVERTING FROM STRINGS TO INTS
// Now that we have an array containing arrays of strings,
// and the headers are gone, we need to transform it
// into an array of arrays of integers.
// Array.map seems like a good idea again :)
 
// The following might help:
let castedInt = int "42"
// or, alternatively, through the .NET Convert class:
let convertedInt = Convert.ToInt32 "42"
/// Parses the text `s` as a 32-bit integer by delegating to `Convert.ToInt32`.
let convert (s: string) : int =
    Convert.ToInt32(s)
 
//let processOuterArray a =
// a |> processArray (
//noheaders |> Array.map (fun (a:Array) -> a |> Array.map Convert.ToInt32)
/// Converts every field of every row from text to int using `convert`.
let toIntegers noheaders =
    // Parse one row of text fields into ints.
    let parseRow row = Array.map convert row
    Array.map parseRow noheaders
// Header-free training rows with every field parsed to an int.
let integerPixels = noheaders |> toIntegers
// 5. CONVERTING ARRAYS TO RECORDS
// Rather than dealing with a raw array of ints,
// for convenience let's store these into an array of Records
 
 
// Record quick starter: we can declare a
// Record (a lightweight, immutable class) type that way:
/// Record used by the quick-starter snippets:
/// an integer label together with an array of integer pixel values.
type Example =
    { Label: int
      Pixels: int[] }
// and instantiate one this way:
//let example = { Label = 1; Pixels = [| 1; 2; 3; |] }
 
/// One image of the data set: the digit it shows (`Number`)
/// and its pixel values (`Pixels`).
type numberImage =
    { Number: int
      Pixels: int[] }
 
/// Builds a `numberImage` from one parsed row: the first element
/// becomes `Number`, every remaining element goes into `Pixels`.
let makeNumberImage (arrInt:int[]) : numberImage =
    let label = arrInt.[0]
    let pixels = arrInt.[1..]
    { Number = label; Pixels = pixels }
 
// Every parsed training row turned into a numberImage record.
let trainingSet = integerPixels |> Array.map makeNumberImage
// 6. COMPUTING DISTANCES
// We need to compute the distance between images
// Math reminder: the (squared) euclidean distance is
// distance [ x1; y1; z1; ... ] [ x2; y2; z2; ... ] =
// (x1-x2)*(x1-x2) + (y1-y2)*(y1-y2) + (z1-z2)*(z1-z2) + ...
// <F# QUICK-STARTER>
// Array.map2 could come in handy here.
// Array.map2 quick start example
// Suppose we have 2 arrays:
let point1 = [| 0; 1; 2 |]
let point2 = [| 3; 4; 5 |]
// Array.map2 walks two arrays in step and combines
// the elements found at the same index, for instance:
let map2Example =
    (point1, point2) ||> Array.map2 (+)
// This simply computes the sums for point1 and point2,
// but we can easily turn this into a function now:
/// Element-wise sum of `P1` and `P2`.
let map2PointsExample (P1: int[]) (P2: int[]) =
    (P1, P2) ||> Array.map2 (fun left right -> left + right)
// </F# QUICK-STARTER>
 
// Having a function like this one comes in very handy right now.
// (The dojo template returned the constant 42 here, which is not a distance.)

/// Sum of the squared element-wise differences between `p1` and `p2`,
/// i.e. the formula given in the math reminder of section 6.
/// Two empty arrays are at distance 0.
let distance (p1: int[]) (p2: int[]) =
    Array.fold2 (fun total a b -> total + (a - b) * (a - b)) 0 p1 p2
 
/// Sum of the squared pixel-by-pixel differences between two images.
let findImgDifference (image1Pixels:int[]) (image2Pixels:int[]) =
    // Squared difference of one pair of pixels.
    let squaredGap a b =
        let gap = a - b
        gap * gap
    (image1Pixels, image2Pixels)
    ||> Array.map2 squaredGap
    |> Array.sum
//printfn "%A" (findImgDifference trainingSet.[0].Pixels trainingSet.[1].Pixels)
// 7. WRITING THE CLASSIFIER FUNCTION
// We are now ready to write a classifier function!
// The classifier should take a set of pixels
// (an array of ints) as an input, search for the
// closest example in our sample, and predict
// the value of that closest element.
// <F# QUICK-STARTER>
// Array.minBy can be handy here, to find
// the closest element in the Array of examples.
// Array.minBy quick start:
// suppose we have an Array of Example:
let someData : Example[] =
    [| { Label = 0; Pixels = [| 0; 1 |] }
       { Label = 1; Pixels = [| 9; 2 |] }
       { Label = 2; Pixels = [| 3; 4 |] } |]
// We can find for instance
// the element with largest first pixel
let findThatGuy =
    // Ranking key: the value of an example's first pixel.
    let firstPixel (example: Example) = example.Pixels.[0]
    Array.maxBy firstPixel someData
// </F# QUICK-STARTER>
// <F# QUICK-STARTER>
// F# and closures work very well together
let immutableValue = 42
/// True exactly when `x` is greater than `immutableValue`,
/// a value captured from the enclosing scope.
let functionWithClosure (x: int) =
    x > immutableValue // using outside value
// </F# QUICK-STARTER>
// The classifier function should probably
// look like this (the original dojo template
// classified everything as a 0):
/// Predicts the digit shown by `unknown`: finds the training image whose
/// pixels have the smallest `findImgDifference` to `unknown` and returns
/// that image's `Number`. `trainingSet` is captured from the enclosing scope.
let classify (unknown:int[]) =
    // Distance from the unknown image to one training example.
    let distanceToUnknown (candidate: numberImage) =
        findImgDifference unknown candidate.Pixels
    let nearest = Array.minBy distanceToUnknown trainingSet
    nearest.Number
// [ YOUR CODE GOES HERE! ]
// 8. EVALUATING THE MODEL AGAINST VALIDATION DATA
// Now that we have a classifier, we need to check
// how good it is.
// This is where the 2nd file, validationsample.csv,
// comes in handy. For each Example in the 2nd file,
// we know what the true Label is, so we can compare
// that value with what the classifier says.
// You could now check for each of the 500 examples in that file
// whether your classifier returns the correct answer,
// and compute the % correctly predicted.
// Every line of the validation CSV, read eagerly from a fixed local path.
let validationSample =
    "/Users/Onorio_Development/Desktop/validationsample.csv"
    |> File.ReadAllLines
// Split each validation line into its fields, then drop the header row
// exactly once. `|>` feeds a value into a function, whereas `>>` composes
// two functions and cannot take the already-split rows as its left operand.
let validationImages = validationSample |> splitByCommas |> removeHeaders
// Alias of the header-free rows, kept so references to `array` still resolve;
// removing the headers a second time would discard the first real example.
let array = validationImages
//>> removeHeaders
//>> toIntegers
//>> Array.map makeNumberImage

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.