@realvictorprm
Last active January 8, 2021 17:30
Proof-of-concept implementation of a simple CNN consisting of one convolution layer, one pooling layer, and a variable number of classic neural network layers.
#r "nuget: MathNet.Numerics"
#r "nuget: MathNet.Numerics.FSharp"
open System.Runtime.CompilerServices
open Checked
open MathNet.Numerics
open MathNet.Numerics.LinearAlgebra.Double
open MathNet.Numerics.LinearAlgebra
let inline (.*) (m1: LinearAlgebra.Matrix<'T>) (m2: LinearAlgebra.Matrix<'T>) = m1.PointwiseMultiply m2
let inline isNan a =
match a |> box with
| :? System.Double as a -> System.Double.IsNaN a
| _ -> false
//let inline (/) a b =
// if isNan a || isNan b then raise <| System.NotFiniteNumberException()
// elif a = LanguagePrimitives.GenericZero<_> && b = LanguagePrimitives.GenericZero<_> then
// raise <| System.NotFiniteNumberException("a and b are nearly zero which would result in NaN!")
// else Operators.(/) a b
[<AutoOpen>]
module StringExtensions =
open System.Text.RegularExpressions
let private marginRegex =
Regex("^.*?\|", RegexOptions.Multiline ||| RegexOptions.Compiled)
type System.String with
member self.StripMargin =
//printfn "%s" self
let res = marginRegex.Replace(self, "")
//printfn "%s" res
res
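// A small usage sketch (added, not in the original gist): StripMargin deletes
// everything up to and including the first '|' on each line, much like Scala's
// stripMargin. E.g. "  |hello".StripMargin evaluates to "hello".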
type Node = Node of weights: float [,]
type Layer = Layer of nodes: Node [,]
type Network2D = { layers: Layer [] }
type LayerResult =
{ activation: float [,]
intervalues: float [,] list }
type PositionNetwork = { layer: int; node: int * int }
type CNN =
{ filterMatrices: float [,] []
network: Network2D }
module Core =
let max = max
module Array =
let inline private checkNonNull argName arg =
match box arg with
| null -> nullArg argName
| _ -> ()
let foldi<'T, 'State> (folder: 'State -> int -> 'T -> 'State) (state: 'State) (array: 'T []) =
checkNonNull "array" array
let f =
OptimizedClosures.FSharpFunc<_, _, _, _>
.Adapt(folder)
let mutable state = state
for i = 0 to array.Length - 1 do
state <- f.Invoke(state, i, array.[i])
state
module Array2D =
let inline private checkNonNull argName arg =
match box arg with
| null -> nullArg argName
| _ -> ()
let getDimsForArray2D arr =
{| rowCount = arr |> Array2D.length1
columnCount = arr |> Array2D.length2 |}
let foldi<'T, 'State> (folder: 'State -> int -> int -> 'T -> 'State) (state: 'State) (array: 'T [,]) =
checkNonNull "array" array
let f =
OptimizedClosures.FSharpFunc<_, _, _, _, _>
.Adapt(folder)
let mutable state = state
let length1 = Array2D.length1 array
let length2 = Array2D.length2 array
for i = 0 to length1 - 1 do
for j = 0 to length2 - 1 do
state <- f.Invoke(state, i, j, array.[i, j])
state
let fold<'T, 'State> (folder: 'State -> 'T -> 'State) (state: 'State) (array: 'T [,]) =
checkNonNull "array" array
let f =
OptimizedClosures.FSharpFunc<_, _, _>
.Adapt(folder)
let mutable state = state
let length1 = Array2D.length1 array
let length2 = Array2D.length2 array
for i = 0 to length1 - 1 do
for j = 0 to length2 - 1 do
state <- f.Invoke(state, array.[i, j])
state
let inline averageBy (fn: 'a -> 'b) (arr: 'a [,]) =
let dims = getDimsForArray2D arr
let acc =
arr
|> fold (fun acc element -> Checked.op_Addition acc <| fn element) LanguagePrimitives.GenericZero<'b>
LanguagePrimitives.DivideByInt acc (dims.rowCount * dims.columnCount)
let inline max arr = arr |> fold max arr.[0, 0]
let inline maxWithWinnerIndex arr =
let (maxElement, maxElementIndex) =
arr
|> foldi
(fun (maxSoFar, maxSoFarIndex) i j element ->
let newMax = Core.max element maxSoFar
if newMax <> maxSoFar then
(newMax, (i, j))
else
(maxSoFar, maxSoFarIndex))
(arr.[0, 0], (0, 0))
{| maxElement = maxElement
maxElementIndex = maxElementIndex |}
let inline zip arr1 arr2 =
arr1
|> Array2D.mapi (fun i j e1 -> (e1, Array2D.get arr2 i j))
let inline sumBy (fn: 'a -> 'b) (arr: 'a [,]) =
arr
|> fold (fun state e -> Checked.op_Addition state <| fn e) LanguagePrimitives.GenericZero<'b>
let inline toMatrix (arr: 'a [,]) =
let dims = getDimsForArray2D arr
let m1 =
Matrix<'a>
.Build.Dense(dims.rowCount, dims.columnCount)
arr
|> Array2D.iteri (fun i j value -> m1.[i, j] <- value)
m1
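// A small usage sketch (added, not in the original gist) of the helpers above:
// maxWithWinnerIndex returns the maximum element together with its index,
// which the pooling layer below relies on to route deltas back to the winner.
let demoWindow = array2D [ [ 1.0; 5.0 ]; [ 3.0; 2.0 ] ]
// Evaluates to {| maxElement = 5.0; maxElementIndex = (0, 1) |}
let demoWinner = Array2D.maxWithWinnerIndex demoWindow
// Array2D.sumBy id demoWindow = 11.0; Array2D.averageBy id demoWindow = 2.75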
let getValues2D (inputX: float [,]) (network: Network2D) sigmaFun =
network.layers
|> Array.fold
(fun (state: LayerResult) (Layer layer) ->
let activation = state.activation
let ls = state.intervalues
let res: float [,] =
layer
|> Array2D.map
(fun (Node weights) ->
Array2D.zip activation weights
|> Array2D.sumBy (fun (a, b) -> a * b)
|> sigmaFun)
{ LayerResult.activation = res
LayerResult.intervalues = List.append ls [ res ] })
{ LayerResult.activation = inputX
LayerResult.intervalues = [] }
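// A forward-pass sketch (added, not in the original gist): one layer holding a
// single node whose 2x2 weight matrix matches the 2x2 input. The node's
// activation is sigma(sum of elementwise input * weights), and every layer's
// activation is recorded in intervalues.
let demoForward =
    let demoNet =
        { layers = [| Layer(Array2D.create 1 1 (Node(Array2D.create 2 2 0.5))) |] }
    getValues2D (array2D [ [ 1.0; 0.0 ]; [ 0.0; 1.0 ] ]) demoNet (fun x -> 1.0 / (1.0 + exp (-x)))
// demoForward.activation.[0, 0] = sigma(0.5 + 0.5) = sigma(1.0), roughly 0.731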
(*
Backpropagation derivation:

E = L(t, y)
net_j = sum_i(w_ij * o_i)
o_j = phi(net_j) = phi(sum_i(w_ij * o_i))

By the chain rule:
∂E/∂w_ij = ∂E/∂o_j * ∂o_j/∂w_ij = ∂E/∂o_j * ∂o_j/∂net_j * ∂net_j/∂w_ij

∂net_j/∂o_i = ∂/∂o_i sum_i(w_ij * o_i) = w_ij
∂net_j/∂w_ij = ∂/∂w_ij sum_i(w_ij * o_i) = o_i

// If j is an output neuron, take half of the quadratic error as the loss
// function, and assume o_j = y as this is the output:
∂E/∂o_j = ∂E/∂y = ∂/∂y (1/2 * (t - y)^2) = (y - t)

// If j sits in an inner layer:
∂E/∂o_j = sum_l(∂E/∂o_l * ∂o_l/∂o_j)
        = sum_l(∂E/∂o_l * ∂o_l/∂net_l * ∂net_l/∂o_j)
        = sum_l(∂E/∂o_l * ∂o_l/∂net_l * w_jl)

∂E/∂w_ij = ∂E/∂o_j * ∂o_j/∂net_j * o_i

// Generally, this is what delta_j is:
delta_j = ∂E/∂o_j * ∂o_j/∂net_j
∂E/∂w_ij = delta_j * o_i

// If j is an output neuron:
delta_j = (y - t) * ∂phi(net_j)/∂net_j

// Small nit: more generally, for an output neuron,
delta_j = ∂L(o_j, y)/∂o_j * ∂phi(net_j)/∂net_j

// If j is an inner neuron:
delta_j = sum_l(delta_l * w_jl) * ∂phi(net_j)/∂net_j

logistic_phi(z) = (1 + e^(-z))^(-1)
d logistic_phi/dz = -1 * (1 + e^(-z))^(-2) * e^(-z) * (-1)
                  = (1 + e^(-z))^(-2) * e^(-z)
                  = (1 + e^(-z))^(-1) * (1 - (1 + e^(-z))^(-1))

// If j is an inner neuron with the logistic function as phi:
delta_j = sum_l(delta_l * w_jl) * ∂phi(net_j)/∂net_j
*)
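// A numeric sanity check (added sketch, not in the original gist) of the
// identity derived above: phi'(z) = phi(z) * (1 - phi(z)), which is why
// phiDerivative below can be computed from the activation o_j alone.
let checkLogisticDerivative z =
    let phi z = 1.0 / (1.0 + exp (-z))
    let h = 1e-6
    // Central finite difference vs. the closed-form derivative.
    let numeric = (phi (z + h) - phi (z - h)) / (2.0 * h)
    abs (numeric - phi z * (1.0 - phi z)) < 1e-6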
let calcDeltas2D (inputX: float [,]) network sigmaFun (expected: float [,]): Map<PositionNetwork, float> * LayerResult =
let phiDerivative o_j = o_j * (1.0 - o_j)
// let e = System.Math.E
// (1.0 + e**(-z))**(-1.0) * (1.0 - (1.0 + e**(-z))**(-1.0))
let layerResult = getValues2D inputX network sigmaFun
let lastLayerIndex = network.layers.Length - 1
let simpleDeltas =
layerResult.intervalues.[lastLayerIndex]
|> Array2D.foldi
(fun ls i k o_j ->
let j = (i, k)
let res =
{ layer = lastLayerIndex; node = j }, (o_j - expected.[i, k]) * phiDerivative o_j
res :: ls)
[]
|> Map
let rec calc layer (deltas) =
if layer >= 0 then
let deltas =
layerResult.intervalues.[layer]
|> Array2D.foldi
(fun (deltas: Map<PositionNetwork, float>) j1 j2 o_j ->
let deltaSum =
let upperLayer = layer + 1
layerResult.intervalues.[upperLayer]
|> Array2D.foldi
(fun acc i k _ ->
let delta_l =
deltas.[{ layer = upperLayer; node = (i, k) }]
let w_jl =
let (Layer nodes) = network.layers.[upperLayer]
let (Node weights) = nodes.[i, k]
weights.[j1, j2]
acc + w_jl * delta_l)
0.0
let delta_j = deltaSum * phiDerivative o_j
deltas
|> Map.add { layer = layer; node = (j1, j2) } delta_j)
deltas
calc (layer - 1) deltas
else
deltas
calc (lastLayerIndex - 1) simpleDeltas, layerResult
let discreteConvolution (section1: _ [,]) (filterMatrix: _ [,]) =
let n = Array2D.length1 section1
let m = Array2D.length2 section1
// Map a flat row-major index back to a (row, column) pair.
let getIndexTuple index = index / m, index % m
let maxIndex = n * m - 1
let bias = 1.0
// We interpret the input as a discrete finite function and flatten its 2D
// indices to 1D indices in row-major order, going from the top left to the
// bottom right.
seq { 0 .. maxIndex }
|> Seq.fold
(fun acc i ->
let (i1, i2) = getIndexTuple i
let (k1, k2) = getIndexTuple (maxIndex - i)
float section1.[i1, i2]
* float filterMatrix.[k1, k2]
+ acc)
bias
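// A usage sketch (added, not in the original gist): the filter is indexed in
// reverse (maxIndex - i), so this is a true convolution (flipped kernel)
// rather than a cross-correlation, and a constant bias of 1.0 is added.
let demoPatch = array2D [ [ 1.0; 0.0; 0.0 ]; [ 0.0; 1.0; 0.0 ]; [ 0.0; 0.0; 1.0 ] ]
// The diagonal patch against the (flip-symmetric) diagonal filter overlaps in
// three places: 1 + 1 + 1, plus the bias of 1.0 => 4.0
let demoConvolved = discreteConvolution demoPatch demoPatch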
let convolutionStep input filter stride: float [,] =
let inputLength1 = input |> Array2D.length1
let inputLength2 = input |> Array2D.length2
let filterLength1 = filter |> Array2D.length1
let filterLength2 = filter |> Array2D.length2
// The stride is always 1 here so far. Note that we do narrow (valid) convolution.
array2D [ for i in 0 .. stride .. (inputLength1 - filterLength1) ->
[ for j in 0 .. stride .. (inputLength2 - filterLength2) do
let currentSelection =
input.[i..(i + filterLength1 - 1), j..(j + filterLength2 - 1)]
discreteConvolution currentSelection filter ] ]
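// A dimension sketch (added, not in the original gist): narrow convolution with
// stride 1 maps an 8x8 input and a 3x3 filter to an
// (8 - 3 + 1) x (8 - 3 + 1) = 6x6 feature map.
let demoFeatureMap =
    convolutionStep (Array2D.create 8 8 1.0) (Array2D.create 3 3 0.1) 1
// Array2D.length1 demoFeatureMap = 6 and Array2D.length2 demoFeatureMap = 6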
let inline poolingStep (input: 'a [,]) (windowSize: int) stride =
let length1, length2 =
Array2D.length1 input, Array2D.length2 input
//printfn "max index: %d" (length1 - 1)
if (length1 - 1) <> (((length1 - windowSize) / stride) * stride + (windowSize - 1)) then
failwithf "Incompatible values for windowSize and stride in poolingStep."
if (length2 - 1) <> (((length2 - windowSize) / stride) * stride + (windowSize - 1)) then
failwithf "Incompatible values for windowSize and stride in poolingStep."
let res =
array2D [ for i in 0 .. stride .. (length1 - windowSize) ->
[ for j in 0 .. stride .. (length2 - windowSize) do
let currentSelection =
input.[i..(i + windowSize - 1), j..(j + windowSize - 1)]
//printfn "j: %d" (j + windowSize - 1)
let res =
Array2D.maxWithWinnerIndex currentSelection
let adjustedIndex =
let (x, y) = res.maxElementIndex
(i + x, j + y)
{| res with
maxElementIndex = adjustedIndex |} ] ]
res
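// A usage sketch (added, not in the original gist): 2x2 max pooling with
// stride 2 over a 6x6 feature map gives a 3x3 grid of records, each holding
// the window's maximum and the index of that maximum in the input's
// coordinates (needed later for backprop).
let demoPooled =
    poolingStep (Array2D.init 6 6 (fun i j -> float (i * 6 + j))) 2 2
// demoPooled.[0, 0] = {| maxElement = 7.0; maxElementIndex = (1, 1) |}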
let preCalcCNN (input: float [,]) (cnn: CNN) sigmaFun =
// Convolution comes first
let stride = 1
//printfn $"input row count: {input |> Array2D.length1}, column count: {input |> Array2D.length2}"
let convolutionLayers =
cnn.filterMatrices
|> Array.map (fun filterMatrix -> convolutionStep input filterMatrix stride)
let relus =
convolutionLayers
|> Array.map (Array2D.map (fun pixel -> sigmaFun pixel))
// Now apply pooling
let windowSize = 2
let windowStride = 2
let poolingStepResults =
relus
|> Array.map (fun relu -> poolingStep relu windowSize windowStride)
let filterMatrixWidth =
cnn.filterMatrices
|> Array.head
|> Array2D.length2
let poolingStepData, poolingStepResultFixed =
let n =
poolingStepResults
|> Array.head
|> Array2D.length1
let columnCount = poolingStepResults |> Array.head |> Array2D.length2
let m = columnCount * poolingStepResults.Length
// PoolingResultHeight x (PoolingResultWidth * FilterMatricesAmount)
// => n x m matrix
Array2D.init
n
m
(fun j i ->
let x = i % columnCount
let y = j
let overallIndex = int (i / columnCount)
poolingStepResults.[overallIndex].[y, x]
.maxElement)
|> Array2D.toMatrix,
Array2D.init
n
m
(fun i j ->
let x = j % ((poolingStepResults |> Array.head |> Array2D.length2))
let y = i
let overallIndex = int (j / ((poolingStepResults |> Array.head |> Array2D.length2)))
//printfn $"i: {i}, j: {j}"
{| poolingStepResults.[overallIndex].[y, x] with
maxElementIndexInResult = (i, j) |})
let maxElementIndices =
poolingStepResults
|> Array.map (Array2D.fold (fun state element -> element.maxElementIndex :: state) [])
let dim1, dim2 =
let (Layer layer) = cnn.network.layers.[0]
let (Node node) = layer.[0, 0]
node |> Array2D.length1, node |> Array2D.length2
if poolingStepData.RowCount <> dim1
|| poolingStepData.ColumnCount <> dim2 then
failwithf
$"Incompatible dimensions: the pooling step output is {poolingStepData.RowCount} x {poolingStepData.ColumnCount}, but the classic network's first layer expects {dim1} x {dim2}."
{| maxElementIndices = maxElementIndices
poolingStepData = poolingStepData |> Matrix.toArray2
filterMatrixWidth = filterMatrixWidth
poolingStepResults = poolingStepResultFixed
relusLayer = relus |}
let calcCNN (input: float [,]) (cnn: CNN) sigmaFun =
let preCalcData = preCalcCNN input cnn sigmaFun
getValues2D (preCalcData.poolingStepData) cnn.network sigmaFun
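// Note (added): calcCNN is the full forward pass. preCalcCNN convolves the
// input with every filter, applies sigmaFun per pixel (the array is named
// relus, though sigmaFun here is the logistic function), and max-pools;
// getValues2D then feeds the pooled matrix through the classic network.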
let calcDeltasAndAdjustment (input: float [,]) (network) sigmaFun (expected: float [,]) =
let deltasBeforePoolingLayer, layerResult =
calcDeltas2D input network sigmaFun expected
let getDimsForLayer layer =
let (Layer layer) = network.layers.[layer]
Array2D.getDimsForArray2D layer
seq {
let currentLayer = Array2D.getDimsForArray2D input
let upperLayer = getDimsForLayer 0
yield
Array2D.init
upperLayer.rowCount
upperLayer.columnCount
(fun i j ->
Array2D.init
(currentLayer.rowCount)
(currentLayer.columnCount)
(fun c1 c2 ->
let o_kl = input.[c1, c2]
{| adjustment =
o_kl
* deltasBeforePoolingLayer.[{ layer = 0; node = (i, j) }] |}))
for layer, intervalues in layerResult.intervalues
|> Seq.take (layerResult.intervalues.Length - 1)
|> Seq.indexed do
let currentLayer = getDimsForLayer layer
let upperLayer = getDimsForLayer (layer + 1)
yield
Array2D.init
upperLayer.rowCount
upperLayer.columnCount
(fun i j ->
Array2D.init
(currentLayer.rowCount)
(currentLayer.columnCount)
(fun c1 c2 ->
let o_kl = intervalues.[c1, c2]
{| adjustment =
o_kl
* deltasBeforePoolingLayer.[{ layer = layer + 1; node = (i, j) }] |}))
}
|> Seq.toArray,
layerResult
let gradientDescentNN network sigmaFun inputsWithExpectedOutput abortWhen =
let rec fixit network i =
let results =
inputsWithExpectedOutput
|> Array.map (fun (input, expected) -> calcDeltasAndAdjustment input network sigmaFun expected)
let averageSquareError: float =
let calcAverageErrorForSample layerResult (expected: float [,]): float =
layerResult.activation
// TODO: Replace with Array2D.indexed
|> Array2D.mapi (fun i j activation -> (i, j, activation))
|> Array2D.averageBy (fun (i, j, activation) -> abs (float activation - float expected.[i, j]))
let acc =
Array.zip results inputsWithExpectedOutput
|> Array.sumBy (fun ((_, layerResult), (_, expected)) -> calcAverageErrorForSample layerResult expected)
acc / (float results.Length)
if i % 1 = 0 then
printfn "square error at %20dth iteration: %f" i averageSquareError
// We abort if we exceed the maximum proposed number of iterations,
// if the average square error falls within the abortWhen bound,
// or if the last iteration achieved no change in the average square error.
if System.Double.IsNaN averageSquareError
|| System.Double.IsInfinity averageSquareError then
printfn $"Early abort. We seriously screwed up here. Average square error is: {averageSquareError}"
network
elif i > 1_000_000_000 || averageSquareError <= abortWhen
//|| (abs(averageSquareError - lastSquareError) <= 0.000000000001)
then
network
else
let adjustments: {| adjustment: float |} [,] [,] [] =
let n = inputsWithExpectedOutput.Length
let acc =
results
|> Array.map fst
|> Array.reduce
(fun adjustmentsPerInputExpectedPair1 adjustmentsPerInputExpectedPair2 ->
Array.zip adjustmentsPerInputExpectedPair1 adjustmentsPerInputExpectedPair2
|> Array.map
(fun (adjustmentsPerLayer1, adjustmentsPerLayer2) ->
Array2D.zip adjustmentsPerLayer1 adjustmentsPerLayer2
|> Array2D.map
(fun (adjustmentsPerNode1, adjustmentsPerNode2) ->
Array2D.zip adjustmentsPerNode1 adjustmentsPerNode2
|> Array2D.map
(fun (adjustment1, adjustment2) ->
//printfn "adjustment1: %A, adjustment2: %A" adjustment1 adjustment2
{| adjustment = adjustment1.adjustment + adjustment2.adjustment |}))))
acc
|> Array.map (
Array2D.map (Array2D.map (fun adjustment -> {| adjustment = adjustment.adjustment / (float n) |}))
)
let layers =
network.layers
|> Array.mapi
(fun layerNumber (Layer layer) ->
//printfn "%d" layerNumber
layer
|> Array2D.mapi
(fun i j (Node node) ->
//printfn "i: %d, j: %d" i j
//printfn $"{adjustments.[layerNumber] |> Array2D.getDimsForArray2D}"
Array2D.zip node adjustments.[layerNumber].[i, j]
|> Array2D.map
(fun (current, adjustment) ->
//printfn "adjustment %f" adjustment.adjustment
current - 10.0 * adjustment.adjustment)
|> Node)
|> Layer)
let newNetwork = { layers = layers }
fixit newNetwork (i + 1)
fixit network 0
let calcDeltasAndAdjustmentCNN (input: float [,]) (cnn: CNN) sigmaFun (expected: float [,]) =
let preCalcData = preCalcCNN input cnn sigmaFun
let poolingStepData = preCalcData.poolingStepData
let maxElementIndices = preCalcData.maxElementIndices
let filterMatrixWidth = preCalcData.filterMatrixWidth
let deltasBeforePoolingLayer, layerResult =
calcDeltas2D poolingStepData cnn.network sigmaFun expected
let filterMatricesAdjustments =
let dims =
let (Layer layer) = cnn.network.layers.[0]
Array2D.getDimsForArray2D layer
let filterDims =
let dims =
Array2D.getDimsForArray2D cnn.filterMatrices.[0]
{| dims with
columnCount = dims.columnCount * cnn.filterMatrices.Length |}
//printfn "dim1: %d, dim2: %d" dim1 dim2
let phiDerivative o_j = o_j * (1.0 - o_j)
let calcAdjustment m' n' (data: _ seq) =
let calcDelta k l value =
let acc =
seq {
for i in 0 .. dims.rowCount - 1 do
for j in 0 .. dims.columnCount - 1 -> (i, j)
}
|> Seq.sumBy
(fun (i, j) ->
let weight =
let (Layer layer) = cnn.network.layers.[0]
let (Node weights) = layer.[i, j]
weights.[k, l]
//printfn "weight: %f" weight
let deltaUpperLayer =
deltasBeforePoolingLayer.[{ layer = 0; node = (i, j) }]
//printfn "%A" deltaUpperLayer
deltaUpperLayer * weight)
let o = value
acc * phiDerivative o
//printfn "length: %d" <| (data |> Seq.length)
if data |> Seq.isEmpty then
0.
else
data
|> Seq.sumBy
(fun ((i, j), (k, l), value) ->
let delta = calcDelta k l value
//printfn "%f" delta
let o = input.[i + m', j + n']
delta * o)
cnn.filterMatrices
|> Array.mapi
(fun filterMatrixIndex filterMatrix ->
let data =
preCalcData.poolingStepResults.[*, (filterMatrixIndex * filterMatrixWidth)..((filterMatrixIndex + 1) * filterMatrixWidth - 1)]
|> Array2D.fold
(fun ls element ->
(element.maxElementIndex, element.maxElementIndexInResult, element.maxElement)
:: ls)
[]
filterMatrix
|> Array2D.mapi (fun m' n' _ -> calcAdjustment m' n' data))
seq {
let filterHeight =
cnn.filterMatrices.[0] |> Array2D.length1
let filterWidth =
cnn.filterMatrices.[0] |> Array2D.length2
let inputHeight = input |> Array2D.length1
let inputWidth = input |> Array2D.length2
// Adjustments for first layer (convolutional layer)
yield
Array2D.init
(filterHeight)
(filterWidth * cnn.filterMatrices.Length)
(fun m n ->
let n' = n % filterWidth
let m' = m
let currentFilterIndex = int (n / filterWidth)
{| adjustment = filterMatricesAdjustments.[currentFilterIndex].[m', n'] |}
|> Array2D.create 1 1)
// Adjustments for the other layers (classic neural network layers) except the
// last one, which needs special handling.
let getDimsForLayer layer =
let (Layer layer) = cnn.network.layers.[layer]
Array2D.getDimsForArray2D layer
// Pooling step layer before that
let currentLayer =
Array2D.getDimsForArray2D poolingStepData
let upperLayer = getDimsForLayer 0
yield
Array2D.init
upperLayer.rowCount
upperLayer.columnCount
(fun i j ->
Array2D.init
(currentLayer.rowCount)
(currentLayer.columnCount)
(fun c1 c2 ->
let o_kl = poolingStepData.[c1, c2]
{| adjustment =
o_kl
* deltasBeforePoolingLayer.[{ layer = 0; node = (i, j) }] |}))
for layer, intervalues in layerResult.intervalues
|> Seq.take (layerResult.intervalues.Length - 1)
|> Seq.indexed do
let currentLayer = getDimsForLayer layer
let upperLayer = getDimsForLayer (layer + 1)
yield
Array2D.init
upperLayer.rowCount
upperLayer.columnCount
(fun i j ->
Array2D.init
(currentLayer.rowCount)
(currentLayer.columnCount)
(fun c1 c2 ->
let o_kl = intervalues.[c1, c2]
{| adjustment =
o_kl
* deltasBeforePoolingLayer.[{ layer = layer + 1; node = (i, j) }] |}))
}
|> Seq.toArray,
layerResult
let sigmaFun value = 1. / (1. + System.Math.E ** (-value))
let (|Finite|NonFinite|) a =
// Treat NaN and the infinities alike; either would poison the weight update.
if System.Double.IsNaN a || System.Double.IsInfinity a then NonFinite
else Finite a
// It seems that if we create a new Random again too soon, it gets seeded with
// the same millisecond and returns the same number again.
// This is highly suboptimal for our use case, soooo we wait.
let random () =
let random =
new System.Random(System.DateTime.Now.Millisecond)
let res = random.NextDouble() * 2.0 - 1.0
System.Threading.Thread.Sleep(1)
res
// For why this is unsafe, see above :D
let unsafeRandom () =
System
.Random(System.DateTime.Now.Millisecond)
.NextDouble()
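// An alternative sketch (added, not the gist's approach): sharing one Random
// instance sidesteps the identical-seed problem and the 1 ms sleep entirely.
// The names below are illustrative only.
let sharedRandom = System.Random()
let sharedRandomWeight () = sharedRandom.NextDouble() * 2.0 - 1.0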
let gradientDescentCNN2 cnn sigmaFun inputsWithExpectedOutput abortWhen =
inputsWithExpectedOutput
|> Array.iter
(fun (input, expected) ->
let _, initialLayerResult =
calcDeltasAndAdjustmentCNN input cnn sigmaFun expected
printfn $"initial activation: {initialLayerResult.activation.[0, 0]}")
let epsilon = 1e-4
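// Note (added): the weight update below divides the learning rate by
// sqrt(sumOfGradients + epsilon), an AdaGrad-style per-weight step size.
// AdaGrad proper accumulates *squared* gradients, whereas this code
// accumulates the raw gradients, so treat it as a variant of that idea.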
let rec fixit cnn (sumOfGradients: System.Collections.Generic.Dictionary<_, _>) i =
let network = cnn.network
let results =
inputsWithExpectedOutput
|> Array.map (fun (input, expected) -> calcDeltasAndAdjustmentCNN input cnn sigmaFun expected)
let averageSquareError: float =
let calcAverageErrorForSample layerResult (expected: float [,]): float =
layerResult.activation
// TODO: Replace with Array2D.indexed
|> Array2D.mapi (fun i j activation -> (i, j, activation))
|> Array2D.averageBy (fun (i, j, activation) ->
//if System.Double.IsNaN activation then
// printfn "activation is NaN!"
//if System.Double.IsNaN expected.[i, j] then
// printfn "expected is NaN!"
0.5 * (float activation - float expected.[i, j]) ** 2.0)
let acc =
Array.zip results inputsWithExpectedOutput
|> Array.sumBy (fun ((_, layerResult), (_, expected)) -> calcAverageErrorForSample layerResult expected)
acc / (float results.Length)
if i % 10 = 0 then
//System.Console.Clear()
//System.Console.SetCursorPosition(0, 0)
$"square error at %20d{i}th iteration: %.40f{averageSquareError}
|"
.StripMargin
|> System.Console.Write
// We abort if we exceed the maximum proposed number of iterations,
// if the average square error falls within the abortWhen bound,
// or if the last iteration achieved no change in the average square error.
if System.Double.IsNaN averageSquareError
|| System.Double.IsInfinity averageSquareError then
printfn $"Early abort. We seriously screwed up here. Average square error is: {averageSquareError}"
cnn
elif i > 1_000_000_000 || averageSquareError <= abortWhen then
cnn
else
let r = random()
let adjustments: {| adjustment: float |} [,] [,] [] =
let n = inputsWithExpectedOutput.Length
let acc =
results
|> Array.Parallel.map fst
|> Array.reduce
(fun adjustmentsPerInputExpectedPair1 adjustmentsPerInputExpectedPair2 ->
Array.zip adjustmentsPerInputExpectedPair1 adjustmentsPerInputExpectedPair2
|> Array.Parallel.map
(fun (adjustmentsPerLayer1, adjustmentsPerLayer2) ->
Array2D.zip adjustmentsPerLayer1 adjustmentsPerLayer2
|> Array2D.map
(fun (adjustmentsPerNode1, adjustmentsPerNode2) ->
Array2D.zip adjustmentsPerNode1 adjustmentsPerNode2
|> Array2D.map
(fun (adjustment1, adjustment2) ->
//if System.Double.IsNaN adjustment1.adjustment then
// printfn "adjustment1 is NaN!"
//if System.Double.IsNaN adjustment2.adjustment then
// printfn "adjustment1 is NaN!"
//printfn "adjustment1: %A, adjustment2: %A" adjustment1 adjustment2
{| adjustment = adjustment1.adjustment + adjustment2.adjustment |}))))
acc
|> Array.map (
Array2D.map (Array2D.map (fun adjustment -> {| adjustment = adjustment.adjustment / (float n) |}))
)
let learningRate = 0.1
let layers =
cnn.network.layers
|> Array.Parallel.mapi
(fun layerNumber (Layer layer) ->
layer
|> Array2D.mapi
(fun i j (Node node) ->
Array2D.zip node adjustments.[layerNumber + 1].[i, j]
|> Array2D.mapi
(fun k l (current, adjustment) ->
// printfn "adjustment %f" adjustment.adjustment
//let res =
// current - learningRate * adjustment.adjustment
let acc =
match sqrt(sumOfGradients.[(layerNumber, i, j, k, l)] + epsilon) with
| Finite res -> res
| NonFinite -> 1.
let res = current - (learningRate / acc) * adjustment.adjustment
sumOfGradients.[(layerNumber, i, j, k, l)] <-
sumOfGradients.[(layerNumber, i, j, k, l)]
+ adjustment.adjustment
res)
|> Node)
|> Layer)
let newNetwork = { layers = layers }
let newFilters =
let adjustmentData = adjustments |> Array.head
cnn.filterMatrices
|> Array.map (
Array2D.mapi
(fun i j currentFilterValue ->
let adjustment = adjustmentData.[i, j].[0, 0].adjustment
//let res =
// currentFilterValue - learningRate * adjustment
let acc =
match sqrt(sumOfGradients.[(-1, i, j, 0, 0)] + epsilon) with
| Finite res -> res
| NonFinite -> 1.
let res = currentFilterValue - (learningRate / acc) * adjustment
sumOfGradients.[(-1, i, j, 0, 0)] <- sumOfGradients.[(-1, i, j, 0, 0)] + adjustment
res)
)
fixit
{ filterMatrices = newFilters
network = newNetwork }
sumOfGradients
(i + 1)
let initialSumOfGradients =
let dict = System.Collections.Generic.Dictionary()
cnn.network.layers
|> Array.iteri
(fun layerNumber (Layer layer) ->
layer
|> Array2D.iteri
(fun i j (Node node) ->
node
|> Array2D.iteri (fun k l _ -> dict.[(layerNumber, i, j, k, l)] <- 0.)))
cnn.filterMatrices
|> Array.iter (Array2D.iteri (fun i j currentFilterValue -> dict.[(-1, i, j, 0, 0)] <- 0.))
dict
fixit cnn initialSumOfGradients 0
let cnnTest () =
let randomWeight () = random ()
let pairs =
let amount = 2
let expected =
[ [ [ 0.0 ]; [ 1.0 ] ] |> array2D
[ [ 1.0 ]; [ 0.0 ] ] |> array2D ]
[
array2D [
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 1.; 0.; 0.; 0.; 1.; 0.; 0.; 0. ]
[ 0.; 1.; 0.; 1.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 1.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 1.; 0.; 1.; 0.; 0.; 0.; 0. ]
[ 1.; 0.; 0.; 0.; 1.; 0.; 0.; 0. ]
], expected.[0]
array2D [
[ 1.; 0.; 1.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 1.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 1.; 0.; 1.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ -0.5; 0.; 0.; 0.; 0.; 0.; 0.; -0.5 ]
//[ 0.; -0.5; 0.; 0.; 0.; 0.; -0.5; 0. ]
//[ 0.; 0.; -0.5; 0.; 0.; -0.5; 0.; 0. ]
//[ 0.; 0.; 0.; -0.5; -0.5; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; -0.5; -0.5; 0.; 0.; 0. ]
//[ 0.; 0.; -0.5; 0.; 0.; -0.5; 0.; 0. ]
//[ 0.; -0.5; 0.; 0.; 0.; 0.; -0.5; 0. ]
//[ -0.5; 0.; 0.; 0.; 0.; 0.; 0.; -0.5 ]
],
expected.[0]
array2D [ [ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 1.; 1.; 1.; 1.; 0.; 0. ]
[ 0.; 0.; 1.; 0.; 0.; 1.; 0.; 0. ]
[ 0.; 0.; 1.; 0.; 0.; 1.; 0.; 0. ]
[ 0.; 0.; 1.; 1.; 1.; 1.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
//[ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
//[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
//[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
//[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
//[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
//[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
//[ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
],
expected.[1]
]
|> List.toArray
|> Array.map(fun (values, expected) ->
values |> Array2D.map(fun value -> value - random() * 0.05), expected)
let inputHeight, inputWidth = pairs |> Array.head |> (fun (values, _) -> values |> Array2D.length1, values |> Array2D.length2)
let filterSize = 3
let randomFilter _ =
Array2D.init filterSize filterSize (fun _ _ -> randomWeight ())
let filters = Array.init 4 randomFilter
let convolutionStride = 1
let poolingLayerStride = 2
let poolingLayerWindowSize = 2
let network2D_1 =
{ Network2D.layers =
let layer1X = 4
let layer1Y = 2
let poolingLayerHeight =
let n = ((inputHeight - filterSize) / convolutionStride) + 1 // seq { 0 .. convolutionStride .. (inputHeight - filterSize) } |> Seq.
let m = ((n - poolingLayerWindowSize) / poolingLayerStride) + 1
m
// ((1 + (inputHeight - filterSize) / convolutionStride) / poolingLayerStride)
let poolingLayerWidth =
let n = ((inputWidth - filterSize) / convolutionStride) + 1
let m = ((n - poolingLayerWindowSize) / poolingLayerStride) + 1
m * filters.Length
printfn $"{poolingLayerHeight} {poolingLayerWidth}"
[| Layer
<| Array2D.create
layer1X
layer1Y
(Node
<| Array2D.init poolingLayerHeight poolingLayerWidth (fun _ _ -> randomWeight ()))
Layer
<| Array2D.create
2
1
(Node
<| Array2D.init layer1X layer1Y (fun _ _ -> randomWeight ())) |] }
let optimizedNetwork =
//gradientDescentNN
// network2D_1
// sigmaFun
// pairs
// 0.08
gradientDescentCNN2
{ CNN.filterMatrices = filters
CNN.network = network2D_1 }
sigmaFun
pairs
0.001
let testSamples =
[ [ let r() = -random ()
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 1.; 0.; 0.; 0.; 1.; 0.; 0.; 0. ]
[ 0.; 1.; 0.; 1.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 1.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 1.; 0.; 1.; 0.; 0.; 0.; 0. ]
[ 1.; 0.; 0.; 0.; 1.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
]
|> array2D |> Array2D.map(function 1.0 -> 1.0 - random() * 0.1 | _ -> random() * 0.1),
[ [ 0.0 ]; [ 1.0 ] ] |> array2D
[
[ 0.; 0.; 1.; 1.; 1.; 1.; 0.; 0. ]
[ 0.; 0.; 1.; 0.; 0.; 1.; 0.; 0. ]
[ 0.; 0.; 1.; 0.; 0.; 1.; 0.; 0. ]
[ 0.; 0.; 1.; 1.; 1.; 1.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
]
|> array2D,
[ [ 1.0 ]; [ 0.0 ] ] |> array2D
[ [ 1.; 1.; 1.; 1.; 1.; 1.; 0.; 0. ]
[ 1.; 0.; 0.; 0.; 0.; 1.; 0.; 0. ]
[ 1.; 0.; 0.; 0.; 0.; 1.; 0.; 0. ]
[ 1.; 0.; 0.; 0.; 0.; 1.; 0.; 0. ]
[ 1.; 0.; 0.; 0.; 0.; 1.; 0.; 0. ]
[ 1.; 1.; 1.; 1.; 1.; 1.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
]
|> array2D,
[ [ 1.0 ]; [ 0.0 ] ] |> array2D
[
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
[ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
[ 0.; 1.; 0.; 0.; 0.; 0.; 1.; 0. ]
[ 0.; 1.; 1.; 1.; 1.; 1.; 1.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
//[ 0.; 0.; 0.; 0.; 0.; 0.; 0.; 0. ]
]
|> array2D
|> Array2D.map (fun value -> value * random () * 0.5 + 0.5),
[ [ 1.0 ]; [ 0.0 ] ] |> array2D ]
|> List.toArray
testSamples
|> Array.append pairs
|> Array.map
(fun (input, expected) ->
(calcCNN input optimizedNetwork sigmaFun)
.activation,
expected)
cnnTest ()
|> printfn "%A"