sslipchenko/SMO.fs

## SMO.fs
// Copyright 2016 Serge Slipchenko (Serge.Slipchenko@gmail.com)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace Semagle.MachineLearning.SVM

open LanguagePrimitives

[<Measure>] type MB

/// Implementation of Sequential Minimal Optimization (SMO) algorithm
module SMO =
    [<Literal>]
    let private tau = 1e-12

    [<Literal>]
    let private not_found = -1

    [<Literal>]
    let private shrinking_iterations = 1000

    type WSSStrategy = MaximalViolatingPair | SecondOrderInformation

    type C_SVM = { iterations : int; epsilon : float; C : float; strategy : WSSStrategy; shrinking : bool; cacheSize : int<MB> }

    let inline swap (a : 'A[]) i j =
        let tmp = a.[i]
        a.[i] <- a.[j]
        a.[j] <- tmp

    /// LRU list of computed columns
    type private LRU(capacity : int, N : int, Q : int -> int -> float) =
        let indices = Array.zeroCreate<int> capacity
        let columns = Array.zeroCreate<float[]> capacity
        let lengths = Array.zeroCreate<int> capacity
        let mutable first = 0
        let mutable last = 0

        /// Returns cache capacity
        member lru.Capacity = capacity

        /// Returns L elements of j-th column of Q matrix
        member lru.Get (j : int) (L : int) =
            let index = lru.tryFindIndex j
            if index <> not_found then
                let column = columns.[index]
                let length = lengths.[index]
                if length < L then
                    for i = length to L-1 do
                        column.[i] <- Q i j
                    lengths.[index] <- L
                column
            else
                let column = Array.init N (fun i -> if i < L then Q i j else 0.0)
                lru.insert j column L
                column

        /// Swap column elements
        member lru.Swap (i : int) (j : int) =
            let mutable index_i = not_found
            let mutable index_j = not_found

            let mutable k = first
            while k <> last do
                if indices.[k] = i then index_i <- k
                if indices.[k] = j then index_j <- k
                swap columns.[k] i j
                k <- (k + 1) % capacity

            if index_i <> not_found && index_j <> not_found then
                swap lengths index_i index_j

            if index_i <> not_found then indices.[index_i] <- j
            if index_j <> not_found then indices.[index_j] <- i

        /// Try to find computed column values
        member private lru.tryFindIndex t =
            let mutable i = first
            while (i <> last) && (indices.[i] <> t) do
                i <- (i + 1) % capacity

            if i <> last then i else not_found

        /// Insert new computed column values
        member private lru.insert index column length =
            indices.[last] <- index
            columns.[last] <- column
            lengths.[last] <- length
            last <- (last + 1) % capacity

            if first = last then first <- (first + 1) % capacity

        /// Returns required capacity for the specified cache size and column length
        static member capacity (cacheSize : int) (length : int) =
           let columnSize = sizeof<float>*length + sizeof<int> + sizeof<float[]>
           max 2 (cacheSize / columnSize)

    let C_SVC (X : 'X[]) (Y : float[]) (K : Kernel<'X>) (parameters : C_SVM) =
        if Array.length X <> Array.length Y then
            invalidArg "X and Y" "have different lengths"

        let info = printfn

        let N = Array.length X

        let C = Array.create N parameters.C
        let epsilon = parameters.epsilon

        let A = Array.zeroCreate N  // optimization variables
        let G = Array.create N -1.0 // gradient
        let G' = Array.create N 0.0 //

        // helper functions
        let inline _y_gf i = -G.[i]*Y.[i]
        let inline Q i j = (K X.[i] X.[j])*Y.[i]*Y.[j]
        let Q_lru = new LRU(LRU.capacity ((int parameters.cacheSize)*1024*1024) N, N, Q)
        info "cache capacity = %d" Q_lru.Capacity

        let inline isUnbound i = 0.0 < A.[i] && A.[i] < C.[i]
        let inline isUpBound i = Y.[i] = +1.0 && A.[i] = C.[i] || Y.[i] = -1.0 && A.[i] = 0.0
        let inline isLowBound i = Y.[i] = +1.0 && A.[i] = 0.0 || Y.[i] = -1.0 && A.[i] = C.[i]

        let inline maxUp L =
            let mutable max_i = not_found
            let mutable max_v = System.Double.NegativeInfinity
            for i = 0 to L-1 do
                if not (isUpBound i) then
                    let v = _y_gf i
                    if v > max_v then
                        max_i <- i
                        max_v <- v
            max_i

        let inline minLow L =
            let mutable min_i = not_found
            let mutable min_v = System.Double.PositiveInfinity
            for i = 0 to L-1 do
                if not (isLowBound i) then
                    let v = _y_gf i
                    if v < min_v then
                        min_i <- i
                        min_v <- v
            min_i

        let inline minLowTo s L =
            let Q_s = Q_lru.Get s L
            let inline objective t =
                let a_ts = (Q t t) + (Q_s.[s]) - 2.0*(Q_s.[t])*Y.[t]*Y.[s]
                let b_ts = _y_gf t - _y_gf s
                -b_ts*b_ts/(if a_ts > 0.0 then a_ts else tau)

            let mutable min_i = not_found
            let mutable min_v = System.Double.PositiveInfinity
            for i = 0 to L-1 do
                if not (isLowBound i) && (_y_gf i < _y_gf s) then
                    let v =objective i
                    if v < min_v then
                        min_i <- i
                        min_v <- v
            min_i

        /// Maximal violating pair working set selection strategy
        let maximalViolatingPair L =
            let i = maxUp L
            if i = not_found then None else Some (i, minLow L)

        /// Second order information working set selection strategy
        let secondOrderInformation L =
            let i = maxUp L
            if i = not_found then None else Some (i, minLowTo i L)

        let selectWorkingSet =
            match parameters.strategy with
                | MaximalViolatingPair -> maximalViolatingPair
                | SecondOrderInformation -> secondOrderInformation

        /// Solve an optimization sub-problem
        let inline solve i j L =
            let Q_i = Q_lru.Get i L
            let Q_j = Q_lru.Get j L
            let a_ij = (Q_i.[i]) + (Q_j.[j]) - 2.0*(Q_i.[j])*Y.[i]*Y.[j]
            if Y.[i] <> Y.[j] then
                let delta = (-G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
                let diff = A.[i] - A.[j]
                match (A.[i] + delta, A.[j] + delta) with
                    | _, a_j when diff > 0.0 && a_j < 0.0 -> (diff, 0.0)
                    | a_i, _ when (* diff <= 0.0 && *) a_i < 0.0 -> (0.0, diff)
                    | _, a_j when diff <= C.[i] - C.[j] && a_j > C.[j] -> (C.[j]+diff, C.[j])
                    | a_i, _ when (* diff > C.[i] - C.[j] && *) a_i > C.[i] -> (C.[i], C.[i] - diff)
                    | a_i, a_j -> a_i, a_j
            else
                let delta = (G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
                let sum = A.[i] + A.[j]
                match (A.[i] - delta, A.[j] + delta) with
                    | a_i, _ when sum > C.[i] && a_i > C.[i] -> (C.[i], sum - C.[i])
                    | _, a_j when (* sum <= C.[i] && *) a_j < 0.0 -> (sum, 0.0)
                    | _, a_j when sum > C.[j] && a_j > C.[j] -> (sum - C.[j], C.[j])
                    | a_i, _ when (* sum <= C.[j] && *) a_i < 0.0 -> (0.0, sum)
                    | a_i, a_j -> a_i, a_j

        /// Update gradient
        let inline updateG i j a_i a_j L =
            let Q_i = Q_lru.Get i L
            let Q_j = Q_lru.Get j L

            for t = 0 to L-1 do
                G.[t] <- G.[t] + (Q_i.[t])*(a_i - A.[i]) + (Q_j.[t])*(a_j - A.[j])

        let inline updateG' i a =
            let sign = match a, A.[i] with
                       | _ when a = C.[i] && A.[i] <> C.[i] -> +1.0
                       | _ when a <> C.[i] && A.[i] = C.[i] -> -1.0
                       | _ -> 0.0

            if sign <> 0.0 then
                let Q_i = Q_lru.Get i N
                for t = 0 to N-1 do
                    G'.[t] <- G'.[t] + sign * C.[i]*Q_i.[t]

        /// Reconstruct gradient
        let inline reconstructG L =
            info "reconstruct gradient"
            for t = L to N-1 do
                G.[t] <- G'.[t] - 1.0

            let unbound = seq { 0..L-1 } |> Seq.filter isUnbound |> Seq.length

            if unbound*L > 2*L*(N-L) then
                for i = L to N-1 do
                    let Q_i = Q_lru.Get i L
                    for j = 0 to L-1 do
                        if isUnbound j then
                            G.[i] <- G.[i] + A.[j]*Q_i.[j]
            else
                for i = 0 to L-1 do
                    if isUnbound i then
                        let Q_i = Q_lru.Get i N
                        for j = L to N-1 do
                            G.[j] <- G.[j] + A.[i]*Q_i.[j]

        let inline m L =
            let mutable max_v = System.Double.NegativeInfinity
            for i = 0 to L-1 do
                if not (isUpBound i) then
                    let v = _y_gf i
                    if v > max_v then
                        max_v <- v
            max_v

        let inline M L =
            let mutable min_v = System.Double.PositiveInfinity
            for i = 0 to L-1 do
                if not (isLowBound i) then
                    let v = _y_gf i
                    if v < min_v then
                        min_v <- v
            min_v

        let inline shrink m M L =
            let inline isShrinked i = (isUpBound i) && (_y_gf i) > m || (isLowBound i) && (_y_gf i) < M

            let inline swapAll i j =
                swap X i j; swap Y i j
                swap C i j; swap A i j
                swap G i j; swap G' i j
                Q_lru.Swap i j

            let mutable swaps = 0
            let mutable i = 0
            let mutable j = L - 1
            let mutable k = L - 1

            let mutable shrinked = 0

            while i <= j do
                match (isShrinked i), (isShrinked j) with
                | false, false -> i <- i + 1; j <- j - 1
                | true, false -> j <- j - 1
                | false, true -> i <- i + 1
                | _ ->
                    if j = k then
                        j <- j - 1
                    else
                        swaps <- swaps + 1
                        swapAll i k
                        i <- i + 1
                    shrinked <- shrinked + 1
                    k <- k - 1

            if shrinked > 0 then
                info "shrinked = %d, active = %d, swaps = %d" shrinked (L - shrinked) swaps

            L - shrinked

        let inline isOptimal m M epsilon = (m - M) <= epsilon

        /// Sequential Minimal Optimization (SMO) Algorithm
        let inline optimize_solve L =
            // Find a pair of elements that violate the optimality condition
            match selectWorkingSet L with
                | Some (i, j) ->
                    // Solve the optimization sub-problem
                    let a_i, a_j = solve i j L
                    // Update the gradient
                    updateG i j a_i a_j L
                    if parameters.shrinking then
                        updateG' i a_i
                        updateG' j a_j
                    // Update the solution
                    A.[i] <- a_i; A.[j] <- a_j
                    true
                | None -> false

        /// Optimize with shrinking every 1000 iterations
        let rec optimize_shrinking k s L reconstructed =
            let inline optimize_shrink m M L =
                let mutable reconstructed = reconstructed
                if not reconstructed && isOptimal m M (10.0*epsilon) then
                    reconstructG L
                    reconstructed <- true
                reconstructed, shrink m M L

            if k < parameters.iterations then
                let m_k = m L
                let M_k = M L

                // shrink active set
                let reconstructed, L =
                    if s = 0 then
                        optimize_shrink m_k M_k L
                    else
                        reconstructed, L

                // check optimality for active set
                if isOptimal m_k M_k epsilon then
                    reconstructG L

                    // check optimality for full set
                    if not(isOptimal (m L) (M L) epsilon) && optimize_solve N then
                        // shrink on next iteration
                        optimize_shrinking k 1 N reconstructed
                    else
                        k
                else
                    if optimize_solve L then
                        optimize_shrinking (k + 1) (if s > 0 then (s - 1) else shrinking_iterations) L reconstructed
                    else
                        k
             else
                failwith "Exceeded iterations limit"

        /// Optimize without shrinking every 1000 iterations
        let rec optimize_non_shrinking k =
            if k < parameters.iterations then
                let m_k = m N
                let M_k = M N

                if not (isOptimal m_k M_k epsilon) && optimize_solve N then
                    optimize_non_shrinking (k + 1)
                else
                    k
            else
                failwith "Exceeded iterations limit"

        let iterations =
            if parameters.shrinking then
                optimize_shrinking 0 shrinking_iterations N false
            else
                optimize_non_shrinking 0
        info "#iterations = %d" iterations

        /// Reconstruction of hyperplane bias
        let bias =
            let mutable b = 0.0
            let mutable M = 0
            for i = 0 to N-1 do
                if isUnbound i then
                    b <- b + _y_gf i
                    M <- M + 1
            DivideByInt b M

        /// Remove support vectors with A.[i] = 0.0 and compute Y.[i]*A.[i]
        let N' = Array.sumBy (fun a -> if a <> 0.0 then 1 else 0) A
        let X' = Array.zeroCreate N'
        let A' = Array.zeroCreate N'

        let mutable j = 0
        for i = 0 to N-1 do
            if A.[i] <> 0.0 then
                X'.[j] <- X.[i]
                A'.[j] <- Y.[i]*A.[i]
                j <- j + 1

        info "support vectors = %d" N'

        SVM(K,X',A',bias)
	// Copyright 2016 Serge Slipchenko (Serge.Slipchenko@gmail.com)
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	namespace Semagle.MachineLearning.SVM

	open LanguagePrimitives

	[<Measure>] type MB

	/// Implementation of Sequential Minimal Optimization (SMO) algorithm
	module SMO =
	[<Literal>]
	let private tau = 1e-12

	[<Literal>]
	let private not_found = -1

	[<Literal>]
	let private shrinking_iterations = 1000

	type WSSStrategy = MaximalViolatingPair \| SecondOrderInformation

	type C_SVM = { iterations : int; epsilon : float; C : float; strategy : WSSStrategy; shrinking : bool; cacheSize : int<MB> }

	let inline swap (a : 'A[]) i j =
	let tmp = a.[i]
	a.[i] <- a.[j]
	a.[j] <- tmp

	/// LRU list of computed columns
	type private LRU(capacity : int, N : int, Q : int -> int -> float) =
	let indices = Array.zeroCreate<int> capacity
	let columns = Array.zeroCreate<float[]> capacity
	let lengths = Array.zeroCreate<int> capacity
	let mutable first = 0
	let mutable last = 0

	/// Returns cache capacity
	member lru.Capacity = capacity

	/// Returns L elements of j-th column of Q matrix
	member lru.Get (j : int) (L : int) =
	let index = lru.tryFindIndex j
	if index <> not_found then
	let column = columns.[index]
	let length = lengths.[index]
	if length < L then
	for i = length to L-1 do
	column.[i] <- Q i j
	lengths.[index] <- L
	column
	else
	let column = Array.init N (fun i -> if i < L then Q i j else 0.0)
	lru.insert j column L
	column

	/// Swap column elements
	member lru.Swap (i : int) (j : int) =
	let mutable index_i = not_found
	let mutable index_j = not_found

	let mutable k = first
	while k <> last do
	if indices.[k] = i then index_i <- k
	if indices.[k] = j then index_j <- k
	swap columns.[k] i j
	k <- (k + 1) % capacity

	if index_i <> not_found && index_j <> not_found then
	swap lengths index_i index_j

	if index_i <> not_found then indices.[index_i] <- j
	if index_j <> not_found then indices.[index_j] <- i

	/// Try to find computed column values
	member private lru.tryFindIndex t =
	let mutable i = first
	while (i <> last) && (indices.[i] <> t) do
	i <- (i + 1) % capacity

	if i <> last then i else not_found

	/// Insert new computed column values
	member private lru.insert index column length =
	indices.[last] <- index
	columns.[last] <- column
	lengths.[last] <- length
	last <- (last + 1) % capacity

	if first = last then first <- (first + 1) % capacity

	/// Returns required capacity for the specified cache size and column length
	static member capacity (cacheSize : int) (length : int) =
	let columnSize = sizeof<float>*length + sizeof<int> + sizeof<float[]>
	max 2 (cacheSize / columnSize)

	let C_SVC (X : 'X[]) (Y : float[]) (K : Kernel<'X>) (parameters : C_SVM) =
	if Array.length X <> Array.length Y then
	invalidArg "X and Y" "have different lengths"

	let info = printfn

	let N = Array.length X

	let C = Array.create N parameters.C
	let epsilon = parameters.epsilon

	let A = Array.zeroCreate N // optimization variables
	let G = Array.create N -1.0 // gradient
	let G' = Array.create N 0.0 //

	// helper functions
	let inline _y_gf i = -G.[i]*Y.[i]
	let inline Q i j = (K X.[i] X.[j])Y.[i]Y.[j]
	let Q_lru = new LRU(LRU.capacity ((int parameters.cacheSize)10241024) N, N, Q)
	info "cache capacity = %d" Q_lru.Capacity

	let inline isUnbound i = 0.0 < A.[i] && A.[i] < C.[i]
	let inline isUpBound i = Y.[i] = +1.0 && A.[i] = C.[i] \|\| Y.[i] = -1.0 && A.[i] = 0.0
	let inline isLowBound i = Y.[i] = +1.0 && A.[i] = 0.0 \|\| Y.[i] = -1.0 && A.[i] = C.[i]

	let inline maxUp L =
	let mutable max_i = not_found
	let mutable max_v = System.Double.NegativeInfinity
	for i = 0 to L-1 do
	if not (isUpBound i) then
	let v = _y_gf i
	if v > max_v then
	max_i <- i
	max_v <- v
	max_i

	let inline minLow L =
	let mutable min_i = not_found
	let mutable min_v = System.Double.PositiveInfinity
	for i = 0 to L-1 do
	if not (isLowBound i) then
	let v = _y_gf i
	if v < min_v then
	min_i <- i
	min_v <- v
	min_i

	let inline minLowTo s L =
	let Q_s = Q_lru.Get s L
	let inline objective t =
	let a_ts = (Q t t) + (Q_s.[s]) - 2.0(Q_s.[t])Y.[t]*Y.[s]
	let b_ts = _y_gf t - _y_gf s
	-b_ts*b_ts/(if a_ts > 0.0 then a_ts else tau)

	let mutable min_i = not_found
	let mutable min_v = System.Double.PositiveInfinity
	for i = 0 to L-1 do
	if not (isLowBound i) && (_y_gf i < _y_gf s) then
	let v =objective i
	if v < min_v then
	min_i <- i
	min_v <- v
	min_i

	/// Maximal violating pair working set selection strategy
	let maximalViolatingPair L =
	let i = maxUp L
	if i = not_found then None else Some (i, minLow L)

	/// Second order information working set selection strategy
	let secondOrderInformation L =
	let i = maxUp L
	if i = not_found then None else Some (i, minLowTo i L)

	let selectWorkingSet =
	match parameters.strategy with
	\| MaximalViolatingPair -> maximalViolatingPair
	\| SecondOrderInformation -> secondOrderInformation

	/// Solve an optimization sub-problem
	let inline solve i j L =
	let Q_i = Q_lru.Get i L
	let Q_j = Q_lru.Get j L
	let a_ij = (Q_i.[i]) + (Q_j.[j]) - 2.0(Q_i.[j])Y.[i]*Y.[j]
	if Y.[i] <> Y.[j] then
	let delta = (-G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
	let diff = A.[i] - A.[j]
	match (A.[i] + delta, A.[j] + delta) with
	\| _, a_j when diff > 0.0 && a_j < 0.0 -> (diff, 0.0)
	\| a_i, _ when (* diff <= 0.0 && *) a_i < 0.0 -> (0.0, diff)
	\| _, a_j when diff <= C.[i] - C.[j] && a_j > C.[j] -> (C.[j]+diff, C.[j])
	\| a_i, _ when (* diff > C.[i] - C.[j] && *) a_i > C.[i] -> (C.[i], C.[i] - diff)
	\| a_i, a_j -> a_i, a_j
	else
	let delta = (G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
	let sum = A.[i] + A.[j]
	match (A.[i] - delta, A.[j] + delta) with
	\| a_i, _ when sum > C.[i] && a_i > C.[i] -> (C.[i], sum - C.[i])
	\| _, a_j when (* sum <= C.[i] && *) a_j < 0.0 -> (sum, 0.0)
	\| _, a_j when sum > C.[j] && a_j > C.[j] -> (sum - C.[j], C.[j])
	\| a_i, _ when (* sum <= C.[j] && *) a_i < 0.0 -> (0.0, sum)
	\| a_i, a_j -> a_i, a_j

	/// Update gradient
	let inline updateG i j a_i a_j L =
	let Q_i = Q_lru.Get i L
	let Q_j = Q_lru.Get j L

	for t = 0 to L-1 do
	G.[t] <- G.[t] + (Q_i.[t])(a_i - A.[i]) + (Q_j.[t])(a_j - A.[j])

	let inline updateG' i a =
	let sign = match a, A.[i] with
	\| _ when a = C.[i] && A.[i] <> C.[i] -> +1.0
	\| _ when a <> C.[i] && A.[i] = C.[i] -> -1.0
	\| _ -> 0.0

	if sign <> 0.0 then
	let Q_i = Q_lru.Get i N
	for t = 0 to N-1 do
	G'.[t] <- G'.[t] + sign * C.[i]*Q_i.[t]

	/// Reconstruct gradient
	let inline reconstructG L =
	info "reconstruct gradient"
	for t = L to N-1 do
	G.[t] <- G'.[t] - 1.0

	let unbound = seq { 0..L-1 } \|> Seq.filter isUnbound \|> Seq.length

	if unboundL > 2L*(N-L) then
	for i = L to N-1 do
	let Q_i = Q_lru.Get i L
	for j = 0 to L-1 do
	if isUnbound j then
	G.[i] <- G.[i] + A.[j]*Q_i.[j]
	else
	for i = 0 to L-1 do
	if isUnbound i then
	let Q_i = Q_lru.Get i N
	for j = L to N-1 do
	G.[j] <- G.[j] + A.[i]*Q_i.[j]

	let inline m L =
	let mutable max_v = System.Double.NegativeInfinity
	for i = 0 to L-1 do
	if not (isUpBound i) then
	let v = _y_gf i
	if v > max_v then
	max_v <- v
	max_v

	let inline M L =
	let mutable min_v = System.Double.PositiveInfinity
	for i = 0 to L-1 do
	if not (isLowBound i) then
	let v = _y_gf i
	if v < min_v then
	min_v <- v
	min_v

	let inline shrink m M L =
	let inline isShrinked i = (isUpBound i) && (_y_gf i) > m \|\| (isLowBound i) && (_y_gf i) < M

	let inline swapAll i j =
	swap X i j; swap Y i j
	swap C i j; swap A i j
	swap G i j; swap G' i j
	Q_lru.Swap i j

	let mutable swaps = 0
	let mutable i = 0
	let mutable j = L - 1
	let mutable k = L - 1

	let mutable shrinked = 0

	while i <= j do
	match (isShrinked i), (isShrinked j) with
	\| false, false -> i <- i + 1; j <- j - 1
	\| true, false -> j <- j - 1
	\| false, true -> i <- i + 1
	\| _ ->
	if j = k then
	j <- j - 1
	else
	swaps <- swaps + 1
	swapAll i k
	i <- i + 1
	shrinked <- shrinked + 1
	k <- k - 1

	if shrinked > 0 then
	info "shrinked = %d, active = %d, swaps = %d" shrinked (L - shrinked) swaps

	L - shrinked

	let inline isOptimal m M epsilon = (m - M) <= epsilon

	/// Sequential Minimal Optimization (SMO) Algorithm
	let inline optimize_solve L =
	// Find a pair of elements that violate the optimality condition
	match selectWorkingSet L with
	\| Some (i, j) ->
	// Solve the optimization sub-problem
	let a_i, a_j = solve i j L
	// Update the gradient
	updateG i j a_i a_j L
	if parameters.shrinking then
	updateG' i a_i
	updateG' j a_j
	// Update the solution
	A.[i] <- a_i; A.[j] <- a_j
	true
	\| None -> false

	/// Optimize with shrinking every 1000 iterations
	let rec optimize_shrinking k s L reconstructed =
	let inline optimize_shrink m M L =
	let mutable reconstructed = reconstructed
	if not reconstructed && isOptimal m M (10.0*epsilon) then
	reconstructG L
	reconstructed <- true
	reconstructed, shrink m M L

	if k < parameters.iterations then
	let m_k = m L
	let M_k = M L

	// shrink active set
	let reconstructed, L =
	if s = 0 then
	optimize_shrink m_k M_k L
	else
	reconstructed, L

	// check optimality for active set
	if isOptimal m_k M_k epsilon then
	reconstructG L

	// check optimality for full set
	if not(isOptimal (m L) (M L) epsilon) && optimize_solve N then
	// shrink on next iteration
	optimize_shrinking k 1 N reconstructed
	else
	k
	else
	if optimize_solve L then
	optimize_shrinking (k + 1) (if s > 0 then (s - 1) else shrinking_iterations) L reconstructed
	else
	k
	else
	failwith "Exceeded iterations limit"

	/// Optimize without shrinking every 1000 iterations
	let rec optimize_non_shrinking k =
	if k < parameters.iterations then
	let m_k = m N
	let M_k = M N

	if not (isOptimal m_k M_k epsilon) && optimize_solve N then
	optimize_non_shrinking (k + 1)
	else
	k
	else
	failwith "Exceeded iterations limit"

	let iterations =
	if parameters.shrinking then
	optimize_shrinking 0 shrinking_iterations N false
	else
	optimize_non_shrinking 0
	info "#iterations = %d" iterations

	/// Reconstruction of hyperplane bias
	let bias =
	let mutable b = 0.0
	let mutable M = 0
	for i = 0 to N-1 do
	if isUnbound i then
	b <- b + _y_gf i
	M <- M + 1
	DivideByInt b M

	/// Remove support vectors with A.[i] = 0.0 and compute Y.[i]*A.[i]
	let N' = Array.sumBy (fun a -> if a <> 0.0 then 1 else 0) A
	let X' = Array.zeroCreate N'
	let A' = Array.zeroCreate N'

	let mutable j = 0
	for i = 0 to N-1 do
	if A.[i] <> 0.0 then
	X'.[j] <- X.[i]
	A'.[j] <- Y.[i]*A.[i]
	j <- j + 1

	info "support vectors = %d" N'

	SVM(K,X',A',bias)