sslipchenko/SMO.fs

## SMO.fs
// Copyright 2016 Serge Slipchenko (Serge.Slipchenko@gmail.com)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

namespace Semagle.MachineLearning.SVM

open LanguagePrimitives

[<Measure>] type MB

/// Implementation of Sequential Minimal Optimization (SMO) algorithm
module SMO =
    [<Literal>]
    let private tau = 1e-12

    [<Literal>]
    let private not_found = -1

    type WSSStrategy = MaximalViolatingPair | SecondOrderInformation

    type C_SVM = { iterations : int; epsilon : float; C : float; strategy : WSSStrategy; cacheSize : int<MB> }

    /// LRU list of computed columns
    type private LRU(capacity : int, N : int, Q : int -> int -> float) =
        let indices = Array.zeroCreate<int> capacity
        let columns = Array.zeroCreate<float[]> capacity
        let mutable first = 0
        let mutable last = 0

        /// Returns cache capacity
        member lru.Capacity = capacity

        /// Returns j-th column of Q matrix
        member lru.Item (j : int) =
            let index = lru.tryFindIndex j
            if index <> not_found then
                columns.[index]
            else
                let column = Array.init N (fun i -> Q i j)
                lru.insert j column
                column

        /// Try to find computed column values
        member private lru.tryFindIndex probe =
            let mutable i = first
            while (i <> last) && (indices.[i] <> probe) do
                i <- (i + 1) % capacity

            if i <> last then i else not_found

        /// Insert new computed column values
        member private lru.insert index column =
            indices.[last] <- index
            columns.[last] <- column
            last <- (last + 1) % capacity

            if first = last then first <- (first + 1) % capacity

        /// Returns required capacity for the specified cache size and column length
        static member capacity (cacheSize : int) (length : int) =
           let columnSize = sizeof<float>*length + sizeof<int> + sizeof<float[]>
           max 2 (cacheSize / columnSize)

    let C_SVC (X : 'X[]) (Y : float[]) (K : Kernel<'X>) (parameters : C_SVM) =
        if Array.length X <> Array.length Y then
            invalidArg "X and Y" "have different lengths"

        let info = printfn

        let N = Array.length X

        let C = Array.create N parameters.C
        let epsilon = parameters.epsilon

        let A = Array.zeroCreate N  // optimization variables
        let G = Array.create N -1.0 // gradient

        // helper functions
        let inline _y_gf i = -G.[i]*Y.[i]
        let inline Q i j = (K X.[i] X.[j])*Y.[i]*Y.[j]
        let Q_lru = new LRU(LRU.capacity ((int parameters.cacheSize)*1024*1024) N, N, Q)
        info "cache capacity = %d" Q_lru.Capacity

        let I_up () = seq { 0..N-1 } |> Seq.filter (fun i -> (Y.[i] = +1.0 && A.[i] < C.[i]) || (Y.[i] = -1.0 && A.[i] > 0.0))
        let I_low () = seq { 0..N-1 } |> Seq.filter (fun i -> (Y.[i] = -1.0 && A.[i] < C.[i]) || (Y.[i] = +1.0 && A.[i] > 0.0))

        let inline maxUp () =
            let ts = I_up ()
            if Seq.isEmpty ts then not_found else Seq.maxBy _y_gf ts

        let inline minLow () =
            let ts = I_low ()
            if Seq.isEmpty ts then not_found else Seq.minBy _y_gf ts

        let inline minLowTo s =
            let ts = I_low () |> Seq.filter (fun t -> _y_gf t < _y_gf s)
            let Q_s = Q_lru.[s]
            let objective t =
                let a_ts = (Q t t) + (Q_s.[s]) - 2.0*(Q_s.[t])*Y.[t]*Y.[s]
                let b_ts = _y_gf t - _y_gf s
                -b_ts*b_ts/(if a_ts > 0.0 then a_ts else tau)
            if Seq.isEmpty ts then not_found else Seq.minBy objective ts

        /// Maximal violating pair working set selection strategy
        let maximalViolatingPair () =
            let i = maxUp()
            if i = not_found then None else Some (i, minLow())

        /// Second order information working set selection strategy
        let secondOrderInformation () =
            let i = maxUp()
            if i = not_found then None else Some (i, minLowTo i)

        let selectWorkingSet =
            match parameters.strategy with
                | MaximalViolatingPair -> maximalViolatingPair
                | SecondOrderInformation -> secondOrderInformation

        /// Solve an optimization sub-problem
        let inline solve i j =
            let Q_i = Q_lru.[i]
            let Q_j = Q_lru.[j]
            let a_ij = (Q_i.[i]) + (Q_j.[j]) - 2.0*(Q_i.[j])*Y.[i]*Y.[j]
            if Y.[i] <> Y.[j] then
                let delta = (-G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
                let diff = A.[i] - A.[j]
                match (A.[i] + delta, A.[j] + delta) with
                    | _, a_j when diff > 0.0 && a_j < 0.0 -> (diff, 0.0)
                    | a_i, _ when diff <= 0.0 && a_i < 0.0 -> (0.0, diff)
                    | _, a_j when diff <= C.[i] - C.[j] && a_j > C.[j] -> (C.[j]+diff, C.[j])
                    | a_i, _ when diff > C.[i] - C.[j] && a_i > C.[i] -> (C.[i], C.[i] - diff)
                    | a_i, a_j -> a_i, a_j
            else
                let delta = (G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
                let sum = A.[i] + A.[j]
                match (A.[i] - delta, A.[j] + delta) with
                    | a_i, _ when sum > C.[i] && a_i > C.[i] -> (C.[i], sum - C.[i])
                    | _, a_j when sum <= C.[i] && a_j < 0.0 -> (sum, 0.0)
                    | _, a_j when sum > C.[j] && a_j > C.[j] -> (sum - C.[j], C.[j])
                    | a_i, _ when sum <= C.[j] && a_i < 0.0 -> (0.0, sum)
                    | a_i, a_j -> a_i, a_j

        /// Update gradient
        let inline updateG i j a_i a_j =
            let Q_i = Q_lru.[i]
            let Q_j = Q_lru.[j]
            for t in 0..N-1 do
                G.[t] <- G.[t] + (Q_i.[t])*(a_i - A.[i]) + (Q_j.[t])*(a_j - A.[j])

        /// Check stop conditions
        let stopCriterion () =
            let inline m y = I_up () |> Seq.filter (fun i -> Y.[i] = y) |> Seq.map _y_gf |> Seq.max
            let inline M y = I_low () |> Seq.filter (fun i -> Y.[i] = y) |> Seq.map _y_gf |> Seq.min
            ((m +1.0) - (M +1.0) < epsilon) || ((m -1.0) - (M -1.0) < epsilon)

        /// Sequential Minimal Optimization (SMO) Algorithm
        let rec optimize k =
            if k < parameters.iterations then
                /// 1. Find a pair of elements that violate the optimality condition
                match selectWorkingSet () with
                    | Some (i, j) ->
                        /// 2. Solve the optimization sub-problem
                        let a_i, a_j = solve i j
                        /// 3. Update the gradient and the solution
                        updateG i j a_i a_j
                        A.[i] <- a_i; A.[j] <- a_j
                        if stopCriterion() then k else optimize (k + 1)
                    | None -> k
             else
                failwith "Exceeded iterations limit"

        let iterations = optimize 0
        info "#iterations = %d" iterations

        /// Reconstruction of hyperplane bias
        let bias =
            let mutable b = 0.0
            let mutable M = 0
            for i = 0 to N-1 do
                if 0.0 < A.[i] && A.[i] < C.[i] then
                    b <- b + _y_gf i
                    M <- M + 1
            DivideByInt b M

        /// Remove support vectors with A.[i] = 0.0 and compute Y.[i]*A.[i]
        let N' = Array.sumBy (fun a -> if a <> 0.0 then 1 else 0) A
        let X' = Array.zeroCreate N'
        let A' = Array.zeroCreate N'

        let mutable j = 0
        for i = 0 to N-1 do
            if A.[i] <> 0.0 then
                X'.[j] <- X.[i]
                A'.[j] <- Y.[i]*A.[i]
                j <- j + 1

        info "support vectors = %d" N'

        SVM(K,X',A',bias)
	// Copyright 2016 Serge Slipchenko (Serge.Slipchenko@gmail.com)
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	namespace Semagle.MachineLearning.SVM

	open LanguagePrimitives

	[<Measure>] type MB

	/// Implementation of Sequential Minimal Optimization (SMO) algorithm
	module SMO =
	[<Literal>]
	let private tau = 1e-12

	[<Literal>]
	let private not_found = -1

	type WSSStrategy = MaximalViolatingPair \| SecondOrderInformation

	type C_SVM = { iterations : int; epsilon : float; C : float; strategy : WSSStrategy; cacheSize : int<MB> }

	/// LRU list of computed columns
	type private LRU(capacity : int, N : int, Q : int -> int -> float) =
	let indices = Array.zeroCreate<int> capacity
	let columns = Array.zeroCreate<float[]> capacity
	let mutable first = 0
	let mutable last = 0

	/// Returns cache capacity
	member lru.Capacity = capacity

	/// Returns j-th column of Q matrix
	member lru.Item (j : int) =
	let index = lru.tryFindIndex j
	if index <> not_found then
	columns.[index]
	else
	let column = Array.init N (fun i -> Q i j)
	lru.insert j column
	column

	/// Try to find computed column values
	member private lru.tryFindIndex probe =
	let mutable i = first
	while (i <> last) && (indices.[i] <> probe) do
	i <- (i + 1) % capacity

	if i <> last then i else not_found

	/// Insert new computed column values
	member private lru.insert index column =
	indices.[last] <- index
	columns.[last] <- column
	last <- (last + 1) % capacity

	if first = last then first <- (first + 1) % capacity

	/// Returns required capacity for the specified cache size and column length
	static member capacity (cacheSize : int) (length : int) =
	let columnSize = sizeof<float>*length + sizeof<int> + sizeof<float[]>
	max 2 (cacheSize / columnSize)

	let C_SVC (X : 'X[]) (Y : float[]) (K : Kernel<'X>) (parameters : C_SVM) =
	if Array.length X <> Array.length Y then
	invalidArg "X and Y" "have different lengths"

	let info = printfn

	let N = Array.length X

	let C = Array.create N parameters.C
	let epsilon = parameters.epsilon

	let A = Array.zeroCreate N // optimization variables
	let G = Array.create N -1.0 // gradient

	// helper functions
	let inline _y_gf i = -G.[i]*Y.[i]
	let inline Q i j = (K X.[i] X.[j])Y.[i]Y.[j]
	let Q_lru = new LRU(LRU.capacity ((int parameters.cacheSize)10241024) N, N, Q)
	info "cache capacity = %d" Q_lru.Capacity

	let I_up () = seq { 0..N-1 } \|> Seq.filter (fun i -> (Y.[i] = +1.0 && A.[i] < C.[i]) \|\| (Y.[i] = -1.0 && A.[i] > 0.0))
	let I_low () = seq { 0..N-1 } \|> Seq.filter (fun i -> (Y.[i] = -1.0 && A.[i] < C.[i]) \|\| (Y.[i] = +1.0 && A.[i] > 0.0))

	let inline maxUp () =
	let ts = I_up ()
	if Seq.isEmpty ts then not_found else Seq.maxBy _y_gf ts

	let inline minLow () =
	let ts = I_low ()
	if Seq.isEmpty ts then not_found else Seq.minBy _y_gf ts

	let inline minLowTo s =
	let ts = I_low () \|> Seq.filter (fun t -> _y_gf t < _y_gf s)
	let Q_s = Q_lru.[s]
	let objective t =
	let a_ts = (Q t t) + (Q_s.[s]) - 2.0(Q_s.[t])Y.[t]*Y.[s]
	let b_ts = _y_gf t - _y_gf s
	-b_ts*b_ts/(if a_ts > 0.0 then a_ts else tau)
	if Seq.isEmpty ts then not_found else Seq.minBy objective ts

	/// Maximal violating pair working set selection strategy
	let maximalViolatingPair () =
	let i = maxUp()
	if i = not_found then None else Some (i, minLow())

	/// Second order information working set selection strategy
	let secondOrderInformation () =
	let i = maxUp()
	if i = not_found then None else Some (i, minLowTo i)

	let selectWorkingSet =
	match parameters.strategy with
	\| MaximalViolatingPair -> maximalViolatingPair
	\| SecondOrderInformation -> secondOrderInformation

	/// Solve an optimization sub-problem
	let inline solve i j =
	let Q_i = Q_lru.[i]
	let Q_j = Q_lru.[j]
	let a_ij = (Q_i.[i]) + (Q_j.[j]) - 2.0(Q_i.[j])Y.[i]*Y.[j]
	if Y.[i] <> Y.[j] then
	let delta = (-G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
	let diff = A.[i] - A.[j]
	match (A.[i] + delta, A.[j] + delta) with
	\| _, a_j when diff > 0.0 && a_j < 0.0 -> (diff, 0.0)
	\| a_i, _ when diff <= 0.0 && a_i < 0.0 -> (0.0, diff)
	\| _, a_j when diff <= C.[i] - C.[j] && a_j > C.[j] -> (C.[j]+diff, C.[j])
	\| a_i, _ when diff > C.[i] - C.[j] && a_i > C.[i] -> (C.[i], C.[i] - diff)
	\| a_i, a_j -> a_i, a_j
	else
	let delta = (G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
	let sum = A.[i] + A.[j]
	match (A.[i] - delta, A.[j] + delta) with
	\| a_i, _ when sum > C.[i] && a_i > C.[i] -> (C.[i], sum - C.[i])
	\| _, a_j when sum <= C.[i] && a_j < 0.0 -> (sum, 0.0)
	\| _, a_j when sum > C.[j] && a_j > C.[j] -> (sum - C.[j], C.[j])
	\| a_i, _ when sum <= C.[j] && a_i < 0.0 -> (0.0, sum)
	\| a_i, a_j -> a_i, a_j

	/// Update gradient
	let inline updateG i j a_i a_j =
	let Q_i = Q_lru.[i]
	let Q_j = Q_lru.[j]
	for t in 0..N-1 do
	G.[t] <- G.[t] + (Q_i.[t])(a_i - A.[i]) + (Q_j.[t])(a_j - A.[j])

	/// Check stop conditions
	let stopCriterion () =
	let inline m y = I_up () \|> Seq.filter (fun i -> Y.[i] = y) \|> Seq.map _y_gf \|> Seq.max
	let inline M y = I_low () \|> Seq.filter (fun i -> Y.[i] = y) \|> Seq.map _y_gf \|> Seq.min
	((m +1.0) - (M +1.0) < epsilon) \|\| ((m -1.0) - (M -1.0) < epsilon)

	/// Sequential Minimal Optimization (SMO) Algorithm
	let rec optimize k =
	if k < parameters.iterations then
	/// 1. Find a pair of elements that violate the optimality condition
	match selectWorkingSet () with
	\| Some (i, j) ->
	/// 2. Solve the optimization sub-problem
	let a_i, a_j = solve i j
	/// 3. Update the gradient and the solution
	updateG i j a_i a_j
	A.[i] <- a_i; A.[j] <- a_j
	if stopCriterion() then k else optimize (k + 1)
	\| None -> k
	else
	failwith "Exceeded iterations limit"

	let iterations = optimize 0
	info "#iterations = %d" iterations

	/// Reconstruction of hyperplane bias
	let bias =
	let mutable b = 0.0
	let mutable M = 0
	for i = 0 to N-1 do
	if 0.0 < A.[i] && A.[i] < C.[i] then
	b <- b + _y_gf i
	M <- M + 1
	DivideByInt b M

	/// Remove support vectors with A.[i] = 0.0 and compute Y.[i]*A.[i]
	let N' = Array.sumBy (fun a -> if a <> 0.0 then 1 else 0) A
	let X' = Array.zeroCreate N'
	let A' = Array.zeroCreate N'

	let mutable j = 0
	for i = 0 to N-1 do
	if A.[i] <> 0.0 then
	X'.[j] <- X.[i]
	A'.[j] <- Y.[i]*A.[i]
	j <- j + 1

	info "support vectors = %d" N'

	SVM(K,X',A',bias)