Skip to content
{{ message }}

Instantly share code, notes, and snippets.

# sslipchenko/SMO.fs

Last active Aug 11, 2016
Optimization stages of Sequential Minimal Optimization (SMO) algorithm in F#
 // Copyright 2016 Serge Slipchenko (Serge.Slipchenko@gmail.com) // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. namespace Semagle.MachineLearning.SVM open LanguagePrimitives [] type MB /// Implementation of Sequential Minimal Optimization (SMO) algorithm module SMO = [] let private tau = 1e-12 [] let private not_found = -1 [] let private shrinking_iterations = 1000 type WSSStrategy = MaximalViolatingPair | SecondOrderInformation type C_SVM = { iterations : int; epsilon : float; C : float; strategy : WSSStrategy; shrinking : bool; cacheSize : int } let inline swap (a : 'A[]) i j = let tmp = a.[i] a.[i] <- a.[j] a.[j] <- tmp /// LRU list of computed columns type private LRU(capacity : int, N : int, Q : int -> int -> float) = let indices = Array.zeroCreate capacity let columns = Array.zeroCreate capacity let lengths = Array.zeroCreate capacity let mutable first = 0 let mutable last = 0 /// Returns cache capacity member lru.Capacity = capacity /// Returns L elements of j-th column of Q matrix member lru.Get (j : int) (L : int) = let index = lru.tryFindIndex j if index <> not_found then let column = columns.[index] let length = lengths.[index] if length < L then for i = length to L-1 do column.[i] <- Q i j lengths.[index] <- L column else let column = Array.init N (fun i -> if i < L then Q i j else 0.0) lru.insert j column L column /// Swap column elements member lru.Swap (i : int) (j : int) = let mutable index_i = not_found let mutable index_j = not_found let mutable k = first while k <> last do if indices.[k] = i then index_i <- k if indices.[k] = j then index_j <- k swap columns.[k] i j k <- (k + 1) % capacity if index_i <> not_found && index_j <> not_found then swap lengths index_i index_j if index_i <> not_found then indices.[index_i] <- j if index_j <> not_found then indices.[index_j] <- i /// Try to find computed column values member private lru.tryFindIndex t = let mutable i = first while (i <> last) && (indices.[i] <> t) do i <- (i + 1) % capacity if i <> last then i else not_found /// Insert new computed column values member private lru.insert index column length = indices.[last] <- index columns.[last] <- column lengths.[last] <- length last <- (last + 1) % capacity if first = last then first <- (first + 1) % capacity /// Returns required capacity for the specified cache size and column length static member capacity (cacheSize : int) (length : int) = let columnSize = sizeof*length + sizeof + sizeof max 2 (cacheSize / columnSize) let C_SVC (X : 'X[]) (Y : float[]) (K : Kernel<'X>) (parameters : C_SVM) = if Array.length X <> Array.length Y then invalidArg "X and Y" "have different lengths" let info = printfn let N = Array.length X let C = Array.create N parameters.C let epsilon = parameters.epsilon let A = Array.zeroCreate N // optimization variables let G = Array.create N -1.0 // gradient let G' = Array.create N 0.0 // // helper functions let inline _y_gf i = -G.[i]*Y.[i] let inline Q i j = (K X.[i] X.[j])*Y.[i]*Y.[j] let Q_lru = new LRU(LRU.capacity ((int parameters.cacheSize)*1024*1024) N, N, Q) info "cache capacity = %d" Q_lru.Capacity let inline isUnbound i = 0.0 < A.[i] && A.[i] < C.[i] let inline isUpBound i = Y.[i] = +1.0 && A.[i] = C.[i] || Y.[i] = -1.0 && A.[i] = 0.0 let inline isLowBound i = Y.[i] = +1.0 && A.[i] = 0.0 || Y.[i] = -1.0 && A.[i] = C.[i] let inline maxUp L = let mutable max_i = not_found let mutable max_v = System.Double.NegativeInfinity for i = 0 to L-1 do if not (isUpBound i) then let v = _y_gf i if v > max_v then max_i <- i max_v <- v max_i let inline minLow L = let mutable min_i = not_found let mutable min_v = System.Double.PositiveInfinity for i = 0 to L-1 do if not (isLowBound i) then let v = _y_gf i if v < min_v then min_i <- i min_v <- v min_i let inline minLowTo s L = let Q_s = Q_lru.Get s L let inline objective t = let a_ts = (Q t t) + (Q_s.[s]) - 2.0*(Q_s.[t])*Y.[t]*Y.[s] let b_ts = _y_gf t - _y_gf s -b_ts*b_ts/(if a_ts > 0.0 then a_ts else tau) let mutable min_i = not_found let mutable min_v = System.Double.PositiveInfinity for i = 0 to L-1 do if not (isLowBound i) && (_y_gf i < _y_gf s) then let v =objective i if v < min_v then min_i <- i min_v <- v min_i /// Maximal violating pair working set selection strategy let maximalViolatingPair L = let i = maxUp L if i = not_found then None else Some (i, minLow L) /// Second order information working set selection strategy let secondOrderInformation L = let i = maxUp L if i = not_found then None else Some (i, minLowTo i L) let selectWorkingSet = match parameters.strategy with | MaximalViolatingPair -> maximalViolatingPair | SecondOrderInformation -> secondOrderInformation /// Solve an optimization sub-problem let inline solve i j L = let Q_i = Q_lru.Get i L let Q_j = Q_lru.Get j L let a_ij = (Q_i.[i]) + (Q_j.[j]) - 2.0*(Q_i.[j])*Y.[i]*Y.[j] if Y.[i] <> Y.[j] then let delta = (-G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij) let diff = A.[i] - A.[j] match (A.[i] + delta, A.[j] + delta) with | _, a_j when diff > 0.0 && a_j < 0.0 -> (diff, 0.0) | a_i, _ when (* diff <= 0.0 && *) a_i < 0.0 -> (0.0, diff) | _, a_j when diff <= C.[i] - C.[j] && a_j > C.[j] -> (C.[j]+diff, C.[j]) | a_i, _ when (* diff > C.[i] - C.[j] && *) a_i > C.[i] -> (C.[i], C.[i] - diff) | a_i, a_j -> a_i, a_j else let delta = (G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij) let sum = A.[i] + A.[j] match (A.[i] - delta, A.[j] + delta) with | a_i, _ when sum > C.[i] && a_i > C.[i] -> (C.[i], sum - C.[i]) | _, a_j when (* sum <= C.[i] && *) a_j < 0.0 -> (sum, 0.0) | _, a_j when sum > C.[j] && a_j > C.[j] -> (sum - C.[j], C.[j]) | a_i, _ when (* sum <= C.[j] && *) a_i < 0.0 -> (0.0, sum) | a_i, a_j -> a_i, a_j /// Update gradient let inline updateG i j a_i a_j L = let Q_i = Q_lru.Get i L let Q_j = Q_lru.Get j L for t = 0 to L-1 do G.[t] <- G.[t] + (Q_i.[t])*(a_i - A.[i]) + (Q_j.[t])*(a_j - A.[j]) let inline updateG' i a = let sign = match a, A.[i] with | _ when a = C.[i] && A.[i] <> C.[i] -> +1.0 | _ when a <> C.[i] && A.[i] = C.[i] -> -1.0 | _ -> 0.0 if sign <> 0.0 then let Q_i = Q_lru.Get i N for t = 0 to N-1 do G'.[t] <- G'.[t] + sign * C.[i]*Q_i.[t] /// Reconstruct gradient let inline reconstructG L = info "reconstruct gradient" for t = L to N-1 do G.[t] <- G'.[t] - 1.0 let unbound = seq { 0..L-1 } |> Seq.filter isUnbound |> Seq.length if unbound*L > 2*L*(N-L) then for i = L to N-1 do let Q_i = Q_lru.Get i L for j = 0 to L-1 do if isUnbound j then G.[i] <- G.[i] + A.[j]*Q_i.[j] else for i = 0 to L-1 do if isUnbound i then let Q_i = Q_lru.Get i N for j = L to N-1 do G.[j] <- G.[j] + A.[i]*Q_i.[j] let inline m L = let mutable max_v = System.Double.NegativeInfinity for i = 0 to L-1 do if not (isUpBound i) then let v = _y_gf i if v > max_v then max_v <- v max_v let inline M L = let mutable min_v = System.Double.PositiveInfinity for i = 0 to L-1 do if not (isLowBound i) then let v = _y_gf i if v < min_v then min_v <- v min_v let inline shrink m M L = let inline isShrinked i = (isUpBound i) && (_y_gf i) > m || (isLowBound i) && (_y_gf i) < M let inline swapAll i j = swap X i j; swap Y i j swap C i j; swap A i j swap G i j; swap G' i j Q_lru.Swap i j let mutable swaps = 0 let mutable i = 0 let mutable j = L - 1 let mutable k = L - 1 let mutable shrinked = 0 while i <= j do match (isShrinked i), (isShrinked j) with | false, false -> i <- i + 1; j <- j - 1 | true, false -> j <- j - 1 | false, true -> i <- i + 1 | _ -> if j = k then j <- j - 1 else swaps <- swaps + 1 swapAll i k i <- i + 1 shrinked <- shrinked + 1 k <- k - 1 if shrinked > 0 then info "shrinked = %d, active = %d, swaps = %d" shrinked (L - shrinked) swaps L - shrinked let inline isOptimal m M epsilon = (m - M) <= epsilon /// Sequential Minimal Optimization (SMO) Algorithm let inline optimize_solve L = // Find a pair of elements that violate the optimality condition match selectWorkingSet L with | Some (i, j) -> // Solve the optimization sub-problem let a_i, a_j = solve i j L // Update the gradient updateG i j a_i a_j L if parameters.shrinking then updateG' i a_i updateG' j a_j // Update the solution A.[i] <- a_i; A.[j] <- a_j true | None -> false /// Optimize with shrinking every 1000 iterations let rec optimize_shrinking k s L reconstructed = let inline optimize_shrink m M L = let mutable reconstructed = reconstructed if not reconstructed && isOptimal m M (10.0*epsilon) then reconstructG L reconstructed <- true reconstructed, shrink m M L if k < parameters.iterations then let m_k = m L let M_k = M L // shrink active set let reconstructed, L = if s = 0 then optimize_shrink m_k M_k L else reconstructed, L // check optimality for active set if isOptimal m_k M_k epsilon then reconstructG L // check optimality for full set if not(isOptimal (m L) (M L) epsilon) && optimize_solve N then // shrink on next iteration optimize_shrinking k 1 N reconstructed else k else if optimize_solve L then optimize_shrinking (k + 1) (if s > 0 then (s - 1) else shrinking_iterations) L reconstructed else k else failwith "Exceeded iterations limit" /// Optimize without shrinking every 1000 iterations let rec optimize_non_shrinking k = if k < parameters.iterations then let m_k = m N let M_k = M N if not (isOptimal m_k M_k epsilon) && optimize_solve N then optimize_non_shrinking (k + 1) else k else failwith "Exceeded iterations limit" let iterations = if parameters.shrinking then optimize_shrinking 0 shrinking_iterations N false else optimize_non_shrinking 0 info "#iterations = %d" iterations /// Reconstruction of hyperplane bias let bias = let mutable b = 0.0 let mutable M = 0 for i = 0 to N-1 do if isUnbound i then b <- b + _y_gf i M <- M + 1 DivideByInt b M /// Remove support vectors with A.[i] = 0.0 and compute Y.[i]*A.[i] let N' = Array.sumBy (fun a -> if a <> 0.0 then 1 else 0) A let X' = Array.zeroCreate N' let A' = Array.zeroCreate N' let mutable j = 0 for i = 0 to N-1 do if A.[i] <> 0.0 then X'.[j] <- X.[i] A'.[j] <- Y.[i]*A.[i] j <- j + 1 info "support vectors = %d" N' SVM(K,X',A',bias)
to join this conversation on GitHub. Already have an account? Sign in to comment