Last active
August 11, 2016 13:36
-
-
Save sslipchenko/b923c9d2cac8692e614daeb4dd1910b8 to your computer and use it in GitHub Desktop.
Optimization stages of Sequential Minimal Optimization (SMO) algorithm in F#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2016 Serge Slipchenko (Serge.Slipchenko@gmail.com) | |
// | |
// Licensed under the Apache License, Version 2.0 (the "License"); | |
// you may not use this file except in compliance with the License. | |
// You may obtain a copy of the License at | |
// | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// | |
// Unless required by applicable law or agreed to in writing, software | |
// distributed under the License is distributed on an "AS IS" BASIS, | |
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
// See the License for the specific language governing permissions and | |
// limitations under the License. | |
namespace Semagle.MachineLearning.SVM | |
open LanguagePrimitives | |
[<Measure>] type MB | |
/// Implementation of Sequential Minimal Optimization (SMO) algorithm | |
module SMO = | |
[<Literal>] | |
let private tau = 1e-12 | |
[<Literal>] | |
let private not_found = -1 | |
[<Literal>] | |
let private shrinking_iterations = 1000 | |
type WSSStrategy = MaximalViolatingPair | SecondOrderInformation | |
type C_SVM = { iterations : int; epsilon : float; C : float; strategy : WSSStrategy; shrinking : bool; cacheSize : int<MB> } | |
let inline swap (a : 'A[]) i j = | |
let tmp = a.[i] | |
a.[i] <- a.[j] | |
a.[j] <- tmp | |
/// LRU list of computed columns | |
type private LRU(capacity : int, N : int, Q : int -> int -> float) = | |
let indices = Array.zeroCreate<int> capacity | |
let columns = Array.zeroCreate<float[]> capacity | |
let lengths = Array.zeroCreate<int> capacity | |
let mutable first = 0 | |
let mutable last = 0 | |
/// Returns cache capacity | |
member lru.Capacity = capacity | |
/// Returns L elements of j-th column of Q matrix | |
member lru.Get (j : int) (L : int) = | |
let index = lru.tryFindIndex j | |
if index <> not_found then | |
let column = columns.[index] | |
let length = lengths.[index] | |
if length < L then | |
for i = length to L-1 do | |
column.[i] <- Q i j | |
lengths.[index] <- L | |
column | |
else | |
let column = Array.init N (fun i -> if i < L then Q i j else 0.0) | |
lru.insert j column L | |
column | |
/// Swap column elements | |
member lru.Swap (i : int) (j : int) = | |
let mutable index_i = not_found | |
let mutable index_j = not_found | |
let mutable k = first | |
while k <> last do | |
if indices.[k] = i then index_i <- k | |
if indices.[k] = j then index_j <- k | |
swap columns.[k] i j | |
k <- (k + 1) % capacity | |
if index_i <> not_found && index_j <> not_found then | |
swap lengths index_i index_j | |
if index_i <> not_found then indices.[index_i] <- j | |
if index_j <> not_found then indices.[index_j] <- i | |
/// Try to find computed column values | |
member private lru.tryFindIndex t = | |
let mutable i = first | |
while (i <> last) && (indices.[i] <> t) do | |
i <- (i + 1) % capacity | |
if i <> last then i else not_found | |
/// Insert new computed column values | |
member private lru.insert index column length = | |
indices.[last] <- index | |
columns.[last] <- column | |
lengths.[last] <- length | |
last <- (last + 1) % capacity | |
if first = last then first <- (first + 1) % capacity | |
/// Returns required capacity for the specified cache size and column length | |
static member capacity (cacheSize : int) (length : int) = | |
let columnSize = sizeof<float>*length + sizeof<int> + sizeof<float[]> | |
max 2 (cacheSize / columnSize) | |
let C_SVC (X : 'X[]) (Y : float[]) (K : Kernel<'X>) (parameters : C_SVM) = | |
if Array.length X <> Array.length Y then | |
invalidArg "X and Y" "have different lengths" | |
let info = printfn | |
let N = Array.length X | |
let C = Array.create N parameters.C | |
let epsilon = parameters.epsilon | |
let A = Array.zeroCreate N // optimization variables | |
let G = Array.create N -1.0 // gradient | |
let G' = Array.create N 0.0 // | |
// helper functions | |
let inline _y_gf i = -G.[i]*Y.[i] | |
let inline Q i j = (K X.[i] X.[j])*Y.[i]*Y.[j] | |
let Q_lru = new LRU(LRU.capacity ((int parameters.cacheSize)*1024*1024) N, N, Q) | |
info "cache capacity = %d" Q_lru.Capacity | |
let inline isUnbound i = 0.0 < A.[i] && A.[i] < C.[i] | |
let inline isUpBound i = Y.[i] = +1.0 && A.[i] = C.[i] || Y.[i] = -1.0 && A.[i] = 0.0 | |
let inline isLowBound i = Y.[i] = +1.0 && A.[i] = 0.0 || Y.[i] = -1.0 && A.[i] = C.[i] | |
let inline maxUp L = | |
let mutable max_i = not_found | |
let mutable max_v = System.Double.NegativeInfinity | |
for i = 0 to L-1 do | |
if not (isUpBound i) then | |
let v = _y_gf i | |
if v > max_v then | |
max_i <- i | |
max_v <- v | |
max_i | |
let inline minLow L = | |
let mutable min_i = not_found | |
let mutable min_v = System.Double.PositiveInfinity | |
for i = 0 to L-1 do | |
if not (isLowBound i) then | |
let v = _y_gf i | |
if v < min_v then | |
min_i <- i | |
min_v <- v | |
min_i | |
let inline minLowTo s L = | |
let Q_s = Q_lru.Get s L | |
let inline objective t = | |
let a_ts = (Q t t) + (Q_s.[s]) - 2.0*(Q_s.[t])*Y.[t]*Y.[s] | |
let b_ts = _y_gf t - _y_gf s | |
-b_ts*b_ts/(if a_ts > 0.0 then a_ts else tau) | |
let mutable min_i = not_found | |
let mutable min_v = System.Double.PositiveInfinity | |
for i = 0 to L-1 do | |
if not (isLowBound i) && (_y_gf i < _y_gf s) then | |
let v =objective i | |
if v < min_v then | |
min_i <- i | |
min_v <- v | |
min_i | |
/// Maximal violating pair working set selection strategy | |
let maximalViolatingPair L = | |
let i = maxUp L | |
if i = not_found then None else Some (i, minLow L) | |
/// Second order information working set selection strategy | |
let secondOrderInformation L = | |
let i = maxUp L | |
if i = not_found then None else Some (i, minLowTo i L) | |
let selectWorkingSet = | |
match parameters.strategy with | |
| MaximalViolatingPair -> maximalViolatingPair | |
| SecondOrderInformation -> secondOrderInformation | |
/// Solve an optimization sub-problem | |
let inline solve i j L = | |
let Q_i = Q_lru.Get i L | |
let Q_j = Q_lru.Get j L | |
let a_ij = (Q_i.[i]) + (Q_j.[j]) - 2.0*(Q_i.[j])*Y.[i]*Y.[j] | |
if Y.[i] <> Y.[j] then | |
let delta = (-G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij) | |
let diff = A.[i] - A.[j] | |
match (A.[i] + delta, A.[j] + delta) with | |
| _, a_j when diff > 0.0 && a_j < 0.0 -> (diff, 0.0) | |
| a_i, _ when (* diff <= 0.0 && *) a_i < 0.0 -> (0.0, diff) | |
| _, a_j when diff <= C.[i] - C.[j] && a_j > C.[j] -> (C.[j]+diff, C.[j]) | |
| a_i, _ when (* diff > C.[i] - C.[j] && *) a_i > C.[i] -> (C.[i], C.[i] - diff) | |
| a_i, a_j -> a_i, a_j | |
else | |
let delta = (G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij) | |
let sum = A.[i] + A.[j] | |
match (A.[i] - delta, A.[j] + delta) with | |
| a_i, _ when sum > C.[i] && a_i > C.[i] -> (C.[i], sum - C.[i]) | |
| _, a_j when (* sum <= C.[i] && *) a_j < 0.0 -> (sum, 0.0) | |
| _, a_j when sum > C.[j] && a_j > C.[j] -> (sum - C.[j], C.[j]) | |
| a_i, _ when (* sum <= C.[j] && *) a_i < 0.0 -> (0.0, sum) | |
| a_i, a_j -> a_i, a_j | |
/// Update gradient | |
let inline updateG i j a_i a_j L = | |
let Q_i = Q_lru.Get i L | |
let Q_j = Q_lru.Get j L | |
for t = 0 to L-1 do | |
G.[t] <- G.[t] + (Q_i.[t])*(a_i - A.[i]) + (Q_j.[t])*(a_j - A.[j]) | |
let inline updateG' i a = | |
let sign = match a, A.[i] with | |
| _ when a = C.[i] && A.[i] <> C.[i] -> +1.0 | |
| _ when a <> C.[i] && A.[i] = C.[i] -> -1.0 | |
| _ -> 0.0 | |
if sign <> 0.0 then | |
let Q_i = Q_lru.Get i N | |
for t = 0 to N-1 do | |
G'.[t] <- G'.[t] + sign * C.[i]*Q_i.[t] | |
/// Reconstruct gradient | |
let inline reconstructG L = | |
info "reconstruct gradient" | |
for t = L to N-1 do | |
G.[t] <- G'.[t] - 1.0 | |
let unbound = seq { 0..L-1 } |> Seq.filter isUnbound |> Seq.length | |
if unbound*L > 2*L*(N-L) then | |
for i = L to N-1 do | |
let Q_i = Q_lru.Get i L | |
for j = 0 to L-1 do | |
if isUnbound j then | |
G.[i] <- G.[i] + A.[j]*Q_i.[j] | |
else | |
for i = 0 to L-1 do | |
if isUnbound i then | |
let Q_i = Q_lru.Get i N | |
for j = L to N-1 do | |
G.[j] <- G.[j] + A.[i]*Q_i.[j] | |
let inline m L = | |
let mutable max_v = System.Double.NegativeInfinity | |
for i = 0 to L-1 do | |
if not (isUpBound i) then | |
let v = _y_gf i | |
if v > max_v then | |
max_v <- v | |
max_v | |
let inline M L = | |
let mutable min_v = System.Double.PositiveInfinity | |
for i = 0 to L-1 do | |
if not (isLowBound i) then | |
let v = _y_gf i | |
if v < min_v then | |
min_v <- v | |
min_v | |
let inline shrink m M L = | |
let inline isShrinked i = (isUpBound i) && (_y_gf i) > m || (isLowBound i) && (_y_gf i) < M | |
let inline swapAll i j = | |
swap X i j; swap Y i j | |
swap C i j; swap A i j | |
swap G i j; swap G' i j | |
Q_lru.Swap i j | |
let mutable swaps = 0 | |
let mutable i = 0 | |
let mutable j = L - 1 | |
let mutable k = L - 1 | |
let mutable shrinked = 0 | |
while i <= j do | |
match (isShrinked i), (isShrinked j) with | |
| false, false -> i <- i + 1; j <- j - 1 | |
| true, false -> j <- j - 1 | |
| false, true -> i <- i + 1 | |
| _ -> | |
if j = k then | |
j <- j - 1 | |
else | |
swaps <- swaps + 1 | |
swapAll i k | |
i <- i + 1 | |
shrinked <- shrinked + 1 | |
k <- k - 1 | |
if shrinked > 0 then | |
info "shrinked = %d, active = %d, swaps = %d" shrinked (L - shrinked) swaps | |
L - shrinked | |
let inline isOptimal m M epsilon = (m - M) <= epsilon | |
/// Sequential Minimal Optimization (SMO) Algorithm | |
let inline optimize_solve L = | |
// Find a pair of elements that violate the optimality condition | |
match selectWorkingSet L with | |
| Some (i, j) -> | |
// Solve the optimization sub-problem | |
let a_i, a_j = solve i j L | |
// Update the gradient | |
updateG i j a_i a_j L | |
if parameters.shrinking then | |
updateG' i a_i | |
updateG' j a_j | |
// Update the solution | |
A.[i] <- a_i; A.[j] <- a_j | |
true | |
| None -> false | |
/// Optimize with shrinking every 1000 iterations | |
let rec optimize_shrinking k s L reconstructed = | |
let inline optimize_shrink m M L = | |
let mutable reconstructed = reconstructed | |
if not reconstructed && isOptimal m M (10.0*epsilon) then | |
reconstructG L | |
reconstructed <- true | |
reconstructed, shrink m M L | |
if k < parameters.iterations then | |
let m_k = m L | |
let M_k = M L | |
// shrink active set | |
let reconstructed, L = | |
if s = 0 then | |
optimize_shrink m_k M_k L | |
else | |
reconstructed, L | |
// check optimality for active set | |
if isOptimal m_k M_k epsilon then | |
reconstructG L | |
// check optimality for full set | |
if not(isOptimal (m L) (M L) epsilon) && optimize_solve N then | |
// shrink on next iteration | |
optimize_shrinking k 1 N reconstructed | |
else | |
k | |
else | |
if optimize_solve L then | |
optimize_shrinking (k + 1) (if s > 0 then (s - 1) else shrinking_iterations) L reconstructed | |
else | |
k | |
else | |
failwith "Exceeded iterations limit" | |
/// Optimize without shrinking every 1000 iterations | |
let rec optimize_non_shrinking k = | |
if k < parameters.iterations then | |
let m_k = m N | |
let M_k = M N | |
if not (isOptimal m_k M_k epsilon) && optimize_solve N then | |
optimize_non_shrinking (k + 1) | |
else | |
k | |
else | |
failwith "Exceeded iterations limit" | |
let iterations = | |
if parameters.shrinking then | |
optimize_shrinking 0 shrinking_iterations N false | |
else | |
optimize_non_shrinking 0 | |
info "#iterations = %d" iterations | |
/// Reconstruction of hyperplane bias | |
let bias = | |
let mutable b = 0.0 | |
let mutable M = 0 | |
for i = 0 to N-1 do | |
if isUnbound i then | |
b <- b + _y_gf i | |
M <- M + 1 | |
DivideByInt b M | |
/// Remove support vectors with A.[i] = 0.0 and compute Y.[i]*A.[i] | |
let N' = Array.sumBy (fun a -> if a <> 0.0 then 1 else 0) A | |
let X' = Array.zeroCreate N' | |
let A' = Array.zeroCreate N' | |
let mutable j = 0 | |
for i = 0 to N-1 do | |
if A.[i] <> 0.0 then | |
X'.[j] <- X.[i] | |
A'.[j] <- Y.[i]*A.[i] | |
j <- j + 1 | |
info "support vectors = %d" N' | |
SVM(K,X',A',bias) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment