Last active
August 11, 2016 13:36
-
-
Save sslipchenko/b923c9d2cac8692e614daeb4dd1910b8 to your computer and use it in GitHub Desktop.
Optimization stages of Sequential Minimal Optimization (SMO) algorithm in F#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2016 Serge Slipchenko (Serge.Slipchenko@gmail.com) | |
// | |
// Licensed under the Apache License, Version 2.0 (the "License"); | |
// you may not use this file except in compliance with the License. | |
// You may obtain a copy of the License at | |
// | |
// http://www.apache.org/licenses/LICENSE-2.0 | |
// | |
// Unless required by applicable law or agreed to in writing, software | |
// distributed under the License is distributed on an "AS IS" BASIS, | |
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
// See the License for the specific language governing permissions and | |
// limitations under the License. | |
namespace Semagle.MachineLearning.SVM | |
open LanguagePrimitives | |
[<Measure>] type MB | |
/// Implementation of Sequential Minimal Optimization (SMO) algorithm | |
module SMO = | |
[<Literal>] | |
let private tau = 1e-12 | |
[<Literal>] | |
let private not_found = -1 | |
[<Literal>] | |
let private shrinking_iterations = 1000 | |
type WSSStrategy = MaximalViolatingPair | SecondOrderInformation | |
type C_SVM = { iterations : int; epsilon : float; C : float; strategy : WSSStrategy; shrinking : bool; cacheSize : int<MB> } | |
let inline swap (a : 'A[]) i j = | |
let tmp = a.[i] | |
a.[i] <- a.[j] | |
a.[j] <- tmp | |
/// LRU list of computed columns | |
type private LRU(capacity : int, N : int, Q : int -> int -> float) = | |
let indices = Array.zeroCreate<int> capacity | |
let columns = Array.zeroCreate<float[]> capacity | |
let lengths = Array.zeroCreate<int> capacity | |
let mutable first = 0 | |
let mutable last = 0 | |
/// Returns cache capacity | |
member lru.Capacity = capacity | |
/// Returns L elements of j-th column of Q matrix | |
member lru.Get (j : int) (L : int) = | |
let index = lru.tryFindIndex j | |
if index <> not_found then | |
let column = columns.[index] | |
let length = lengths.[index] | |
if length < L then | |
for i = length to L-1 do | |
column.[i] <- Q i j | |
lengths.[index] <- L | |
column | |
else | |
let column = Array.init N (fun i -> if i < L then Q i j else 0.0) | |
lru.insert j column L | |
column | |
/// Swap column elements | |
member lru.Swap (i : int) (j : int) = | |
let mutable index_i = not_found | |
let mutable index_j = not_found | |
let mutable k = first | |
while k <> last do | |
if indices.[k] = i then index_i <- k | |
if indices.[k] = j then index_j <- k | |
swap columns.[k] i j | |
k <- (k + 1) % capacity | |
if index_i <> not_found && index_j <> not_found then | |
swap lengths index_i index_j | |
if index_i <> not_found then indices.[index_i] <- j | |
if index_j <> not_found then indices.[index_j] <- i | |
/// Try to find computed column values | |
member private lru.tryFindIndex t = | |
let mutable i = first | |
while (i <> last) && (indices.[i] <> t) do | |
i <- (i + 1) % capacity | |
if i <> last then i else not_found | |
/// Insert new computed column values | |
member private lru.insert index column length = | |
indices.[last] <- index | |
columns.[last] <- column | |
lengths.[last] <- length | |
last <- (last + 1) % capacity | |
if first = last then first <- (first + 1) % capacity | |
/// Returns required capacity for the specified cache size and column length | |
static member capacity (cacheSize : int) (length : int) = | |
let columnSize = sizeof<float>*length + sizeof<int> + sizeof<float[]> | |
max 2 (cacheSize / columnSize) | |
let C_SVC (X : 'X[]) (Y : float[]) (K : Kernel<'X>) (parameters : C_SVM) = | |
if Array.length X <> Array.length Y then | |
invalidArg "X and Y" "have different lengths" | |
let info = printfn | |
let N = Array.length X | |
let C = Array.create N parameters.C | |
let epsilon = parameters.epsilon | |
let A = Array.zeroCreate N // optimization variables | |
let G = Array.create N -1.0 // gradient | |
let G' = Array.create N 0.0 // | |
// helper functions | |
let inline _y_gf i = -G.[i]*Y.[i] | |
let inline Q i j = (K X.[i] X.[j])*Y.[i]*Y.[j] | |
let Q_lru = new LRU(LRU.capacity ((int parameters.cacheSize)*1024*1024) N, N, Q) | |
info "cache capacity = %d" Q_lru.Capacity | |
let I_up L = seq { 0..L-1 } |> Seq.filter (fun i -> (Y.[i] = +1.0 && A.[i] < C.[i]) || (Y.[i] = -1.0 && A.[i] > 0.0)) | |
let I_low L = seq { 0..L-1 } |> Seq.filter (fun i -> (Y.[i] = -1.0 && A.[i] < C.[i]) || (Y.[i] = +1.0 && A.[i] > 0.0)) | |
let inline maxUp up = | |
if Seq.isEmpty up then not_found else Seq.maxBy _y_gf up | |
let inline minLow low = | |
if Seq.isEmpty low then not_found else Seq.minBy _y_gf low | |
let inline minLowTo low s L = | |
let ts = low |> Seq.filter (fun t -> _y_gf t < _y_gf s) | |
let Q_s = Q_lru.Get s L | |
let inline objective t = | |
let a_ts = (Q t t) + (Q_s.[s]) - 2.0*(Q_s.[t])*Y.[t]*Y.[s] | |
let b_ts = _y_gf t - _y_gf s | |
-b_ts*b_ts/(if a_ts > 0.0 then a_ts else tau) | |
if Seq.isEmpty ts then not_found else Seq.minBy objective ts | |
/// Maximal violating pair working set selection strategy | |
let maximalViolatingPair up low L = | |
let i = maxUp up | |
if i = not_found then None else Some (i, minLow low) | |
/// Second order information working set selection strategy | |
let secondOrderInformation up low L = | |
let i = maxUp up | |
if i = not_found then None else Some (i, minLowTo low i L) | |
let selectWorkingSet = | |
match parameters.strategy with | |
| MaximalViolatingPair -> maximalViolatingPair | |
| SecondOrderInformation -> secondOrderInformation | |
/// Solve an optimization sub-problem | |
let inline solve i j L = | |
let Q_i = Q_lru.Get i L | |
let Q_j = Q_lru.Get j L | |
let a_ij = (Q_i.[i]) + (Q_j.[j]) - 2.0*(Q_i.[j])*Y.[i]*Y.[j] | |
if Y.[i] <> Y.[j] then | |
let delta = (-G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij) | |
let diff = A.[i] - A.[j] | |
match (A.[i] + delta, A.[j] + delta) with | |
| _, a_j when diff > 0.0 && a_j < 0.0 -> (diff, 0.0) | |
| a_i, _ when diff <= 0.0 && a_i < 0.0 -> (0.0, diff) | |
| _, a_j when diff <= C.[i] - C.[j] && a_j > C.[j] -> (C.[j]+diff, C.[j]) | |
| a_i, _ when diff > C.[i] - C.[j] && a_i > C.[i] -> (C.[i], C.[i] - diff) | |
| a_i, a_j -> a_i, a_j | |
else | |
let delta = (G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij) | |
let sum = A.[i] + A.[j] | |
match (A.[i] - delta, A.[j] + delta) with | |
| a_i, _ when sum > C.[i] && a_i > C.[i] -> (C.[i], sum - C.[i]) | |
| _, a_j when sum <= C.[i] && a_j < 0.0 -> (sum, 0.0) | |
| _, a_j when sum > C.[j] && a_j > C.[j] -> (sum - C.[j], C.[j]) | |
| a_i, _ when sum <= C.[j] && a_i < 0.0 -> (0.0, sum) | |
| a_i, a_j -> a_i, a_j | |
/// Update gradient | |
let inline updateG i j a_i a_j L = | |
let Q_i = Q_lru.Get i L | |
let Q_j = Q_lru.Get j L | |
for t = 0 to L-1 do | |
G.[t] <- G.[t] + (Q_i.[t])*(a_i - A.[i]) + (Q_j.[t])*(a_j - A.[j]) | |
let inline updateG' i a = | |
let sign = match a, A.[i] with | |
| _ when a = C.[i] && A.[i] <> C.[i] -> +1.0 | |
| _ when a <> C.[i] && A.[i] = C.[i] -> -1.0 | |
| _ -> 0.0 | |
if sign <> 0.0 then | |
let Q_i = Q_lru.Get i N | |
for t = 0 to N-1 do | |
G'.[t] <- G'.[t] + sign * C.[i]*Q_i.[t] | |
/// Reconstruct gradient | |
let inline reconstructG L = | |
info "reconstruct gradient" | |
for t = L to N-1 do | |
G.[t] <- G'.[t] - 1.0 | |
let inline isFree i = A.[i] <> 0.0 && A.[i] <> C.[i] | |
let free = seq { 0..L-1 } |> Seq.filter isFree |> Seq.length | |
if free*L > 2*L*(N-L) then | |
for i = L to N-1 do | |
let Q_i = Q_lru.Get i L | |
for j = 0 to L-1 do | |
if isFree j then | |
G.[i] <- G.[i] + A.[j]*Q_i.[j] | |
else | |
for i = 0 to L-1 do | |
if isFree i then | |
let Q_i = Q_lru.Get i N | |
for j = L to N-1 do | |
G.[j] <- G.[j] + A.[i]*Q_i.[j] | |
let inline m up = up |> Seq.map _y_gf |> Seq.max | |
let inline M low = low |> Seq.map _y_gf |> Seq.min | |
let inline shrink m M L = | |
let shrinked = seq { 0..L-1 } |> Seq.filter (fun i -> | |
(Y.[i] = +1.0 && A.[i] = C.[i] || Y.[i] = -1.0 && A.[i] = 0.0) && (_y_gf i) > m || | |
(Y.[i] = +1.0 && A.[i] = 0.0 || Y.[i] = -1.0 && A.[i] = C.[i]) && (_y_gf i) < M) |> Seq.toArray | |
if Array.length shrinked > 0 then | |
let inline swapAll i j = | |
swap X i j; swap Y i j | |
swap C i j; swap A i j | |
swap G i j; swap G' i j | |
Q_lru.Swap i j | |
let mutable swaps = 0 | |
let mutable i = 0 | |
let mutable j = (Array.length shrinked) - 1 | |
let mutable k = L - 1 | |
while i <= j do | |
if shrinked.[j] = k then | |
j <- j - 1 | |
else | |
swaps <- swaps + 1 | |
swapAll shrinked.[i] k | |
i <- i + 1 | |
k <- k - 1 | |
let shrinked = Array.length shrinked | |
info "shrinked = %d, active = %d, swaps = %d" shrinked (L - shrinked) swaps | |
L - shrinked | |
else | |
L | |
let inline isOptimal m M epsilon = (m - M) <= epsilon | |
/// Sequential Minimal Optimization (SMO) Algorithm | |
let inline optimize_solve up low L = | |
/// 1. Find a pair of elements that violate the optimality condition | |
match selectWorkingSet up low L with | |
| Some (i, j) -> | |
/// 2. Solve the optimization sub-problem | |
let a_i, a_j = solve i j L | |
/// 3. Update the gradient | |
updateG i j a_i a_j L | |
updateG' i a_i | |
updateG' j a_j | |
/// 4. Update the solution | |
A.[i] <- a_i; A.[j] <- a_j | |
true | |
| None -> false | |
/// Optimize with shrinking every 1000 iterations | |
let rec optimize_shrinking k s L reconstructed = | |
let inline optimize_shrink m M L = | |
let mutable reconstructed = reconstructed | |
if not reconstructed && isOptimal m M (10.0*epsilon) then | |
reconstructG L | |
reconstructed <- true | |
reconstructed, shrink m M L | |
if k < parameters.iterations then | |
let up_k = I_up L |> Seq.cache | |
let low_k = I_low L |> Seq.cache | |
let m_k = m up_k | |
let M_k = M low_k | |
let reconstructed, L = | |
if s = 0 then | |
optimize_shrink m_k M_k L | |
else | |
reconstructed, L | |
if isOptimal m_k M_k epsilon then | |
reconstructG L | |
let up_k = I_up N |> Seq.cache | |
let low_k = I_low N |> Seq.cache | |
if not(isOptimal (m (up_k)) (M (low_k)) epsilon) && optimize_solve up_k low_k N then | |
optimize_shrinking k 1 N reconstructed | |
else | |
k | |
else | |
if optimize_solve up_k low_k L then | |
optimize_shrinking (k + 1) (if s > 0 then (s - 1) else shrinking_iterations) L reconstructed | |
else | |
k | |
else | |
failwith "Exceeded iterations limit" | |
/// Optimize without shrinking every 1000 iterations | |
let rec optimize_non_shrinking k = | |
if k < parameters.iterations then | |
let up_k = I_up N |> Seq.cache | |
let low_k = I_low N |> Seq.cache | |
let m_k = m up_k | |
let M_k = M low_k | |
if not (isOptimal m_k M_k epsilon) && optimize_solve up_k low_k N then | |
optimize_non_shrinking (k + 1) | |
else | |
k | |
else | |
failwith "Exceeded iterations limit" | |
let iterations = | |
if parameters.shrinking then | |
optimize_shrinking 0 shrinking_iterations N false | |
else | |
optimize_non_shrinking 0 | |
info "#iterations = %d" iterations | |
/// Reconstruction of hyperplane bias | |
let bias = | |
let mutable b = 0.0 | |
let mutable M = 0 | |
for i = 0 to N-1 do | |
if 0.0 < A.[i] && A.[i] < C.[i] then | |
b <- b + _y_gf i | |
M <- M + 1 | |
DivideByInt b M | |
/// Remove support vectors with A.[i] = 0.0 and compute Y.[i]*A.[i] | |
let N' = Array.sumBy (fun a -> if a <> 0.0 then 1 else 0) A | |
let X' = Array.zeroCreate N' | |
let A' = Array.zeroCreate N' | |
let mutable j = 0 | |
for i = 0 to N-1 do | |
if A.[i] <> 0.0 then | |
X'.[j] <- X.[i] | |
A'.[j] <- Y.[i]*A.[i] | |
j <- j + 1 | |
info "support vectors = %d" N' | |
SVM(K,X',A',bias) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment