Instantly share code, notes, and snippets.

@sslipchenko /SMO.fs
Last active Aug 11, 2016

Embed
What would you like to do?
Optimization stages of Sequential Minimal Optimization (SMO) algorithm in F#
// Copyright 2016 Serge Slipchenko (Serge.Slipchenko@gmail.com)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
namespace Semagle.MachineLearning.SVM
open LanguagePrimitives
[<Measure>] type MB
/// Implementation of Sequential Minimal Optimization (SMO) algorithm
module SMO =
[<Literal>]
let private tau = 1e-12
[<Literal>]
let private not_found = -1
[<Literal>]
let private shrinking_iterations = 1000
type WSSStrategy = MaximalViolatingPair | SecondOrderInformation
type C_SVM = { iterations : int; epsilon : float; C : float; strategy : WSSStrategy; shrinking : bool; cacheSize : int<MB> }
let inline swap (a : 'A[]) i j =
let tmp = a.[i]
a.[i] <- a.[j]
a.[j] <- tmp
/// LRU list of computed columns
type private LRU(capacity : int, N : int, Q : int -> int -> float) =
let indices = Array.zeroCreate<int> capacity
let columns = Array.zeroCreate<float[]> capacity
let lengths = Array.zeroCreate<int> capacity
let mutable first = 0
let mutable last = 0
/// Returns cache capacity
member lru.Capacity = capacity
/// Returns L elements of j-th column of Q matrix
member lru.Get (j : int) (L : int) =
let index = lru.tryFindIndex j
if index <> not_found then
let column = columns.[index]
let length = lengths.[index]
if length < L then
for i = length to L-1 do
column.[i] <- Q i j
lengths.[index] <- L
column
else
let column = Array.init N (fun i -> if i < L then Q i j else 0.0)
lru.insert j column L
column
/// Swap column elements
member lru.Swap (i : int) (j : int) =
let mutable index_i = not_found
let mutable index_j = not_found
let mutable k = first
while k <> last do
if indices.[k] = i then index_i <- k
if indices.[k] = j then index_j <- k
swap columns.[k] i j
k <- (k + 1) % capacity
if index_i <> not_found && index_j <> not_found then
swap lengths index_i index_j
if index_i <> not_found then indices.[index_i] <- j
if index_j <> not_found then indices.[index_j] <- i
/// Try to find computed column values
member private lru.tryFindIndex t =
let mutable i = first
while (i <> last) && (indices.[i] <> t) do
i <- (i + 1) % capacity
if i <> last then i else not_found
/// Insert new computed column values
member private lru.insert index column length =
indices.[last] <- index
columns.[last] <- column
lengths.[last] <- length
last <- (last + 1) % capacity
if first = last then first <- (first + 1) % capacity
/// Returns required capacity for the specified cache size and column length
static member capacity (cacheSize : int) (length : int) =
let columnSize = sizeof<float>*length + sizeof<int> + sizeof<float[]>
max 2 (cacheSize / columnSize)
let C_SVC (X : 'X[]) (Y : float[]) (K : Kernel<'X>) (parameters : C_SVM) =
if Array.length X <> Array.length Y then
invalidArg "X and Y" "have different lengths"
let info = printfn
let N = Array.length X
let C = Array.create N parameters.C
let epsilon = parameters.epsilon
let A = Array.zeroCreate N // optimization variables
let G = Array.create N -1.0 // gradient
let G' = Array.create N 0.0 //
// helper functions
let inline _y_gf i = -G.[i]*Y.[i]
let inline Q i j = (K X.[i] X.[j])*Y.[i]*Y.[j]
let Q_lru = new LRU(LRU.capacity ((int parameters.cacheSize)*1024*1024) N, N, Q)
info "cache capacity = %d" Q_lru.Capacity
let inline isUnbound i = 0.0 < A.[i] && A.[i] < C.[i]
let inline isUpBound i = Y.[i] = +1.0 && A.[i] = C.[i] || Y.[i] = -1.0 && A.[i] = 0.0
let inline isLowBound i = Y.[i] = +1.0 && A.[i] = 0.0 || Y.[i] = -1.0 && A.[i] = C.[i]
let inline maxUp L =
let mutable max_i = not_found
let mutable max_v = System.Double.NegativeInfinity
for i = 0 to L-1 do
if not (isUpBound i) then
let v = _y_gf i
if v > max_v then
max_i <- i
max_v <- v
max_i
let inline minLow L =
let mutable min_i = not_found
let mutable min_v = System.Double.PositiveInfinity
for i = 0 to L-1 do
if not (isLowBound i) then
let v = _y_gf i
if v < min_v then
min_i <- i
min_v <- v
min_i
let inline minLowTo s L =
let Q_s = Q_lru.Get s L
let inline objective t =
let a_ts = (Q t t) + (Q_s.[s]) - 2.0*(Q_s.[t])*Y.[t]*Y.[s]
let b_ts = _y_gf t - _y_gf s
-b_ts*b_ts/(if a_ts > 0.0 then a_ts else tau)
let mutable min_i = not_found
let mutable min_v = System.Double.PositiveInfinity
for i = 0 to L-1 do
if not (isLowBound i) && (_y_gf i < _y_gf s) then
let v =objective i
if v < min_v then
min_i <- i
min_v <- v
min_i
/// Maximal violating pair working set selection strategy
let maximalViolatingPair L =
let i = maxUp L
if i = not_found then None else Some (i, minLow L)
/// Second order information working set selection strategy
let secondOrderInformation L =
let i = maxUp L
if i = not_found then None else Some (i, minLowTo i L)
let selectWorkingSet =
match parameters.strategy with
| MaximalViolatingPair -> maximalViolatingPair
| SecondOrderInformation -> secondOrderInformation
/// Solve an optimization sub-problem
let inline solve i j L =
let Q_i = Q_lru.Get i L
let Q_j = Q_lru.Get j L
let a_ij = (Q_i.[i]) + (Q_j.[j]) - 2.0*(Q_i.[j])*Y.[i]*Y.[j]
if Y.[i] <> Y.[j] then
let delta = (-G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
let diff = A.[i] - A.[j]
match (A.[i] + delta, A.[j] + delta) with
| _, a_j when diff > 0.0 && a_j < 0.0 -> (diff, 0.0)
| a_i, _ when (* diff <= 0.0 && *) a_i < 0.0 -> (0.0, diff)
| _, a_j when diff <= C.[i] - C.[j] && a_j > C.[j] -> (C.[j]+diff, C.[j])
| a_i, _ when (* diff > C.[i] - C.[j] && *) a_i > C.[i] -> (C.[i], C.[i] - diff)
| a_i, a_j -> a_i, a_j
else
let delta = (G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
let sum = A.[i] + A.[j]
match (A.[i] - delta, A.[j] + delta) with
| a_i, _ when sum > C.[i] && a_i > C.[i] -> (C.[i], sum - C.[i])
| _, a_j when (* sum <= C.[i] && *) a_j < 0.0 -> (sum, 0.0)
| _, a_j when sum > C.[j] && a_j > C.[j] -> (sum - C.[j], C.[j])
| a_i, _ when (* sum <= C.[j] && *) a_i < 0.0 -> (0.0, sum)
| a_i, a_j -> a_i, a_j
/// Update gradient
let inline updateG i j a_i a_j L =
let Q_i = Q_lru.Get i L
let Q_j = Q_lru.Get j L
for t = 0 to L-1 do
G.[t] <- G.[t] + (Q_i.[t])*(a_i - A.[i]) + (Q_j.[t])*(a_j - A.[j])
let inline updateG' i a =
let sign = match a, A.[i] with
| _ when a = C.[i] && A.[i] <> C.[i] -> +1.0
| _ when a <> C.[i] && A.[i] = C.[i] -> -1.0
| _ -> 0.0
if sign <> 0.0 then
let Q_i = Q_lru.Get i N
for t = 0 to N-1 do
G'.[t] <- G'.[t] + sign * C.[i]*Q_i.[t]
/// Reconstruct gradient
let inline reconstructG L =
info "reconstruct gradient"
for t = L to N-1 do
G.[t] <- G'.[t] - 1.0
let unbound = seq { 0..L-1 } |> Seq.filter isUnbound |> Seq.length
if unbound*L > 2*L*(N-L) then
for i = L to N-1 do
let Q_i = Q_lru.Get i L
for j = 0 to L-1 do
if isUnbound j then
G.[i] <- G.[i] + A.[j]*Q_i.[j]
else
for i = 0 to L-1 do
if isUnbound i then
let Q_i = Q_lru.Get i N
for j = L to N-1 do
G.[j] <- G.[j] + A.[i]*Q_i.[j]
let inline m L =
let mutable max_v = System.Double.NegativeInfinity
for i = 0 to L-1 do
if not (isUpBound i) then
let v = _y_gf i
if v > max_v then
max_v <- v
max_v
let inline M L =
let mutable min_v = System.Double.PositiveInfinity
for i = 0 to L-1 do
if not (isLowBound i) then
let v = _y_gf i
if v < min_v then
min_v <- v
min_v
let inline shrink m M L =
let inline isShrinked i = (isUpBound i) && (_y_gf i) > m || (isLowBound i) && (_y_gf i) < M
let inline swapAll i j =
swap X i j; swap Y i j
swap C i j; swap A i j
swap G i j; swap G' i j
Q_lru.Swap i j
let mutable swaps = 0
let mutable i = 0
let mutable j = L - 1
let mutable k = L - 1
let mutable shrinked = 0
while i <= j do
match (isShrinked i), (isShrinked j) with
| false, false -> i <- i + 1; j <- j - 1
| true, false -> j <- j - 1
| false, true -> i <- i + 1
| _ ->
if j = k then
j <- j - 1
else
swaps <- swaps + 1
swapAll i k
i <- i + 1
shrinked <- shrinked + 1
k <- k - 1
if shrinked > 0 then
info "shrinked = %d, active = %d, swaps = %d" shrinked (L - shrinked) swaps
L - shrinked
let inline isOptimal m M epsilon = (m - M) <= epsilon
/// Sequential Minimal Optimization (SMO) Algorithm
let inline optimize_solve L =
// Find a pair of elements that violate the optimality condition
match selectWorkingSet L with
| Some (i, j) ->
// Solve the optimization sub-problem
let a_i, a_j = solve i j L
// Update the gradient
updateG i j a_i a_j L
if parameters.shrinking then
updateG' i a_i
updateG' j a_j
// Update the solution
A.[i] <- a_i; A.[j] <- a_j
true
| None -> false
/// Optimize with shrinking every 1000 iterations
let rec optimize_shrinking k s L reconstructed =
let inline optimize_shrink m M L =
let mutable reconstructed = reconstructed
if not reconstructed && isOptimal m M (10.0*epsilon) then
reconstructG L
reconstructed <- true
reconstructed, shrink m M L
if k < parameters.iterations then
let m_k = m L
let M_k = M L
// shrink active set
let reconstructed, L =
if s = 0 then
optimize_shrink m_k M_k L
else
reconstructed, L
// check optimality for active set
if isOptimal m_k M_k epsilon then
reconstructG L
// check optimality for full set
if not(isOptimal (m L) (M L) epsilon) && optimize_solve N then
// shrink on next iteration
optimize_shrinking k 1 N reconstructed
else
k
else
if optimize_solve L then
optimize_shrinking (k + 1) (if s > 0 then (s - 1) else shrinking_iterations) L reconstructed
else
k
else
failwith "Exceeded iterations limit"
/// Optimize without shrinking every 1000 iterations
let rec optimize_non_shrinking k =
if k < parameters.iterations then
let m_k = m N
let M_k = M N
if not (isOptimal m_k M_k epsilon) && optimize_solve N then
optimize_non_shrinking (k + 1)
else
k
else
failwith "Exceeded iterations limit"
let iterations =
if parameters.shrinking then
optimize_shrinking 0 shrinking_iterations N false
else
optimize_non_shrinking 0
info "#iterations = %d" iterations
/// Reconstruction of hyperplane bias
let bias =
let mutable b = 0.0
let mutable M = 0
for i = 0 to N-1 do
if isUnbound i then
b <- b + _y_gf i
M <- M + 1
DivideByInt b M
/// Remove support vectors with A.[i] = 0.0 and compute Y.[i]*A.[i]
let N' = Array.sumBy (fun a -> if a <> 0.0 then 1 else 0) A
let X' = Array.zeroCreate N'
let A' = Array.zeroCreate N'
let mutable j = 0
for i = 0 to N-1 do
if A.[i] <> 0.0 then
X'.[j] <- X.[i]
A'.[j] <- Y.[i]*A.[i]
j <- j + 1
info "support vectors = %d" N'
SVM(K,X',A',bias)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment