Instantly share code, notes, and snippets.

@sslipchenko /SMO.fs
Last active Aug 11, 2016

Embed
What would you like to do?
Optimization stages of Sequential Minimal Optimization (SMO) algorithm in F#
// Copyright 2016 Serge Slipchenko (Serge.Slipchenko@gmail.com)
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
namespace Semagle.MachineLearning.SVM
open LanguagePrimitives
[<Measure>] type MB
/// Implementation of Sequential Minimal Optimization (SMO) algorithm
module SMO =
[<Literal>]
let private tau = 1e-12
[<Literal>]
let private not_found = -1
[<Literal>]
let private shrinking_iterations = 1000
type WSSStrategy = MaximalViolatingPair | SecondOrderInformation
type C_SVM = { iterations : int; epsilon : float; C : float; strategy : WSSStrategy; shrinking : bool; cacheSize : int<MB> }
let inline swap (a : 'A[]) i j =
let tmp = a.[i]
a.[i] <- a.[j]
a.[j] <- tmp
/// LRU list of computed columns
type private LRU(capacity : int, N : int, Q : int -> int -> float) =
let indices = Array.zeroCreate<int> capacity
let columns = Array.zeroCreate<float[]> capacity
let lengths = Array.zeroCreate<int> capacity
let mutable first = 0
let mutable last = 0
/// Returns cache capacity
member lru.Capacity = capacity
/// Returns L elements of j-th column of Q matrix
member lru.Get (j : int) (L : int) =
let index = lru.tryFindIndex j
if index <> not_found then
let column = columns.[index]
let length = lengths.[index]
if length < L then
for i = length to L-1 do
column.[i] <- Q i j
lengths.[index] <- L
column
else
let column = Array.init N (fun i -> if i < L then Q i j else 0.0)
lru.insert j column L
column
/// Swap column elements
member lru.Swap (i : int) (j : int) =
let mutable index_i = not_found
let mutable index_j = not_found
let mutable k = first
while k <> last do
if indices.[k] = i then index_i <- k
if indices.[k] = j then index_j <- k
swap columns.[k] i j
k <- (k + 1) % capacity
if index_i <> not_found && index_j <> not_found then
swap lengths index_i index_j
if index_i <> not_found then indices.[index_i] <- j
if index_j <> not_found then indices.[index_j] <- i
/// Try to find computed column values
member private lru.tryFindIndex t =
let mutable i = first
while (i <> last) && (indices.[i] <> t) do
i <- (i + 1) % capacity
if i <> last then i else not_found
/// Insert new computed column values
member private lru.insert index column length =
indices.[last] <- index
columns.[last] <- column
lengths.[last] <- length
last <- (last + 1) % capacity
if first = last then first <- (first + 1) % capacity
/// Returns required capacity for the specified cache size and column length
static member capacity (cacheSize : int) (length : int) =
let columnSize = sizeof<float>*length + sizeof<int> + sizeof<float[]>
max 2 (cacheSize / columnSize)
let C_SVC (X : 'X[]) (Y : float[]) (K : Kernel<'X>) (parameters : C_SVM) =
if Array.length X <> Array.length Y then
invalidArg "X and Y" "have different lengths"
let info = printfn
let N = Array.length X
let C = Array.create N parameters.C
let epsilon = parameters.epsilon
let A = Array.zeroCreate N // optimization variables
let G = Array.create N -1.0 // gradient
let G' = Array.create N 0.0 //
// helper functions
let inline _y_gf i = -G.[i]*Y.[i]
let inline Q i j = (K X.[i] X.[j])*Y.[i]*Y.[j]
let Q_lru = new LRU(LRU.capacity ((int parameters.cacheSize)*1024*1024) N, N, Q)
info "cache capacity = %d" Q_lru.Capacity
let I_up L = seq { 0..L-1 } |> Seq.filter (fun i -> (Y.[i] = +1.0 && A.[i] < C.[i]) || (Y.[i] = -1.0 && A.[i] > 0.0))
let I_low L = seq { 0..L-1 } |> Seq.filter (fun i -> (Y.[i] = -1.0 && A.[i] < C.[i]) || (Y.[i] = +1.0 && A.[i] > 0.0))
let inline maxUp up =
if Seq.isEmpty up then not_found else Seq.maxBy _y_gf up
let inline minLow low =
if Seq.isEmpty low then not_found else Seq.minBy _y_gf low
let inline minLowTo low s L =
let ts = low |> Seq.filter (fun t -> _y_gf t < _y_gf s)
let Q_s = Q_lru.Get s L
let inline objective t =
let a_ts = (Q t t) + (Q_s.[s]) - 2.0*(Q_s.[t])*Y.[t]*Y.[s]
let b_ts = _y_gf t - _y_gf s
-b_ts*b_ts/(if a_ts > 0.0 then a_ts else tau)
if Seq.isEmpty ts then not_found else Seq.minBy objective ts
/// Maximal violating pair working set selection strategy
let maximalViolatingPair up low L =
let i = maxUp up
if i = not_found then None else Some (i, minLow low)
/// Second order information working set selection strategy
let secondOrderInformation up low L =
let i = maxUp up
if i = not_found then None else Some (i, minLowTo low i L)
let selectWorkingSet =
match parameters.strategy with
| MaximalViolatingPair -> maximalViolatingPair
| SecondOrderInformation -> secondOrderInformation
/// Solve an optimization sub-problem
let inline solve i j L =
let Q_i = Q_lru.Get i L
let Q_j = Q_lru.Get j L
let a_ij = (Q_i.[i]) + (Q_j.[j]) - 2.0*(Q_i.[j])*Y.[i]*Y.[j]
if Y.[i] <> Y.[j] then
let delta = (-G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
let diff = A.[i] - A.[j]
match (A.[i] + delta, A.[j] + delta) with
| _, a_j when diff > 0.0 && a_j < 0.0 -> (diff, 0.0)
| a_i, _ when diff <= 0.0 && a_i < 0.0 -> (0.0, diff)
| _, a_j when diff <= C.[i] - C.[j] && a_j > C.[j] -> (C.[j]+diff, C.[j])
| a_i, _ when diff > C.[i] - C.[j] && a_i > C.[i] -> (C.[i], C.[i] - diff)
| a_i, a_j -> a_i, a_j
else
let delta = (G.[i]-G.[j])/(if a_ij < 0.0 then tau else a_ij)
let sum = A.[i] + A.[j]
match (A.[i] - delta, A.[j] + delta) with
| a_i, _ when sum > C.[i] && a_i > C.[i] -> (C.[i], sum - C.[i])
| _, a_j when sum <= C.[i] && a_j < 0.0 -> (sum, 0.0)
| _, a_j when sum > C.[j] && a_j > C.[j] -> (sum - C.[j], C.[j])
| a_i, _ when sum <= C.[j] && a_i < 0.0 -> (0.0, sum)
| a_i, a_j -> a_i, a_j
/// Update gradient
let inline updateG i j a_i a_j L =
let Q_i = Q_lru.Get i L
let Q_j = Q_lru.Get j L
for t = 0 to L-1 do
G.[t] <- G.[t] + (Q_i.[t])*(a_i - A.[i]) + (Q_j.[t])*(a_j - A.[j])
let inline updateG' i a =
let sign = match a, A.[i] with
| _ when a = C.[i] && A.[i] <> C.[i] -> +1.0
| _ when a <> C.[i] && A.[i] = C.[i] -> -1.0
| _ -> 0.0
if sign <> 0.0 then
let Q_i = Q_lru.Get i N
for t = 0 to N-1 do
G'.[t] <- G'.[t] + sign * C.[i]*Q_i.[t]
/// Reconstruct gradient
let inline reconstructG L =
info "reconstruct gradient"
for t = L to N-1 do
G.[t] <- G'.[t] - 1.0
let inline isFree i = A.[i] <> 0.0 && A.[i] <> C.[i]
let free = seq { 0..L-1 } |> Seq.filter isFree |> Seq.length
if free*L > 2*L*(N-L) then
for i = L to N-1 do
let Q_i = Q_lru.Get i L
for j = 0 to L-1 do
if isFree j then
G.[i] <- G.[i] + A.[j]*Q_i.[j]
else
for i = 0 to L-1 do
if isFree i then
let Q_i = Q_lru.Get i N
for j = L to N-1 do
G.[j] <- G.[j] + A.[i]*Q_i.[j]
let inline m up = up |> Seq.map _y_gf |> Seq.max
let inline M low = low |> Seq.map _y_gf |> Seq.min
let inline shrink m M L =
let shrinked = seq { 0..L-1 } |> Seq.filter (fun i ->
(Y.[i] = +1.0 && A.[i] = C.[i] || Y.[i] = -1.0 && A.[i] = 0.0) && (_y_gf i) > m ||
(Y.[i] = +1.0 && A.[i] = 0.0 || Y.[i] = -1.0 && A.[i] = C.[i]) && (_y_gf i) < M) |> Seq.toArray
if Array.length shrinked > 0 then
let inline swapAll i j =
swap X i j; swap Y i j
swap C i j; swap A i j
swap G i j; swap G' i j
Q_lru.Swap i j
let mutable swaps = 0
let mutable i = 0
let mutable j = (Array.length shrinked) - 1
let mutable k = L - 1
while i <= j do
if shrinked.[j] = k then
j <- j - 1
else
swaps <- swaps + 1
swapAll shrinked.[i] k
i <- i + 1
k <- k - 1
let shrinked = Array.length shrinked
info "shrinked = %d, active = %d, swaps = %d" shrinked (L - shrinked) swaps
L - shrinked
else
L
let inline isOptimal m M epsilon = (m - M) <= epsilon
/// Sequential Minimal Optimization (SMO) Algorithm
let inline optimize_solve up low L =
/// 1. Find a pair of elements that violate the optimality condition
match selectWorkingSet up low L with
| Some (i, j) ->
/// 2. Solve the optimization sub-problem
let a_i, a_j = solve i j L
/// 3. Update the gradient
updateG i j a_i a_j L
updateG' i a_i
updateG' j a_j
/// 4. Update the solution
A.[i] <- a_i; A.[j] <- a_j
true
| None -> false
/// Optimize with shrinking every 1000 iterations
let rec optimize_shrinking k s L reconstructed =
let inline optimize_shrink m M L =
let mutable reconstructed = reconstructed
if not reconstructed && isOptimal m M (10.0*epsilon) then
reconstructG L
reconstructed <- true
reconstructed, shrink m M L
if k < parameters.iterations then
let up_k = I_up L |> Seq.cache
let low_k = I_low L |> Seq.cache
let m_k = m up_k
let M_k = M low_k
let reconstructed, L =
if s = 0 then
optimize_shrink m_k M_k L
else
reconstructed, L
if isOptimal m_k M_k epsilon then
reconstructG L
let up_k = I_up N |> Seq.cache
let low_k = I_low N |> Seq.cache
if not(isOptimal (m (up_k)) (M (low_k)) epsilon) && optimize_solve up_k low_k N then
optimize_shrinking k 1 N reconstructed
else
k
else
if optimize_solve up_k low_k L then
optimize_shrinking (k + 1) (if s > 0 then (s - 1) else shrinking_iterations) L reconstructed
else
k
else
failwith "Exceeded iterations limit"
/// Optimize without shrinking every 1000 iterations
let rec optimize_non_shrinking k =
if k < parameters.iterations then
let up_k = I_up N |> Seq.cache
let low_k = I_low N |> Seq.cache
let m_k = m up_k
let M_k = M low_k
if not (isOptimal m_k M_k epsilon) && optimize_solve up_k low_k N then
optimize_non_shrinking (k + 1)
else
k
else
failwith "Exceeded iterations limit"
let iterations =
if parameters.shrinking then
optimize_shrinking 0 shrinking_iterations N false
else
optimize_non_shrinking 0
info "#iterations = %d" iterations
/// Reconstruction of hyperplane bias
let bias =
let mutable b = 0.0
let mutable M = 0
for i = 0 to N-1 do
if 0.0 < A.[i] && A.[i] < C.[i] then
b <- b + _y_gf i
M <- M + 1
DivideByInt b M
/// Remove support vectors with A.[i] = 0.0 and compute Y.[i]*A.[i]
let N' = Array.sumBy (fun a -> if a <> 0.0 then 1 else 0) A
let X' = Array.zeroCreate N'
let A' = Array.zeroCreate N'
let mutable j = 0
for i = 0 to N-1 do
if A.[i] <> 0.0 then
X'.[j] <- X.[i]
A'.[j] <- Y.[i]*A.[i]
j <- j + 1
info "support vectors = %d" N'
SVM(K,X',A',bias)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment