Last active
May 27, 2022 06:54
-
-
Save numist/712661976a3a638363ff8bd2f0894875 to your computer and use it in GitHub Desktop.
A rough Swift implementation of the Vose alias method
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// What is an alias method? | |
// If we have a list of elements with different frequencies, | |
// like the letters in the English language… | |
let frequencies = [ | |
("a", 8.167), ("b", 1.492), ("c", 2.782), ("d", 4.253), ("e", 12.702), | |
("f", 2.228), ("g", 2.015), ("h", 6.094), ("i", 6.966), ("j", 0.153), | |
("k", 0.772), ("l", 4.025), ("m", 2.406), ("n", 6.749), ("o", 7.507), | |
("p", 1.929), ("q", 0.095), ("r", 5.987), ("s", 6.327), ("t", 9.056), | |
("u", 2.758), ("v", 0.978), ("w", 2.360), ("x", 0.150), ("y", 1.974), | |
("z", 0.074), (" ", 22.861) | |
] | |
// …an alias method can take these probabilities to build a structure… | |
var aliasMethod = VoseAliasMethod(frequencies) | |
// …and that structure can be used to generate elements randomly based on the | |
// distribution in constant time! | |
var randStr = "" | |
for _ in 0..<77 { | |
randStr += aliasMethod.next() | |
} | |
print(randStr) // Prints: | |
// ieee m ru p sing crrigevelichb ic redu kv rodtrc eiheermtcyontlh flteh ltha | |
/* | |
But how??! | |
An alias method selects its output by the roll of a fair die to choose a | |
possible output. Upon choosing that element, it flips an unfair coin to | |
determine if it should change its choice to a different element. | |
This is easiest to understand by visualizing it. Let's look at the | |
probabilities of a streetlight's colour at any given time: | |
6| @@@ | |
5| ### @@@ | |
4| ### @@@ | |
3| ### @@@ | |
2| ### @@@ | |
1| ### *** @@@ | |
+-------------- | |
red yel grn | |
Visually, what an alias method does is change the three bars so they are | |
the same height: | |
4| ### @@@ ### | |
3| ### @@@ @@@ | |
2| ### @@@ @@@ | |
1| ### *** @@@ | |
+-------------- | |
red yel grn | |
Now, to produce an element the algorithm chooses a bar at random: | |
If the algorithm selects red (the first bar): | |
red is produced every time | |
If the algorithm selects yel (the middle bar): | |
flip a biased coin to produce yel 1/4 of the time and grn 3/4 of the time | |
If the algorithm selects grn (the last bar): | |
flip a biased coin to produce grn 3/4 of the time and red 1/4 of the time | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public struct VoseAliasMethod<Element> { | |
/* It's expected that the vast majority of people won't care about | |
* customizing the specific rng used by this type, so an existential is | |
* preferable to another generic parameter. */ | |
private struct AnyRandomNumberGenerator : RandomNumberGenerator { | |
var n: () -> UInt64 | |
init(_ rng: RandomNumberGenerator) { | |
var r = rng | |
n = { r.next() } | |
} | |
mutating func next() -> UInt64 { | |
return n() | |
} | |
} | |
// (choice, % to keep choice, alternate) | |
private let a: [(Element, Double, Element)] | |
// rng for producing elements | |
private var r: AnyRandomNumberGenerator | |
public init( | |
_ probs: [(Element, Double)], | |
rng: RandomNumberGenerator = SystemRandomNumberGenerator() | |
) { | |
precondition(probs.count > 0) | |
let totalP = probs.reduce(0.0, { (r, e) in | |
let (_, p) = e | |
precondition(p > 0.0) | |
return r + p | |
}) | |
let bucketP = totalP / Double(probs.count) | |
// Sort the elements as more or less likely than average | |
var larger = [Int: Double]() | |
var smaller = [Int: Double]() | |
for i in probs.indices { | |
let (_, p) = probs[i] | |
if p >= bucketP { | |
larger[i] = p | |
} else { | |
smaller[i] = p | |
} | |
} | |
var field = [(Element, Double, Element)]() | |
for _ in 0..<probs.count { | |
/* `larger` may be empty during the last iteration due to | |
* roundoff error. In that case, the last value will be in | |
* `smaller` with a probability very close to bucketP. */ | |
let (bigI, bigP) = larger.popFirst() ?? smaller.popFirst()! | |
let (bigE, _) = probs[bigI] | |
// If there are no smaller options, use a 0% chance… | |
let (smallI, smallP) = smaller.popFirst() ?? (-1, 0.0) | |
// …of selecting the big element | |
let (smallE, _) = smallI >= 0 ? probs[smallI] : probs[bigI] | |
// The 0% chance is important to ensure this math is correct | |
let newBigP = bigP - (bucketP - smallP) | |
if newBigP >= bucketP { | |
larger[bigI] = newBigP | |
} else { | |
smaller[bigI] = newBigP | |
} | |
// (choice, % to keep choice, alt) | |
field.append((smallE, smallP / bucketP, bigE)) | |
} | |
assert(field.count == probs.count) | |
assert(larger.count == 0) | |
// `smaller` may contain one element with a near-zero probability | |
assert((0...1).contains(smaller.count)) | |
a = field | |
r = AnyRandomNumberGenerator(rng) | |
} | |
public mutating func next() -> Element { | |
let (choice, pToKeep, alt) = a.randomElement(using: &r)! | |
return Double.random(in: 0.0..<1.0, using: &r) < pToKeep ? choice : alt | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here's another implementation in Java, and an article that goes into more depth.