Skip to content

Instantly share code, notes, and snippets.

@MichaelSnowden
Last active August 23, 2020 17:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MichaelSnowden/8c7b0ba0c4963fec6911062a61e5a311 to your computer and use it in GitHub Desktop.
Save MichaelSnowden/8c7b0ba0c4963fec6911062a61e5a311 to your computer and use it in GitHub Desktop.
Reservoir Sampling in Elm
module NonEmpty exposing (..)
type alias NonEmpty a =
{ head : a, tail : List a }
module ReservoirSampling exposing (sampleList, sampleStream)
import NonEmpty exposing (NonEmpty)
import Random
sampleStream : Int -> a -> a -> Random.Generator a
sampleStream numSeen old new =
let
gotRandomInt k =
let
replace =
k == 0
chosen =
if replace then
new
else
old
in
chosen
in
Random.map gotRandomInt (Random.int 0 (numSeen + 1))
sampleListAfter : Int -> NonEmpty a -> Random.Generator a
sampleListAfter numSeen { head, tail } =
case tail of
[] ->
Random.constant head
nextHead :: nextTail ->
let
gotSample sample =
sampleListAfter (numSeen + 1) { head = sample, tail = nextTail }
in
Random.andThen gotSample (sampleStream numSeen head nextHead)
sampleList : NonEmpty a -> Random.Generator a
sampleList =
sampleListAfter 0
module ReservoirSamplingTest exposing (..)
import Dict exposing (Dict)
import Expect
import NonEmpty exposing (NonEmpty)
import Random
import ReservoirSampling
import Test exposing (Test)
exampleTest : Test
exampleTest =
let
input =
{ head = 1, tail = [ 2, 3 ] }
( actual1, _ ) =
Random.step (ReservoirSampling.sampleList input) (Random.initialSeed 0)
( actual2, _ ) =
Random.step (ReservoirSampling.sampleList input) (Random.initialSeed 3)
( actual3, _ ) =
Random.step (ReservoirSampling.sampleList input) (Random.initialSeed 1)
in
Test.describe "exampleTest"
[ Test.test "sample1" <| always <| Expect.equal actual1 1
, Test.test "sample2" <| always <| Expect.equal actual2 2
, Test.test "sample3" <| always <| Expect.equal actual3 3
]
generateSample : Int -> Random.Generator Int
generateSample size =
let
head =
0
tail =
List.range 1 (size - 1)
list =
{ head = head, tail = tail }
in
ReservoirSampling.sampleList list
generateSampleCounts : Int -> Int -> Dict Int Int -> Random.Generator (Dict Int Int)
generateSampleCounts numSamples numBuckets counts =
if numSamples == 0 then
Random.constant counts
else
let
gotFirstSample sample =
let
updateCount maybeCount =
case maybeCount of
Just count ->
Just <| count + 1
Nothing ->
Just 1
newCounts =
Dict.update sample updateCount counts
in
generateSampleCounts (numSamples - 1) numBuckets newCounts
in
generateSample numBuckets |> Random.andThen gotFirstSample
-- Generate a bunch of lists and verify that the sampler samples uniformly from them
randomTest : Test
randomTest =
let
numSamples =
10000
numBuckets =
10
( counts, _ ) =
Random.step (generateSampleCounts numSamples numBuckets Dict.empty) (Random.initialSeed 42)
uniformDistribution =
1.0 / numBuckets
actualDistribution count =
toFloat count / numSamples
toTest : ( Int, Int ) -> Test
toTest ( bucket, count ) =
Test.test ("bucket " ++ String.fromInt bucket ++ " should have proportion ~ 1 / numBuckets") <|
\_ ->
Expect.within (Expect.Relative 0.1) uniformDistribution (actualDistribution count)
tests : List Test
tests =
Dict.toList counts |> List.map toTest
in
Test.describe "randomTest"
[ Test.test ("should have " ++ String.fromInt numBuckets ++ " tests") <| \_ -> Expect.equal numBuckets (List.length tests)
, Test.describe "should be close to uniform" tests
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment