binarybana/bench.jl

## bench.jl
using StatsBase

randsubseq2(A::AbstractVector, m::Integer) = randsubseq2!(Array(eltype(A),m), A)

function randsubseq2!(S::AbstractVector, A::AbstractArray)
    m = length(S)
    m == 0 && return S # needed for correctness: code below requires m > 0
    n = length(A)
    m == n && return copy!(S, A)
    0 <= m <= n || throw(DomainError())
    i = 0; j = 0
    while true
        if (n - i)*rand() <= m - j
           S[j += 1] = A[i += 1]
           j == m && return S
        else
           i += 1
        end
    end
end

vitter(A::AbstractArray, k::Integer) = vitter!(A,Array(eltype(A),k))

function vitter!(A::AbstractArray, x::AbstractArray)
    N = length(A)
    n = length(x)
    V_prime = exp(log(rand())/n)
    quant1 = N-n+1
    quant2 = quant1/N
    alpha_inverse = 15
    threshold = alpha_inverse * n
    ai=1
    local S
    while n>1 && threshold<N
        while true
            local X
            while true
                X = N * (1.0 - V_prime)
                S = itrunc(X)
                S < quant1 && break
                V_prime = exp(log(rand())/n)
            end
            y = rand()/quant2
            LHS = exp(log(y)/(n-1))
            RHS = ((quant1 - S)/quant1) * (N/(N-X))
            if LHS <= RHS # Accept S
                V_prime = LHS/RHS
                break
            end
            if n-1>S
                bottom = N-n
                limit = N-S
            else
                bottom = N-S-1
                limit = quant1
            end
            for top=N-1:-1:limit
                y *= top/bottom
                bottom = bottom - 1
            end
            if exp(log(y)/(n-1)) <= N/(N-X)
                # Accept S
                V_prime = exp(log(rand())/(n-1))
                break
            end
            V_prime = exp(log(rand())/n)
        end
        # Select (S+1)st record
        N -= S+1
        n -= 1
        x[end-n] = A[ai+=S]

        quant1 -= S
        quant2 = quant1/N
        threshold -= alpha_inverse
    end
    if n>1 # Algo A
        top = N - n
        for n = n-1:-1:2 # FIXME: this n-1 was originally n
            V = rand()
            S = 1
            quot = top/N
            while quot > V
                S = S+1
                top = top-1
                N = N-1
                quot *= top/N
            end
            x[end-n] = A[ai+=S]
            N-=1
        end
    end
    #One left:
    S = itrunc(N*V_prime)
    x[end] = A[ai+=S]
end

function rand_first_index(n, k)
    r = rand()
    p = k/n
    i = 1
    while p < r
        i += 1
        p += (1-p)k/(n-(i-1))
    end
    return i
end

ordered_sample_norep{T}(xs::AbstractArray{T}, k::Integer) = ordered_sample_norep!(xs, Array(T,k))

function ordered_sample_norep!(xs::AbstractArray, target::AbstractArray)
    n = length(xs)
    k = length(target)
    i = 0
    for j in 1:k
        step = rand_first_index(n, k)
        n -= step
        i += step
        target[j] = xs[i]
        k -= 1
    end
    return target
end

function floyd1{T}(a::AbstractArray{T}, m::Integer)
    n = length(a)
    0 <= m <= n || throw(DomainError())
    # Floyd's Õ(m) algorithm from J. Bentley and B. Floyd, "Programming pearls: A sample of brilliance,"
    # Commun. ACM Magazine 30, no. 9, 754--757 (1987).
    # Adapted from https://github.com/JuliaLang/julia/issues/6714#issuecomment-42057923
    #s = IntSet()
    s = Set{Int}()
    x = Array(T, m)

    for (xind,j) = enumerate(n-m+1:n)
        i = rand(1:j)
        ind = i in s ? j : i
        x[xind] = a[ind]
        push!(s, ind)
    end
    x
end

function floyd1p{T}(a::AbstractArray{T}, m::Integer)
    n = length(a)
    0 <= m <= n || throw(DomainError())
    # Floyd's Õ(m) algorithm from J. Bentley and B. Floyd, "Programming pearls: A sample of brilliance,"
    # Commun. ACM Magazine 30, no. 9, 754--757 (1987).
    # Adapted from https://github.com/JuliaLang/julia/issues/6714#issuecomment-42057923
    #s = IntSet()
    s = Set{Int}()
    x = T[]
    sizehint(x, m)

    for j = n-m+1:n
        i = rand(1:j)
        if i in s
            #use j
            push!(x,a[j])
            push!(s,j)
        else
            unshift!(x,a[j])
            push!(s,i)
        end
    end
    x
end

# warmup
sample(1:1000, 100)
floyd1(1:1000, 100)
floyd1p(1:1000, 100)
ordered_sample_norep(1:1000, 100)
vitter(1:1000, 100)

N=10_000
krange = 5:5:200
navg=200

using PyPlot
close("all")

labels=["sample ordered",
        "sample unordered",
        "randsubseq2",
        "vitter",
        "floyd ordered",
        "floyd unordered"]
        #"one-more-min ordered"]

times = {Float64[] for x=1:length(labels)}
for i=krange
    gc_disable()
    meas = [median([@elapsed sample(1:N, i, replace=false, ordered=true) for x=1:navg]),
            median([@elapsed sample(1:N, i, replace=false) for x=1:navg]),
            median([@elapsed randsubseq2(1:N, i) for x=1:navg]),
            median([@elapsed vitter(1:N, i) for x=1:navg]),
            median([@elapsed floyd1(1:N, i) for x=1:navg]),
            median([@elapsed floyd1p(1:N, i) for x=1:navg])]
            #median([@elapsed ordered_sample_norep(1:N, i) for x=1:10])]
    gc_enable()
    gc()

    [push!(x,t) for (x,t) in zip(times,meas)]
end

[plot([krange],t,label=l,linewidth=5, alpha=0.7) for (t,l) in zip(times,labels)]
legend(loc="best")
title("From $(1:N), sample ...")
	using StatsBase

	randsubseq2(A::AbstractVector, m::Integer) = randsubseq2!(Array(eltype(A),m), A)

	function randsubseq2!(S::AbstractVector, A::AbstractArray)
	m = length(S)
	m == 0 && return S # needed for correctness: code below requires m > 0
	n = length(A)
	m == n && return copy!(S, A)
	0 <= m <= n \|\| throw(DomainError())
	i = 0; j = 0
	while true
	if (n - i)*rand() <= m - j
	S[j += 1] = A[i += 1]
	j == m && return S
	else
	i += 1
	end
	end
	end

	vitter(A::AbstractArray, k::Integer) = vitter!(A,Array(eltype(A),k))

	function vitter!(A::AbstractArray, x::AbstractArray)
	N = length(A)
	n = length(x)
	V_prime = exp(log(rand())/n)
	quant1 = N-n+1
	quant2 = quant1/N
	alpha_inverse = 15
	threshold = alpha_inverse * n
	ai=1
	local S
	while n>1 && threshold<N
	while true
	local X
	while true
	X = N * (1.0 - V_prime)
	S = itrunc(X)
	S < quant1 && break
	V_prime = exp(log(rand())/n)
	end
	y = rand()/quant2
	LHS = exp(log(y)/(n-1))
	RHS = ((quant1 - S)/quant1) * (N/(N-X))
	if LHS <= RHS # Accept S
	V_prime = LHS/RHS
	break
	end
	if n-1>S
	bottom = N-n
	limit = N-S
	else
	bottom = N-S-1
	limit = quant1
	end
	for top=N-1:-1:limit
	y *= top/bottom
	bottom = bottom - 1
	end
	if exp(log(y)/(n-1)) <= N/(N-X)
	# Accept S
	V_prime = exp(log(rand())/(n-1))
	break
	end
	V_prime = exp(log(rand())/n)
	end
	# Select (S+1)st record
	N -= S+1
	n -= 1
	x[end-n] = A[ai+=S]

	quant1 -= S
	quant2 = quant1/N
	threshold -= alpha_inverse
	end
	if n>1 # Algo A
	top = N - n
	for n = n-1:-1:2 # FIXME: this n-1 was originally n
	V = rand()
	S = 1
	quot = top/N
	while quot > V
	S = S+1
	top = top-1
	N = N-1
	quot *= top/N
	end
	x[end-n] = A[ai+=S]
	N-=1
	end
	end
	#One left:
	S = itrunc(N*V_prime)
	x[end] = A[ai+=S]
	end

	function rand_first_index(n, k)
	r = rand()
	p = k/n
	i = 1
	while p < r
	i += 1
	p += (1-p)k/(n-(i-1))
	end
	return i
	end

	ordered_sample_norep{T}(xs::AbstractArray{T}, k::Integer) = ordered_sample_norep!(xs, Array(T,k))

	function ordered_sample_norep!(xs::AbstractArray, target::AbstractArray)
	n = length(xs)
	k = length(target)
	i = 0
	for j in 1:k
	step = rand_first_index(n, k)
	n -= step
	i += step
	target[j] = xs[i]
	k -= 1
	end
	return target
	end

	function floyd1{T}(a::AbstractArray{T}, m::Integer)
	n = length(a)
	0 <= m <= n \|\| throw(DomainError())
	# Floyd's Õ(m) algorithm from J. Bentley and B. Floyd, "Programming pearls: A sample of brilliance,"
	# Commun. ACM Magazine 30, no. 9, 754--757 (1987).
	# Adapted from https://github.com/JuliaLang/julia/issues/6714#issuecomment-42057923
	#s = IntSet()
	s = Set{Int}()
	x = Array(T, m)

	for (xind,j) = enumerate(n-m+1:n)
	i = rand(1:j)
	ind = i in s ? j : i
	x[xind] = a[ind]
	push!(s, ind)
	end
	x
	end

	function floyd1p{T}(a::AbstractArray{T}, m::Integer)
	n = length(a)
	0 <= m <= n \|\| throw(DomainError())
	# Floyd's Õ(m) algorithm from J. Bentley and B. Floyd, "Programming pearls: A sample of brilliance,"
	# Commun. ACM Magazine 30, no. 9, 754--757 (1987).
	# Adapted from https://github.com/JuliaLang/julia/issues/6714#issuecomment-42057923
	#s = IntSet()
	s = Set{Int}()
	x = T[]
	sizehint(x, m)

	for j = n-m+1:n
	i = rand(1:j)
	if i in s
	#use j
	push!(x,a[j])
	push!(s,j)
	else
	unshift!(x,a[j])
	push!(s,i)
	end
	end
	x
	end

	# warmup
	sample(1:1000, 100)
	floyd1(1:1000, 100)
	floyd1p(1:1000, 100)
	ordered_sample_norep(1:1000, 100)
	vitter(1:1000, 100)

	N=10_000
	krange = 5:5:200
	navg=200

	using PyPlot
	close("all")

	labels=["sample ordered",
	"sample unordered",
	"randsubseq2",
	"vitter",
	"floyd ordered",
	"floyd unordered"]
	#"one-more-min ordered"]

	times = {Float64[] for x=1:length(labels)}
	for i=krange
	gc_disable()
	meas = [median([@elapsed sample(1:N, i, replace=false, ordered=true) for x=1:navg]),
	median([@elapsed sample(1:N, i, replace=false) for x=1:navg]),
	median([@elapsed randsubseq2(1:N, i) for x=1:navg]),
	median([@elapsed vitter(1:N, i) for x=1:navg]),
	median([@elapsed floyd1(1:N, i) for x=1:navg]),
	median([@elapsed floyd1p(1:N, i) for x=1:navg])]
	#median([@elapsed ordered_sample_norep(1:N, i) for x=1:10])]
	gc_enable()
	gc()

	[push!(x,t) for (x,t) in zip(times,meas)]
	end

	[plot([krange],t,label=l,linewidth=5, alpha=0.7) for (t,l) in zip(times,labels)]
	legend(loc="best")
	title("From $(1:N), sample ...")