carlobaldassi/bitarray_perf_comparison.txt

## bitarray_perf_comparison.txt
b = BitArray(10_000_017); fill!(b, true)
2x4 DataFrame:
         Function  Elapsed Relative Replications
[1,]    "oldfill" 0.703632    3.394         1000
[2,]    "newfill" 0.207316      1.0         1000

b1 = randbool(10_000_117); b2 = randbool(10_000_017); copy!(b1, b2)
2x4 DataFrame:
         Function  Elapsed Relative Replications
[1,]    "oldcopy" 0.834918  2.37655         1000
[2,]    "newcopy" 0.351316      1.0         1000

b = randbool(10_000_117); convert(Array{Int}, b)
2x4 DataFrame:
            Function Elapsed Relative Replications
[1,]    "oldconvert" 11.2007  7.20337           10
[2,]    "newconvert" 1.55492      1.0           10

a = rand(0:1, 1_000_017); convert(BitArray, a)
2x4 DataFrame:
            Function Elapsed Relative Replications
[1,]    "oldconvert"  2.0255  1.58136           20
[2,]    "newconvert" 1.28086      1.0           20

a = {rand(0:1) for i = 1:1_017}; convert(BitArray, a)
2x4 DataFrame:
            Function  Elapsed Relative Replications
[1,]    "oldconvert" 0.320322  1.16844           10
[2,]    "newconvert" 0.274146      1.0           10

b = BitArray(10_000_017); bitarray_rand_fill!(b)
2x4 DataFrame:
             Function Elapsed Relative Replications
[1,]    "oldrandfill" 2.39907  1.20137         1000
[2,]    "newrandfill" 1.99694      1.0         1000

b = randbool(10_000); I = rand(1:10_000, 10_000); b[I]
2x4 DataFrame:
             Function  Elapsed Relative Replications
[1,]    "oldgetindex"  1.43322  6.71223         1000
[2,]    "newgetindex" 0.213524      1.0         1000

b = randbool(100, 100); I1 = rand(1:100, 100); I2 = rand(1:100, 100); b[I1,I2]
2x4 DataFrame:
             Function Elapsed Relative Replications
[1,]    "oldgetindex"  1.9024  1.10048         1000
[2,]    "newgetindex" 1.72869      1.0         1000

b = randbool(1_000_000); I = randbool(1_000_000); b[I]
2x4 DataFrame:
             Function  Elapsed Relative Replications
[1,]    "oldgetindex"  1.18848  2.19207           10
[2,]    "newgetindex" 0.542172      1.0           10

b = randbool(1_000_000); I = bitunpack(randbool(1_000_000)); b[I]
2x4 DataFrame:
             Function  Elapsed Relative Replications
[1,]    "oldgetindex" 0.704441  4.51643           10
[2,]    "newgetindex" 0.155973      1.0           10

b = randbool(1_000_017); I = randbool(1_000_017); b[I] = true
2x4 DataFrame:
             Function Elapsed Relative Replications
[1,]    "oldsetindex" 2.86153      1.0           50
[2,]    "newsetindex" 2.90139  1.01393           50

b = randbool(1_000_017); I = bitunpack(randbool(1_000_017)); b[I] = true
2x4 DataFrame:
             Function Elapsed Relative Replications
[1,]    "oldsetindex"  10.048  1.04502          100
[2,]    "newsetindex" 9.61514      1.0          100

b = randbool(1_000_000); I = randbool(1_000_000); X = randbool(nnz(I)); b[I] = X
2x4 DataFrame:
             Function  Elapsed Relative Replications
[1,]    "oldsetindex" 0.806377      1.0           10
[2,]    "newsetindex" 0.879788  1.09104           10

b = randbool(1_000_000); I = bitunpack(randbool(1_000_000)); X = randbool(nnz(I)); b[I] = X
2x4 DataFrame:
             Function  Elapsed Relative Replications
[1,]    "oldsetindex" 0.419165  1.26231           10
[2,]    "newsetindex" 0.332061      1.0           10

b = randbool(1_000_000); I = bitunpack(randbool(1_000_000)); X = bitunpack(randbool(nnz(I))); b[I] = X
2x4 DataFrame:
             Function  Elapsed Relative Replications
[1,]    "oldsetindex" 0.123026  1.02952           10
[2,]    "newsetindex" 0.119498      1.0           10

b = randbool(1_000_000); -b
2x4 DataFrame:
               Function Elapsed Relative Replications
[1,]    "oldunaryminus" 4.38952  1.46101          200
[2,]    "newunaryminus" 3.00445      1.0          200

b = randbool(1_000_017); ~b
2x4 DataFrame:
           Function Elapsed Relative Replications
[1,]    "oldbitnot" 18.0818  9.30296        10000
[2,]    "newbitnot" 1.94367      1.0        10000

b = randbool(1_000_017); flipbits!(b)
2x4 DataFrame:
             Function Elapsed Relative Replications
[1,]    "oldflipbits" 6.47089  3.20978       100000
[2,]    "newflipbits" 2.01599      1.0       100000

b = randbool(1_000_017); x = 2; (div(b,x),mod(b,x))
2x4 DataFrame:
           Function Elapsed Relative Replications
[1,]    "olddivmod" 6.25411   4.4016           20
[2,]    "newdivmod" 1.42087      1.0           20

b = randbool(1_000_017); x = 2.0; (div(b,x),mod(b,x))
2x4 DataFrame:
           Function Elapsed Relative Replications
[1,]    "olddivmod" 13.6185  7.50146           40
[2,]    "newdivmod" 1.81545      1.0           40

b1 = randbool(1_000_017); b2 = randbool(1_000_017; (b1&b2, b1|b2, b1$b2)
2x4 DataFrame:
             Function  Elapsed Relative Replications
[1,]    "oldandorxor"  8.56246  18.8855         1000
[2,]    "newandorxor" 0.453389      1.0         1000

b1 = randbool(1_000_017); b2 = randbool(1_000_017); b1 .^ b2
2x4 DataFrame:
        Function  Elapsed Relative Replications
[1,]    "oldpow"  2.80608  16.3882         1000
[2,]    "newpow" 0.171226      1.0         1000

b = randbool(1_000_017); x = -2.0; b .^ x
2x4 DataFrame:
        Function  Elapsed Relative Replications
[1,]    "oldpow"  1.67773  5.53508           10
[2,]    "newpow" 0.303108      1.0           10

b = randbool(1_000_017); a = rand(0:5, 1_000_017); b .^ a
2x4 DataFrame:
        Function  Elapsed Relative Replications
[1,]    "oldpow"  1.65294  2.11172           20
[2,]    "newpow" 0.782746      1.0           20

a = rand(1_000_017); x = 0.5; a .< x
2x4 DataFrame:
        Function  Elapsed Relative Replications
[1,]     "oldlt"   1.2469  1.61651          200
[2,]     "newlt" 0.771353      1.0          200

a = rand(1_000_017); b = rand(1_000_017); a .< b
2x4 DataFrame:
        Function  Elapsed Relative Replications
[1,]     "oldlt"  1.56158  1.69547          200
[2,]     "newlt" 0.921032      1.0          200

b = [falses(100_000_000), randbool(17)]; (findnext(b, 1), findnext(b, 100_000_010))
2x4 DataFrame:
             Function Elapsed Relative Replications
[1,]    "oldfindnext" 3.23528  1.26685         1000
[2,]    "newfindnext" 2.55381      1.0         1000

b = [trues(100_000_000), randbool(17)]; (findnextnot(b, 1), findnextnot(b, 100_000_010))
2x4 DataFrame:
                Function Elapsed Relative Replications
[1,]    "oldfindnextnot" 3.98593  1.47118         1000
[2,]    "newfindnextnot" 2.70935      1.0         1000

b = randbool(1_000_017)]; find(b)
2x4 DataFrame:
         Function Elapsed Relative Replications
[1,]    "oldfind" 5.98756  10.8875          100
[2,]    "newfind" 0.54995      1.0          100

b = [trues(9_999_900), randbool(117)]; all(b)
2x4 DataFrame:
        Function Elapsed Relative Replications
[1,]    "oldall" 3.38011  2.52766        10000
[2,]    "newall" 1.33725      1.0        10000

b = [trues(9_999_900), randbool(117)]; any(b)
2x4 DataFrame:
        Function Elapsed Relative Replications
[1,]    "oldany" 2.70628   1.6008        10000
[2,]    "newany" 1.69058      1.0        10000

## bitperf.jl
module BitPerf

using Benchmark

const _msk64 = ~uint64(0)
macro _mskr(l) :(_msk64 >>> (64-$(esc(l)))) end
macro _div64(l) :($(esc(l)) >>> 6) end
macro _mod64(l) :($(esc(l)) & 63) end
macro _msk_end(l) :(@_mskr @_mod64 $(esc(l))) end
num_bit_chunks(n::Int) = @_div64 (n+63)
const bitcache_chunks = 1024 # this can be changed
const bitcache_size = 64 * bitcache_chunks # do not change this

import Base.getindex_unchecked, Base.setindex_unchecked,
       Base.get_chunks_id, Base.bitarray_rand_fill!, Base.to_index,
       Base.indices, Base.gen_cartesian_map,
       Base.promote_array_type, Base.dumpbitcache,
       Base.findnextnot

#function getindex_unchecked(pBc::Ptr{Uint64}, i::Integer)
    #i1, i2 = get_chunks_id(i)
    #u = uint64(1)
    #return (unsafe_load(pBc, i1) >>> i2) & u == u
#end
#
#function setindex_unchecked(pBc::Ptr{Uint64}, x, i::Integer)
    #x = convert(Bool, x)
    #i1, i2 = get_chunks_id(i)
    #u = uint64(1) << i2
    #c = unsafe_load(pBc, i1)
    #if x
        #c |= u
    #else
        #c &= ~u
    #end
    #unsafe_store!(pBc, c, i1)
#end

# this is unsafe in 2 ways:
#  1) no boundary checking
#  2) calling functions must keep a reference to the
#     original Array to avoid garbage collection
#     (e.g. A=UnsafeArray(A) is dangerous)
# it must be used with care!
immutable UnsafeArray{T} <: AbstractArray
    p::Ptr{T}
    l::Int
    function UnsafeArray(A::Array{T})
        new(pointer(A), length(A))
    end
end

UnsafeArray{T}(A::Array{T}) = UnsafeArray{T}(A)

Base.getindex(uA::UnsafeArray, i::Int) = unsafe_load(uA.p, i)
Base.setindex!{T}(uA::UnsafeArray{T}, x, i::Int) = unsafe_store!(uA.p, x, i)
Base.setindex!{T}(uA::UnsafeArray{T}, x, r::Range1) = for i in r unsafe_store!(uA.p, x, i); end
Base.length(uA::UnsafeArray) = uA.l

Base.start(uA::UnsafeArray) = 1
Base.next(uA::UnsafeArray, ind::Int) = (uA[ind], ind+1)
Base.done(uA::UnsafeArray, ind::Int) = ind > length(uA)

# see comments on UsafeArray
immutable UnsafeBitArray <: AbstractArray
    p::Ptr{Uint64}
    l::Int
    function UnsafeBitArray(B::BitArray)
        new(pointer(B.chunks), length(B))
    end
end

Base.getindex(uB::UnsafeBitArray, i::Int) = getindex_unchecked(uB.p, i)
Base.setindex!(uB::UnsafeBitArray, x, i::Int) = setindex_unchecked(uB.p, x, i)
Base.length(uB::UnsafeBitArray) = uB.l

unsafe{T}(A::Array{T}) = UnsafeArray(A)
unsafe(B::BitArray) = UnsafeBitArray(B)

unsafechunks(B::BitArray) = unsafe(B.chunks)

function new_fill!(B::BitArray, x)
    y = convert(Bool, x)
    Bc = unsafechunks(B)
    if !y
        for i = 1:length(Bc)
            Bc[i] = 0
        end
    else
        lB = length(B)
        if lB == 0
            return B
        end
        lBc = length(Bc)
        for i = 1:lBc-1
            Bc[i] = _msk64
        end
        Bc[end] = @_msk_end length(B)
    end
    return B
end

function dotest_fill()
    const b = BitArray(10_000_017)

    oldfill() = fill!(b, true)
    newfill() = new_fill!(b, true)

    @assert oldfill().chunks == newfill().chunks

    println("b = BitArray(10_000_017); fill!(b, true)")
    println(compare([oldfill, newfill], 1000))
end


function new_copy!(dest::BitArray, src::BitArray)
    destc = unsafechunks(dest)
    srcc = unsafechunks(src)
    nc_d = length(destc)
    nc_s = length(srcc)
    nc = min(nc_s, nc_d)
    if nc == 0
        return dest
    end
    for i = 1:nc-1
        destc[i] = srcc[i]
    end
    if length(src) >= length(dest)
        destc[nc] = srcc[nc]
    else
        msk_s = @_msk_end length(src)
        msk_d = ~msk_s
        destc[nc] = (msk_d & destc[nc]) | (msk_s & srcc[nc])
    end
    return dest
end

function dotest_copy()
    const b1 = randbool(10_000_117)
    const b2 = randbool(10_000_017)

    const c1 = copy(b1)
    const c2 = copy(b2)

    oldcopy() = copy!(b1, b2)
    newcopy() = new_copy!(c1, c2)

    @assert oldcopy().chunks == newcopy().chunks

    println("b1 = randbool(10_000_117); b2 = randbool(10_000_017); copy!(b1, b2)")
    println(compare([oldcopy, newcopy], 1000))
end

function new_convert{T,N}(::Type{Array{T,N}}, B::BitArray{N})
    A = Array(T, size(B))
    uB = unsafe(B)
    if isbits(T)
        uA = UnsafeArray(A)
        for i = 1:length(A)
            uA[i] = uB[i]
        end
    else
        for i = 1:length(A)
            A[i] = uB[i]
        end
    end
    return A
end

function dotest_convert1()
    const b = randbool(10_000_117)

    oldconvert() = convert(Array{Int}, b)
    newconvert() = new_convert(Array{Int}, b)

    @assert oldconvert() == newconvert()

    println("b = randbool(10_000_117); convert(Array{Int}, b)")
    println(compare([oldconvert, newconvert], 10))
end

function dotest_convert2()
    const b = randbool(100_117)

    oldconvert() = convert(Array{BigInt}, b)
    newconvert() = new_convert(Array{BigInt}, b)

    @assert oldconvert() == newconvert()

    println("b = randbool(10_000_117); convert(Array{BigInt}, b)")
    println(compare([oldconvert, newconvert], 100))
end

macro convert_to_bitarray(Bc, A, l)
    quote
        ind = 1
        for i = 1:length($(esc(Bc)))-1
            u = uint64(1)
            c = uint64(0)
            for j = 1:64
                if bool($(esc(A))[ind])
                    c |= u
                end
                ind += 1
                u <<= 1
            end
            $(esc(Bc))[i] = c
        end
        u = uint64(1)
        c = uint64(0)
        for j = 0:@_mod64($(esc(l))-1)
            if bool($(esc(A))[ind])
                c |= u
            end
            ind += 1
            u <<= 1
        end
        $(esc(Bc))[end] = c
    end
end

function convert_to_bitarray_frombits(Bc::UnsafeArray{Uint64}, A::Array, l::Int)
    uA = unsafe(A)
    @convert_to_bitarray(Bc, uA, l)
end

function convert_to_bitarray_fromnonbits(Bc::UnsafeArray{Uint64}, A::Array, l::Int)
    @convert_to_bitarray(Bc, A, l)
end

new_convert{T,N}(::Type{BitArray}, A::AbstractArray{T,N}) = new_convert(BitArray{N},A)
function new_convert{T,N}(::Type{BitArray{N}}, A::AbstractArray{T,N})
    B = BitArray(size(A))
    l = length(B)
    if l == 0
        return B
    end
    Bc = unsafechunks(B)
    if isa(A, Array) && isbits(T)
        convert_to_bitarray_frombits(Bc, A, l)
    else
        convert_to_bitarray_fromnonbits(Bc, A, l)
    end
    return B
end

function dotest_convert3()
    const a = rand(0:1, 10_000_017)

    oldconvert() = convert(BitArray, a)
    newconvert() = new_convert(BitArray, a)

    @assert oldconvert() == newconvert()

    println("a = rand(0:1, 1_000_017); convert(BitArray, a)")
    println(compare([oldconvert, newconvert], 20))
end

function dotest_convert4()
    const a = {rand(0:1) for i = 1:1_000_017}

    oldconvert() = convert(BitArray, a)
    newconvert() = new_convert(BitArray, a)

    @assert oldconvert() == newconvert()

    println("a = {rand(0:1) for i = 1:1_017}; convert(BitArray, a)")
    println(compare([oldconvert, newconvert], 10))
end

function new_bitarray_rand_fill!(B::BitArray)
    if length(B) == 0
        return B
    end
    Bc = unsafechunks(B)
    for i = 1:length(Bc)-1
        Bc[i] = rand(Uint64)
    end
    msk = @_msk_end length(B)
    Bc[end] = msk & rand(Uint64)
    return B
end

function dotest_randfill()
    const b = BitArray(10_000_017)

    oldrandfill() = (srand(1); bitarray_rand_fill!(b))
    newrandfill() = (srand(1); new_bitarray_rand_fill!(b))

    @assert oldrandfill().chunks == newrandfill().chunks

    println("b = BitArray(10_000_017); bitarray_rand_fill!(b)")
    println(compare([oldrandfill, newrandfill], 1000))
end

function new_getindex{T<:Real}(B::BitArray, I::AbstractVector{T})
    X = BitArray(length(I))
    lB = length(B)
    uX = UnsafeBitArray(X) # this is faster than unsafe(X)
    uB = unsafe(B)
    ind = 1
    for i in I
        # faster X[ind] = B[i]
        i = to_index(i)
        if i < 1 || i > lB
            throw(BoundsError())
        end
        uX[ind] = uB[i]
        ind += 1
    end
    return X
end

function dotest_getindex1()
    const I = rand(1:10_000, 10_000)
    const b = randbool(10_000)

    oldgetindex() = getindex(b, I)
    newgetindex() = new_getindex(b, I)

    @assert oldgetindex().chunks == newgetindex().chunks

    println("b = randbool(10_000); I = rand(1:10_000, 10_000); b[I]")
    println(compare([oldgetindex, newgetindex], 1000))
end

let getindex_cache = nothing
    global new_getindex
    function new_getindex(B::BitArray, I::Union(Real,AbstractVector)...)
        I = indices(I)
        X = BitArray(index_shape(I...))
        uX = unsafe(X)

        if is(getindex_cache,nothing)
            getindex_cache = Dict()
        end
        gen_cartesian_map(getindex_cache, ivars -> quote
                uX[ind] = B[$(ivars...)]
                ind += 1
            end, I, (:B, :uX, :ind), B, uX, 1)
        return X
    end
end

function dotest_getindex2()
    const I1 = rand(1:100, 100)
    const I2 = rand(1:100, 100)
    const b = randbool(100, 100)

    oldgetindex() = getindex(b, I1, I2)
    newgetindex() = new_getindex(b, I1, I2)

    @assert oldgetindex().chunks == newgetindex().chunks

    println("b = randbool(100, 100); I1 = rand(1:100, 100); I2 = rand(1:100, 100); b[I1,I2]")
    println(compare([oldgetindex, newgetindex], 1000))
end

function new_getindex_bool_1d(B::BitArray, I::AbstractArray{Bool})
    n = sum(I)
    X = BitArray(n)
    lI = length(I)
    if lI != length(B)
        throw(BoundsError())
    end
    uX = UnsafeBitArray(X) # this is faster than unsafe(X)
    uB = unsafe(B)
    ind = 1
    for i = 1:length(I)
        if I[i]
            uX[ind] = uB[i]
            ind += 1
        end
    end
    return X
end

new_getindex(B::BitVector, I::AbstractVector{Bool}) = new_getindex_bool_1d(B, I)

function dotest_getindex3()
    const I = randbool(1_000_000)
    const b = randbool(1_000_000)

    oldgetindex() = getindex(b, I)
    newgetindex() = new_getindex(b, I)

    @assert oldgetindex().chunks == newgetindex().chunks

    println("b = randbool(1_000_000); I = randbool(1_000_000); b[I]")
    println(compare([oldgetindex, newgetindex], 10))
end

function dotest_getindex4()
    const I = bitunpack(randbool(1_000_000))
    const b = randbool(1_000_000)

    oldgetindex() = getindex(b, I)
    newgetindex() = new_getindex(b, I)

    @assert oldgetindex().chunks == newgetindex().chunks

    println("b = randbool(1_000_000); I = bitunpack(randbool(1_000_000)); b[I]")
    println(compare([oldgetindex, newgetindex], 10))
end

function new_setindex_bool_scalar_1d(A::BitArray, x, I::AbstractArray{Bool})
    if length(I) > length(A)
        throw(BoundsError())
    end
    x = bool(x)
    uA = unsafe(A)
    for i = 1:length(I)
        if I[i]
            uA[i] = x
        end
    end
    A
end

new_setindex!(A::BitArray, x, I::AbstractVector{Bool}) = new_setindex_bool_scalar_1d(A, x, I)

function dotest_setindex1()
    const I = randbool(1_000_017)
    const b = randbool(1_000_017)

    oldsetindex() = setindex!(b, true, I)
    newsetindex() = new_setindex!(b, true, I)

    @assert oldsetindex().chunks == newsetindex().chunks

    println("b = randbool(1_000_017); I = randbool(1_000_017); b[I] = true")
    println(compare([oldsetindex, newsetindex], 50))
end

function dotest_setindex2()
    const I = bitunpack(randbool(1_000_0017))
    const b = randbool(1_000_0017)

    oldsetindex() = setindex!(b, true, I)
    newsetindex() = new_setindex!(b, true, I)

    @assert oldsetindex().chunks == newsetindex().chunks

    println("b = randbool(1_000_017); I = bitunpack(randbool(1_000_017)); b[I] = true")
    println(compare([oldsetindex, newsetindex], 100))
end

function new_setindex_bool_vector_1d(A::BitArray, X::AbstractArray, I::AbstractArray{Bool})
    if length(I) > length(A)
        throw(BoundsError())
    end
    uA = unsafe(A)
    c = 1
    for i = 1:length(I)
        if I[i]
            uA[i] = X[c]
            c += 1
        end
    end
    A
end

new_setindex!(A::BitArray, X::AbstractArray, I::AbstractVector{Bool}) = new_setindex_bool_vector_1d(A, X, I)

function dotest_setindex3()
    const I = randbool(1_000_000)
    const b = randbool(1_000_000)
    const X = randbool(nnz(I))

    oldsetindex() = setindex!(b, X, I)
    newsetindex() = new_setindex!(b, X, I)

    @assert oldsetindex().chunks == newsetindex().chunks

    println("b = randbool(1_000_000); I = randbool(1_000_000); X = randbool(nnz(I)); b[I] = X")
    println(compare([oldsetindex, newsetindex], 10))
end

function dotest_setindex4()
    const I = bitunpack(randbool(1_000_000))
    const b = randbool(1_000_000)
    const X = randbool(nnz(I))

    oldsetindex() = setindex!(b, X, I)
    newsetindex() = new_setindex!(b, X, I)

    @assert oldsetindex().chunks == newsetindex().chunks

    println("b = randbool(1_000_000); I = bitunpack(randbool(1_000_000)); X = randbool(nnz(I)); b[I] = X")
    println(compare([oldsetindex, newsetindex], 10))
end

function dotest_setindex5()
    const I = bitunpack(randbool(1_000_000))
    const b = randbool(1_000_000)
    const X = bitunpack(randbool(nnz(I)))

    oldsetindex() = setindex!(b, X, I)
    newsetindex() = new_setindex!(b, X, I)

    @assert oldsetindex().chunks == newsetindex().chunks

    println("b = randbool(1_000_000); I = bitunpack(randbool(1_000_000)); X = bitunpack(randbool(nnz(I))); b[I] = X")
    println(compare([oldsetindex, newsetindex], 10))
end

function new_unary_minus(B::BitArray)
    A = zeros(Int, size(B))
    l = length(B)
    if l == 0
        return A
    end
    Bc = unsafechunks(B)
    uA = unsafe(A)
    ind = 1
    for i = 1:length(Bc)-1
        u = uint64(1)
        c = Bc[i]
        for j = 1:64
            if c & u != 0
                uA[ind] = -1
            end
            ind += 1
            u <<= 1
        end
    end
    u = uint64(1)
    c = Bc[end]
    for j = 0:@_mod64(l-1)
        if c & u != 0
            uA[ind] = -1
        end
        ind += 1
        u <<= 1
    end
    return A
end

function dotest_unaryminus()
    const b = randbool(1_000_000)

    oldunaryminus() = -b
    newunaryminus() = new_unary_minus(b)

    @assert oldunaryminus() == newunaryminus()

    println("b = randbool(1_000_000); -b")
    println(compare([oldunaryminus, newunaryminus], 200))
end

function new_bitnot(B::BitArray)
    C = similar(B)
    if isempty(B)
        return C
    end
    Bc = unsafechunks(B)
    Cc = unsafechunks(C)
    for i = 1:length(Bc)-1
        Cc[i] = ~Bc[i]
    end
    msk = @_msk_end length(B)
    Cc[end] = msk & ~Bc[end]
    return C
end

function dotest_bitnot()
    const b = randbool(1_000_017)

    oldbitnot() = ~b
    newbitnot() = new_bitnot(b)

    @assert oldbitnot().chunks == newbitnot().chunks

    println("b = randbool(1_000_017); ~b")
    println(compare([oldbitnot, newbitnot], 10000))
end

function new_flipbits!(B::BitArray)
    Bc = unsafechunks(B)
    if !isempty(Bc)
        for i = 1:length(Bc)-1
            Bc[i] = ~Bc[i]
        end
        msk = @_msk_end length(B)
        Bc[end] = msk & ~Bc[end]
    end
    return B
end

function dotest_flipbits()
    const b = randbool(1_000_017)
    const c = copy(b)

    oldflipbits() = flipbits!(b)
    newflipbits() = new_flipbits!(c)

    @assert oldflipbits().chunks == newflipbits().chunks

    println("b = randbool(1_000_017); flipbits!(b)")
    println(compare([oldflipbits, newflipbits], 100000))
end

for (nf,fb,f) in ((:new_div,:unsafe_div,:div),
                  (:new_mod,:unsafe_mod,:mod))
    @eval begin
        function ($fb)(uF::UnsafeArray, uB::UnsafeBitArray, x::Number)
            for i = 1:length(uF)
                uF[i] = ($f)(uB[i], x)
            end
        end
        function ($fb)(F::Array, uB::UnsafeBitArray, x::Number)
            for i = 1:length(F)
                F[i] = ($f)(uB[i], x)
            end
        end
        function ($nf)(B::BitArray, x::Number)
            T = promote_array_type(typeof(x), Bool)
            F = Array(T, size(B))
            uB = unsafe(B)
            uF = isbits(T) ? unsafe(F) : F
            ($fb)(uF, uB, x)
            return F
        end
    end
end

function dotest_divmod1()
    const b = randbool(1_000_017)
    const x = 2

    olddivmod() = (div(b, x), mod(b, x))
    newdivmod() = (new_div(b, x), new_mod(b, x))

    @assert olddivmod() == newdivmod()

    println("b = randbool(1_000_017); x = 2; (div(b,x),mod(b,x))")
    println(compare([olddivmod, newdivmod], 20))
end

function dotest_divmod2()
    const b = randbool(1_000_017)
    const x = 2.0

    olddivmod() = (div(b, x), mod(b, x))
    newdivmod() = (new_div(b, x), new_mod(b, x))

    @assert olddivmod() == newdivmod()

    println("b = randbool(1_000_017); x = 2.0; (div(b,x),mod(b,x))")
    println(compare([olddivmod, newdivmod], 40))
end

function dotest_divmod3()
    const b = randbool(40_017)
    const x = BigFloat(2.0)

    olddivmod() = (div(b, x), mod(b, x))
    newdivmod() = (new_div(b, x), new_mod(b, x))

    @assert olddivmod() == newdivmod()

    println("b = randbool(40_017); x = BigFloat(2.0); (div(b,x),mod(b,x))")
    println(compare([olddivmod, newdivmod], 10))
end

for (nf,f) in ((:new_and,:&), (:new_or,:|), (:new_xor,:$))
    @eval begin
        function ($nf)(A::BitArray, B::BitArray)
            F = BitArray(promote_shape(size(A),size(B))...)
            Ac = unsafechunks(A)
            Bc = unsafechunks(B)
            if !isempty(Ac) && !isempty(Bc)
                Fc = unsafechunks(F)
                for i = 1:length(Fc) - 1
                    Fc[i] = ($f)(Ac[i], Bc[i])
                end
                msk = @_msk_end length(F)
                Fc[end] = msk & ($f)(Ac[end], Bc[end])
            end
            return F
        end
    end
end

function dotest_andorxor()
    const b1 = randbool(1_000_017)
    const b2 = randbool(1_000_017)

    oldandorxor() = (b1 & b2, b1 | b2, b1 $ b2)
    newandorxor() = (new_and(b1,b2), new_or(b1,b2), new_xor(b1,b2))

    @assert oldandorxor() == newandorxor()

    println("b1 = randbool(1_000_017); b2 = randbool(1_000_017; (b1&b2, b1|b2, b1\$b2)")
    println(compare([oldandorxor, newandorxor], 1000))
end

function new_pow(A::BitArray, B::BitArray)
    F = BitArray(promote_shape(size(A),size(B))...)
    Ac = unsafechunks(A)
    Bc = unsafechunks(B)
    if !isempty(Ac) && !isempty(Bc)
        Fc = unsafechunks(F)
        for i = 1:length(Fc) - 1
            Fc[i] = Ac[i] | ~Bc[i]
        end
        msk = @_msk_end length(F)
        Fc[end] = msk & (Ac[end] | ~Bc[end])
    end
    return F
end

function dotest_pow1()
    const b1 = randbool(1_000_017)
    const b2 = randbool(1_000_017)

    oldpow() = b1 .^ b2
    newpow() = new_pow(b1,b2)

    @assert oldpow().chunks == newpow().chunks

    println("b1 = randbool(1_000_017); b2 = randbool(1_000_017); b1 .^ b2")
    println(compare([oldpow, newpow], 1000))
end

macro unsafe_pow(F, uB, u, z, uerr, zerr)
    quote
        for i = 1:length($(esc(uB)))
            if $(esc(uB))[i]
                if uerr == nothing
                    $(esc(F))[i] = $(esc(u))
                else
                    throw(uerr)
                end
            else
                if zerr == nothing
                    $(esc(F))[i] = $(esc(z))
                else
                    throw(zerr)
                end
            end
        end
    end
end

unsafe_pow(uF::UnsafeArray, uB::UnsafeBitArray, u, z, uerr, zerr) = @unsafe_pow(uF, uB, u, z, uerr, zerr)
unsafe_pow(F::Array, uB::UnsafeBitArray, u, z, uerr, zerr) = @unsafe_pow(F, uB, u, z, uerr, zerr)

function new_pow{T<:Number}(B::BitArray, x::T)
    if x == 0
        return ones(typeof(true ^ x), size(B))
    elseif T <: Real && x > 0
        return convert(Array{T}, B)
    else
        z = nothing
        u = nothing
        zerr = nothing
        uerr = nothing
        try
            z = false .^ x
        catch err
            zerr = err
        end
        try
            u = true .^ x
        catch err
            uerr = err
        end
        if zerr == nothing && uerr == nothing
            S = promote_type(typeof(z), typeof(u))
        elseif zerr == nothing
            S = typeof(z)
        else
            S = typeof(u)
        end
        F = Array(S, size(B))
        uB = unsafe(B)
        uF = isbits(F) ? unsafe(F) : F
        unsafe_pow(uF, uB, u, z, uerr, zerr)
        return F
    end
end

function dotest_pow2()
    const b = randbool(1_000_017)
    const x = -2.0

    oldpow() = b .^ x
    newpow() = new_pow(b,x)

    @assert oldpow() == newpow()

    println("b = randbool(1_000_017); x = -2.0; b .^ x")
    println(compare([oldpow, newpow], 10))
end

function dotest_pow3()
    const b = randbool(1_000_017)
    const x = BigFloat(-2)

    oldpow() = b .^ x
    newpow() = new_pow(b,x)

    @assert oldpow() == newpow()

    println("b = randbool(1_000_017); x = BigFloat(-2); b .^ x")
    println(compare([oldpow, newpow], 10))
end

function dumpbitcache(Bc::UnsafeArray{Uint64}, C::UnsafeArray{Bool}, ind::Int, nc::Int)
    cind = 1
    for i = 1:nc
        u = uint64(1)
        c = uint64(0)
        for j = 1:64
            if C[cind]
                c |= u
            end
            cind += 1
            u <<= 1
        end
        Bc[ind] = c
        ind += 1
    end
    return ind
end
# note: purposedly unhygenic macro: exp is an expression which
#       depends on "ind" and yields a Bool
macro bitcache_body(exp, l, ind, C)
    quote
        left = $(esc(l)) - $(esc(ind)) + 1
        for j = 1:min(bitcache_size, left)
            $(esc(C))[j] = $exp
            ind += 1
        end
        C[left+1:bitcache_size] = false
        return ind
    end
end

function bitcache_new_pow{T}(A::UnsafeBitArray, B::Array{T}, l::Int, ind::Int, C::UnsafeArray{Bool})
    @bitcache_body(bool(A[ind] .^ B[ind]), l, ind, C)
end
function new_pow{T<:Integer}(A::BitArray, B::Array{T})
    F = BitArray(promote_shape(size(A),size(B)))
    l = length(F)
    if l == 0
        return F
    end
    C = Array(Bool, bitcache_size)
    uC = unsafe(C)
    uA = unsafe(A)
    Fc = unsafechunks(F)
    lFc = length(Fc)
    indr = 1
    indw = 1
    for i = 1:div(l + bitcache_size - 1, bitcache_size)
        indr = bitcache_new_pow(uA, B, l, indr, uC)
        nc = min(bitcache_chunks, lFc - indw + 1)
        indw = dumpbitcache(Fc, uC, indw, nc)
    end
    return F
end

function dotest_pow4()
    const b = randbool(1_000_017)
    const a = rand(0:5, 1_000_017)

    oldpow() = b .^ a
    newpow() = new_pow(b,a)

    @assert oldpow() == newpow()

    println("b = randbool(1_000_017); a = rand(0:5, 1_000_017); b .^ a")
    println(compare([oldpow, newpow], 20))
end

for (cachef, scalarf) in ((:bitcache_new_eq , :(==)),
                          (:bitcache_new_lt , :<   ),
                          (:bitcache_new_neq, :!=  ),
                          (:bitcache_new_le , :<=  ))
    for (sigA, sigB, expA, expB) in ((:(AbstractArray{T}), :(AbstractArray{S}), :(A[ind]), :(B[ind])),
                                     (:(AbstractArray{T}), :(UnsafeArray{S}),   :(A[ind]), :(B[ind])),
                                     (:(AbstractArray{T}), :S,                  :(A[ind]), :B),
                                     (:(UnsafeArray{T}),   :(AbstractArray{S}), :(A[ind]), :(B[ind])),
                                     (:(UnsafeArray{T}),   :(UnsafeArray{S}),   :(A[ind]), :(B[ind])),
                                     (:(UnsafeArray{T}),   :S,                  :(A[ind]), :B),
                                     (:T,                  :(AbstractArray{S}), :A,        :(B[ind])),
                                     (:T,                  :(UnsafeArray{S}),   :A,        :(B[ind])))
        @eval function ($cachef){T,S}(A::$sigA, B::$sigB, l::Int, ind::Int, C::UnsafeArray{Bool})
            @bitcache_body(($scalarf)($expA, $expB), l, ind, C)
        end
    end
end

for (f, cachef) in ((:new_eq,  :bitcache_new_eq ),
                    (:new_lt,  :bitcache_new_lt ),
                    (:new_neq, :bitcache_new_neq),
                    (:new_le,  :bitcache_new_le ))
    for (sigA, sigB, shape) in ((:(AbstractArray{T}), :(AbstractArray{S}), :(promote_shape(size(A), size(B)))),
                                (:T,                  :(AbstractArray{S}), :(size(B))),
                                (:(AbstractArray{T}), :S,                  :(size(A))))
        @eval begin
            function ($f){T,S}(A::$sigA, B::$sigB)
                F = BitArray($shape)
                l = length(F)
                if l == 0
                    return F
                end
                C = Array(Bool, bitcache_size)
                uC = unsafe(C)
                Fc = unsafechunks(F)
                lFc = length(Fc)
                uA = isa(A, Array) && isbits(T) ? unsafe(A) : A
                uB = isa(B, Array) && isbits(S) ? unsafe(B) : B
                indr = 1
                indw = 1
                for i = 1:div(l + bitcache_size - 1, bitcache_size)
                    indr = ($cachef)(uA, uB, l, indr, uC)
                    nc = min(bitcache_chunks, lFc - indw + 1)
                    indw = dumpbitcache(Fc, uC, indw, nc)
                end
                return F
            end
        end
    end
end

function dotest_lt1()
    const a = rand(1_000_017)
    const x = 0.5

    oldlt() = a .< x
    newlt() = new_lt(a, x)

    @assert oldlt().chunks == newlt().chunks

    println("a = rand(1_000_017); x = 0.5; a .< x")
    println(compare([oldlt, newlt], 200))
end

function dotest_lt2()
    const a = rand(1_000_017)
    const b = rand(1_000_017)

    oldlt() = a .< b
    newlt() = new_lt(a, b)

    @assert oldlt().chunks == newlt().chunks

    println("a = rand(1_000_017); b = rand(1_000_017); a .< b")
    println(compare([oldlt, newlt], 200))
end

function dotest_lt3()
    const a = [BigInt(i) for i=rand(1:10,1_000_017)]
    const b = [BigInt(i) for i=rand(1:10,1_000_017)]

    oldlt() = a .< b
    newlt() = new_lt(a, b)

    @assert oldlt().chunks == newlt().chunks

    println("a = [BigInt(i) for i=rand(1:10,1_000_017)]; b = [BigInt(i) for i=rand(1:10,1_000_017)]; a .< b")
    println(compare([oldlt, newlt], 100))
end

function dotest_lt4()
    const a = [BigInt(i) for i=rand(1:10,10_017)]
    const b = rand(10_017)

    oldlt() = a .< b
    newlt() = new_lt(a, b)

    @assert oldlt().chunks == newlt().chunks

    println("a = [BigInt(i) for i=rand(1:10,10_017)]; b = rand(10_017); a .< b")
    println(compare([oldlt, newlt], 100))
end

function dotest_lt5()
    const a = [BigInt(i) for i=rand(1:10,10_017)]
    const x = 0.5

    oldlt() = a .< x
    newlt() = new_lt(a, x)

    @assert oldlt().chunks == newlt().chunks

    println("a = [BigInt(i) for i=rand(1:10,1_000_017)]; x = 0.5; a .< x")
    println(compare([oldlt, newlt], 100))
end

function new_findnext(B::BitArray, start::Integer)
    Bc = unsafechunks(B)
    if start <= 0
        throw(BoundsError())
    end
    if start > length(B)
        return 0
    end

    chunk_start = @_div64(start-1)+1
    mask = _msk64 << @_mod64(start-1)

    c = Bc[chunk_start]
    if c & mask != 0
        return (chunk_start-1) << 6 + trailing_zeros(c & mask) + 1
    end

    for i = chunk_start+1:length(Bc)
        c = Bc[i]
        if c != 0
            return (i-1) << 6 + trailing_zeros(c) + 1
        end
    end
    return 0
end

function dotest_findnext()
    const b = [falses(100_000_000), randbool(17)]

    oldfindnext() = (findnext(b, 1), findnext(b, 100_000_010))
    newfindnext() = (new_findnext(b, 1), new_findnext(b, 100_000_010))

    @assert oldfindnext() == newfindnext()

    println("b = [falses(100_000_000), randbool(17)]; (findnext(b, 1), findnext(b, 100_000_010))")
    println(compare([oldfindnext, newfindnext], 1000))
end

function new_findnextnot(B::BitArray, start::Integer)
    Bc = unsafechunks(B)
    l = length(Bc)
    if start <= 0
        throw(BoundsError())
    end
    if start > length(B)
        return 0
    end

    chunk_start = @_div64(start-1)+1
    mask = ~(_msk64 << @_mod64(start-1))

    c = Bc[chunk_start] | mask
    if c != _msk64
        return (chunk_start-1) << 6 + trailing_ones(c) + 1
    end

    for i = chunk_start+1:l-1
        c = Bc[i]
        if c != _msk64
            return (i-1) << 6 + trailing_ones(c) + 1
        end
    end

    c = Bc[l]
    if c != @_msk_end length(B)
        return (l-1) << 6 + trailing_ones(c) + 1
    end
    return 0
end

function dotest_findnextnot()
    const b = [trues(100_000_000), randbool(17)]

    oldfindnextnot() = (findnextnot(b, 1), findnextnot(b, 100_000_010))
    newfindnextnot() = (new_findnextnot(b, 1), new_findnextnot(b, 100_000_010))

    @assert oldfindnextnot() == newfindnextnot()

    println("b = [trues(100_000_000), randbool(17)]; (findnextnot(b, 1), findnextnot(b, 100_000_010))")
    println(compare([oldfindnextnot, newfindnextnot], 1000))
end

function new_find(B::BitArray)
    l = length(B)
    nnzB = nnz(B)
    I = Array(Int, nnzB)
    if nnzB == 0
        return I
    end
    uI = unsafe(I)
    # note: using unsafechunks(B) degrades performance by ~10x
    #       here (for mysterious reasons)
    #Bc = unsafechunks(B)
    Bc = UnsafeArray(B.chunks)
    Bcount = 1
    Icount = 1
    for i = 1:length(Bc)-1
        u = uint64(1)
        c = Bc[i]
        for j = 1:64
            if c & u != 0
                uI[Icount] = Bcount
                Icount += 1
            end
            Bcount += 1
            u <<= 1
        end
    end
    u = uint64(1)
    c = Bc[end]
    for j = 0:@_mod64(l-1)
        if c & u != 0
            uI[Icount] = Bcount
            Icount += 1
        end
        Bcount += 1
        u <<= 1
    end
    return I
end

function dotest_find()
    const b = randbool(1_000_017)

    oldfind() = find(b)
    newfind() = new_find(b)

    @assert oldfind() == newfind()

    println("b = randbool(1_000_017)]; find(b)")
    println(compare([oldfind, newfind], 100))
end

function new_all(B::BitArray)
    if length(B) == 0
        return true
    end
    # note: using unsafechunks or even
    #       unsafe_load(pBc, i) degrades
    #       performance a bit
    Bc = B.chunks
    pBc = pointer(Bc)
    for i = 1:length(Bc)-1
        if unsafe_load(pBc) != _msk64
            return false
        end
        pBc += sizeof(Uint64)
    end
    if unsafe_load(pBc) != @_msk_end length(B)
        return false
    end
    return true
end

function dotest_all()
    const b = [trues(9_999_900), randbool(117)]

    oldall() = all(b)
    newall() = new_all(b)

    @assert oldall() == newall()

    println("b = [trues(9_999_900), randbool(117)]; all(b)")
    println(compare([oldall, newall], 10000))
end

function new_any(B::BitArray)
    if length(B) == 0
        return false
    end
    Bc = unsafechunks(B)
    for i = 1:length(Bc)
        if Bc[i] != 0
            return true
        end
    end
    return false
end

function dotest_any()
    const b = [falses(9_999_900), randbool(117)]

    oldany() = any(b)
    newany() = new_any(b)

    @assert oldany() == newany()

    println("b = [trues(9_999_900), randbool(117)]; any(b)")
    println(compare([oldany, newany], 10000))
end


function dotest(;nobig=true)
    dotest_fill()
    dotest_copy()
    dotest_convert1()
    nobig || dotest_convert2()
    dotest_convert3()
    dotest_convert4()
    dotest_randfill()
    dotest_getindex1()
    dotest_getindex2()
    dotest_getindex3()
    dotest_getindex4()
    dotest_setindex1()
    dotest_setindex2()
    dotest_setindex3()
    dotest_setindex4()
    dotest_setindex5()
    dotest_unaryminus()
    dotest_bitnot()
    dotest_flipbits()
    dotest_divmod1()
    dotest_divmod2()
    nobig || dotest_divmod3()
    dotest_andorxor()
    dotest_pow1()
    dotest_pow2()
    nobig || dotest_pow3()
    dotest_pow4()
    dotest_lt1()
    dotest_lt2()
    nobig || dotest_lt3()
    nobig || dotest_lt4()
    nobig || dotest_lt5()
    dotest_findnext()
    dotest_findnextnot()
    dotest_find()
    dotest_all()
    dotest_any()
end


end
	b = BitArray(10_000_017); fill!(b, true)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldfill" 0.703632 3.394 1000
	[2,] "newfill" 0.207316 1.0 1000

	b1 = randbool(10_000_117); b2 = randbool(10_000_017); copy!(b1, b2)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldcopy" 0.834918 2.37655 1000
	[2,] "newcopy" 0.351316 1.0 1000

	b = randbool(10_000_117); convert(Array{Int}, b)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldconvert" 11.2007 7.20337 10
	[2,] "newconvert" 1.55492 1.0 10

	a = rand(0:1, 1_000_017); convert(BitArray, a)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldconvert" 2.0255 1.58136 20
	[2,] "newconvert" 1.28086 1.0 20

	a = {rand(0:1) for i = 1:1_017}; convert(BitArray, a)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldconvert" 0.320322 1.16844 10
	[2,] "newconvert" 0.274146 1.0 10

	b = BitArray(10_000_017); bitarray_rand_fill!(b)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldrandfill" 2.39907 1.20137 1000
	[2,] "newrandfill" 1.99694 1.0 1000

	b = randbool(10_000); I = rand(1:10_000, 10_000); b[I]
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldgetindex" 1.43322 6.71223 1000
	[2,] "newgetindex" 0.213524 1.0 1000

	b = randbool(100, 100); I1 = rand(1:100, 100); I2 = rand(1:100, 100); b[I1,I2]
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldgetindex" 1.9024 1.10048 1000
	[2,] "newgetindex" 1.72869 1.0 1000

	b = randbool(1_000_000); I = randbool(1_000_000); b[I]
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldgetindex" 1.18848 2.19207 10
	[2,] "newgetindex" 0.542172 1.0 10

	b = randbool(1_000_000); I = bitunpack(randbool(1_000_000)); b[I]
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldgetindex" 0.704441 4.51643 10
	[2,] "newgetindex" 0.155973 1.0 10

	b = randbool(1_000_017); I = randbool(1_000_017); b[I] = true
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldsetindex" 2.86153 1.0 50
	[2,] "newsetindex" 2.90139 1.01393 50

	b = randbool(1_000_017); I = bitunpack(randbool(1_000_017)); b[I] = true
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldsetindex" 10.048 1.04502 100
	[2,] "newsetindex" 9.61514 1.0 100

	b = randbool(1_000_000); I = randbool(1_000_000); X = randbool(nnz(I)); b[I] = X
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldsetindex" 0.806377 1.0 10
	[2,] "newsetindex" 0.879788 1.09104 10

	b = randbool(1_000_000); I = bitunpack(randbool(1_000_000)); X = randbool(nnz(I)); b[I] = X
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldsetindex" 0.419165 1.26231 10
	[2,] "newsetindex" 0.332061 1.0 10

	b = randbool(1_000_000); I = bitunpack(randbool(1_000_000)); X = bitunpack(randbool(nnz(I))); b[I] = X
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldsetindex" 0.123026 1.02952 10
	[2,] "newsetindex" 0.119498 1.0 10

	b = randbool(1_000_000); -b
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldunaryminus" 4.38952 1.46101 200
	[2,] "newunaryminus" 3.00445 1.0 200

	b = randbool(1_000_017); ~b
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldbitnot" 18.0818 9.30296 10000
	[2,] "newbitnot" 1.94367 1.0 10000

	b = randbool(1_000_017); flipbits!(b)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldflipbits" 6.47089 3.20978 100000
	[2,] "newflipbits" 2.01599 1.0 100000

	b = randbool(1_000_017); x = 2; (div(b,x),mod(b,x))
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "olddivmod" 6.25411 4.4016 20
	[2,] "newdivmod" 1.42087 1.0 20

	b = randbool(1_000_017); x = 2.0; (div(b,x),mod(b,x))
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "olddivmod" 13.6185 7.50146 40
	[2,] "newdivmod" 1.81545 1.0 40

	b1 = randbool(1_000_017); b2 = randbool(1_000_017; (b1&b2, b1\|b2, b1$b2)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldandorxor" 8.56246 18.8855 1000
	[2,] "newandorxor" 0.453389 1.0 1000

	b1 = randbool(1_000_017); b2 = randbool(1_000_017); b1 .^ b2
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldpow" 2.80608 16.3882 1000
	[2,] "newpow" 0.171226 1.0 1000

	b = randbool(1_000_017); x = -2.0; b .^ x
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldpow" 1.67773 5.53508 10
	[2,] "newpow" 0.303108 1.0 10

	b = randbool(1_000_017); a = rand(0:5, 1_000_017); b .^ a
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldpow" 1.65294 2.11172 20
	[2,] "newpow" 0.782746 1.0 20

	a = rand(1_000_017); x = 0.5; a .< x
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldlt" 1.2469 1.61651 200
	[2,] "newlt" 0.771353 1.0 200

	a = rand(1_000_017); b = rand(1_000_017); a .< b
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldlt" 1.56158 1.69547 200
	[2,] "newlt" 0.921032 1.0 200

	b = [falses(100_000_000), randbool(17)]; (findnext(b, 1), findnext(b, 100_000_010))
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldfindnext" 3.23528 1.26685 1000
	[2,] "newfindnext" 2.55381 1.0 1000

	b = [trues(100_000_000), randbool(17)]; (findnextnot(b, 1), findnextnot(b, 100_000_010))
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldfindnextnot" 3.98593 1.47118 1000
	[2,] "newfindnextnot" 2.70935 1.0 1000

	b = randbool(1_000_017)]; find(b)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldfind" 5.98756 10.8875 100
	[2,] "newfind" 0.54995 1.0 100

	b = [trues(9_999_900), randbool(117)]; all(b)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldall" 3.38011 2.52766 10000
	[2,] "newall" 1.33725 1.0 10000

	b = [trues(9_999_900), randbool(117)]; any(b)
	2x4 DataFrame:
	Function Elapsed Relative Replications
	[1,] "oldany" 2.70628 1.6008 10000
	[2,] "newany" 1.69058 1.0 10000