# nalimilan/table.jl

## table.jl
using DataArrays
using NamedArrays

# Frequency table of one or more PooledDataVectors of equal length.
#
# Each argument contributes one dimension to the result; a cell holds the
# number of positions at which the corresponding combination of refs occurs.
# A position whose ref is zero in any argument is left out, unless `usena` is
# true: every dimension then gets one extra trailing slot, named "NA", which
# collects the zero refs.
#
# Raises an error when the arguments differ in length. The counts come back
# wrapped in a NamedArray whose dimensions are named "Dim1", "Dim2", ...
function table(x::PooledDataVector...; usena::Bool = false)
    n = length(x)
    lens = [length(v) for v in x]
    for len in lens
        len == lens[1] || error("arguments are not of the same length: $lens")
    end

    nobs = lens[1]
    lev = [levels(v) for v in x]
    dimnames = ntuple(n, k -> "Dim$k")

    if usena
        # Slot that receives the zero refs along each dimension
        naslot = [length(lv) + 1 for lv in lev]
        counts = zeros(Int, ntuple(n, k -> length(lev[k]) + 1))
        idx = Array(Int, n)

        for row in 1:nobs
            for j in 1:n
                ref = int(x[j].refs[row])
                if ref == zero(ref)
                    ref = naslot[j]
                end
                @inbounds idx[j] = ref
            end

            @inbounds counts[idx...] += 1
        end

        return NamedArray(counts, ntuple(n, k -> [lev[k], "NA"]), dimnames)
    end

    counts = zeros(Int, ntuple(n, k -> length(lev[k])))

    for row in 1:nobs
        # A fresh tuple of refs is built for every position
        refs = ntuple(n, j -> x[j].refs[row])

        hasna = false
        for r in refs
            if r == zero(r)
                hasna = true
                break
            end
        end
        hasna && continue

        @inbounds counts[refs...] += 1
    end

    return NamedArrays.NamedArray(counts, ntuple(n, k -> lev[k]), dimnames)
end


# Variant of `table` that fills one preallocated index buffer for every
# position instead of building a tuple of refs per position.
#
# The contract is the same: one dimension per argument, positions with a zero
# ref are skipped unless `usena` is true, in which case each dimension has an
# extra trailing "NA" slot counting them. Raises an error when the arguments
# differ in length; returns a NamedArray with dimensions "Dim1", "Dim2", ...
function table2(x::PooledDataVector...; usena::Bool = false)
    n = length(x)
    lens = [length(y) for y in x]
    for k in 2:n
        if lens[k] != lens[1]
            error("arguments are not of the same length: $lens")
        end
    end

    nobs = lens[1]
    lev = [levels(y) for y in x]
    cell = Array(Int, n)    # index buffer shared by all positions

    if usena
        nacode = [length(lv) + 1 for lv in lev]
        counts = zeros(Int, ntuple(n, d -> length(lev[d]) + 1))

        for obs in 1:nobs
            for j in 1:n
                code = int(x[j].refs[obs])
                @inbounds cell[j] = (code == zero(code)) ? nacode[j] : code
            end

            @inbounds counts[cell...] += 1
        end

        result = NamedArray(counts,
                            ntuple(n, d -> [lev[d], "NA"]),
                            ntuple(n, d -> "Dim$d"))
    else
        counts = zeros(Int, ntuple(n, d -> length(lev[d])))

        for obs in 1:nobs
            for j in 1:n
                cell[j] = x[j].refs[obs]
            end

            # Scan for the first zero ref; reaching the end means there is none
            k = 1
            while k <= n && cell[k] != zero(cell[k])
                k += 1
            end

            if k > n
                @inbounds counts[cell...] += 1
            end
        end

        result = NamedArrays.NamedArray(counts,
                                        ntuple(n, d -> lev[d]),
                                        ntuple(n, d -> "Dim$d"))
    end

    return result
end


# Cross-tabulate one or more PooledDataVectors of equal length, computing a
# single linear index per position instead of splatting one index per dimension.
#
# Arguments:
#   x...  - the vectors to tabulate; each contributes one dimension
#   usena - when true, every dimension gets an extra trailing slot named "NA"
#           that counts the positions whose ref is zero; when false (default),
#           a position with a zero ref in any argument is not counted at all
#
# Returns a NamedArray of counts whose names along dimension i are the levels
# of x[i] (plus "NA" when `usena` is true) and whose dimensions are named
# "Dim1", "Dim2", ...
#
# Raises an error if the arguments do not all have the same length.
function table3(x::PooledDataVector...; usena::Bool = false)
    n = length(x)
    len = [length(y) for y in x]

    for i in 1:n
        if len[1] != len[i]
            error(string("arguments are not of the same length: ", tuple(len...)))
        end
    end

    lev = [levels(y) for y in x]

    if usena
        dims = ntuple(n, i -> length(lev[i]) + 1)
        # Index of the "NA" slot along each dimension. Built from `lev` because
        # a comprehension over the `dims` tuple gives an Any array, which hurts
        # performance.
        nalev = [length(lev[i]) + 1 for i in 1:n]
        # sizes[j - 1] is the linear-index stride of dimension j
        sizes = cumprod(nalev)
        a = zeros(Int, dims)

        for i in 1:len[1]
            el = int(x[1].refs[i])::Int

            # A zero ref in the first vector must go to the "NA" slot as well;
            # left at zero it would shift the linear index one position too
            # low, unchecked because of the @inbounds below.
            if el == zero(el)
                el = nalev[1]
            end

            for j in 2:n
                val = int(x[j].refs[i])::Int

                if val == zero(val)
                    val = nalev[j]
                end

                el += int((val - 1) * sizes[j - 1])::Int
            end

            @inbounds a[el] += 1
        end

        return NamedArray(a, ntuple(n, i -> [lev[i], "NA"]), ntuple(n, i -> "Dim$i"))
    else
        dims = ntuple(n, i -> length(lev[i]))
        # sizes[j - 1] is the linear-index stride of dimension j
        sizes = cumprod([dims...])
        a = zeros(Int, dims)

        for i in 1:len[1]
            # `pos` stays true only if no argument has a zero ref at position i
            pos = (x[1].refs[i] != zero(Uint))
            el = int(x[1].refs[i])::Int

            for j in 2:n
                val = x[j].refs[i]

                if val == zero(val)
                    pos = false
                    break
                end

                el += int((val - 1) * sizes[j - 1])::Int
            end

            if pos
                @inbounds a[el] += 1
            end
        end

        return NamedArrays.NamedArray(a, ntuple(n, i -> lev[i]), ntuple(n, i -> "Dim$i"))
    end
end


## To test
a = PooledDataArray(rep(1:10, 100000))

# precompile takes a tuple of argument *types*, not of values, so typeof(a) is
# passed; the intent is to keep compilation out of the timed calls below.
precompile(table, (typeof(a),))
precompile(table2, (typeof(a),))
precompile(table3, (typeof(a),))

# One-way tables
@time table(a)
@time table2(a)
@time table3(a)

precompile(table, (typeof(a), typeof(a)))
precompile(table2, (typeof(a), typeof(a)))
precompile(table3, (typeof(a), typeof(a)))

# Two-way tables
@time table(a, a)
@time table2(a, a)
@time table3(a, a)
	using DataArrays
	using NamedArrays

	# Frequency table of one or more PooledDataVectors of equal length.
	#
	# Each argument contributes one dimension to the result; a cell holds the
	# number of positions at which the corresponding combination of refs occurs.
	# A position whose ref is zero in any argument is left out, unless `usena` is
	# true: every dimension then gets one extra trailing slot, named "NA", which
	# collects the zero refs.
	#
	# Raises an error when the arguments differ in length. The counts come back
	# wrapped in a NamedArray whose dimensions are named "Dim1", "Dim2", ...
	function table(x::PooledDataVector...; usena::Bool = false)
	    n = length(x)
	    lens = [length(v) for v in x]
	    for len in lens
	        len == lens[1] || error("arguments are not of the same length: $lens")
	    end

	    nobs = lens[1]
	    lev = [levels(v) for v in x]
	    dimnames = ntuple(n, k -> "Dim$k")

	    if usena
	        # Slot that receives the zero refs along each dimension
	        naslot = [length(lv) + 1 for lv in lev]
	        counts = zeros(Int, ntuple(n, k -> length(lev[k]) + 1))
	        idx = Array(Int, n)

	        for row in 1:nobs
	            for j in 1:n
	                ref = int(x[j].refs[row])
	                if ref == zero(ref)
	                    ref = naslot[j]
	                end
	                @inbounds idx[j] = ref
	            end

	            @inbounds counts[idx...] += 1
	        end

	        return NamedArray(counts, ntuple(n, k -> [lev[k], "NA"]), dimnames)
	    end

	    counts = zeros(Int, ntuple(n, k -> length(lev[k])))

	    for row in 1:nobs
	        # A fresh tuple of refs is built for every position
	        refs = ntuple(n, j -> x[j].refs[row])

	        hasna = false
	        for r in refs
	            if r == zero(r)
	                hasna = true
	                break
	            end
	        end
	        hasna && continue

	        @inbounds counts[refs...] += 1
	    end

	    return NamedArrays.NamedArray(counts, ntuple(n, k -> lev[k]), dimnames)
	end


	# Variant of `table` that fills one preallocated index buffer for every
	# position instead of building a tuple of refs per position.
	#
	# The contract is the same: one dimension per argument, positions with a zero
	# ref are skipped unless `usena` is true, in which case each dimension has an
	# extra trailing "NA" slot counting them. Raises an error when the arguments
	# differ in length; returns a NamedArray with dimensions "Dim1", "Dim2", ...
	function table2(x::PooledDataVector...; usena::Bool = false)
	    n = length(x)
	    lens = [length(y) for y in x]
	    for k in 2:n
	        if lens[k] != lens[1]
	            error("arguments are not of the same length: $lens")
	        end
	    end

	    nobs = lens[1]
	    lev = [levels(y) for y in x]
	    cell = Array(Int, n)    # index buffer shared by all positions

	    if usena
	        nacode = [length(lv) + 1 for lv in lev]
	        counts = zeros(Int, ntuple(n, d -> length(lev[d]) + 1))

	        for obs in 1:nobs
	            for j in 1:n
	                code = int(x[j].refs[obs])
	                @inbounds cell[j] = (code == zero(code)) ? nacode[j] : code
	            end

	            @inbounds counts[cell...] += 1
	        end

	        result = NamedArray(counts,
	                            ntuple(n, d -> [lev[d], "NA"]),
	                            ntuple(n, d -> "Dim$d"))
	    else
	        counts = zeros(Int, ntuple(n, d -> length(lev[d])))

	        for obs in 1:nobs
	            for j in 1:n
	                cell[j] = x[j].refs[obs]
	            end

	            # Scan for the first zero ref; reaching the end means there is none
	            k = 1
	            while k <= n && cell[k] != zero(cell[k])
	                k += 1
	            end

	            if k > n
	                @inbounds counts[cell...] += 1
	            end
	        end

	        result = NamedArrays.NamedArray(counts,
	                                        ntuple(n, d -> lev[d]),
	                                        ntuple(n, d -> "Dim$d"))
	    end

	    return result
	end


	# Cross-tabulate one or more PooledDataVectors of equal length, computing a
	# single linear index per position instead of splatting one index per dimension.
	#
	# Arguments:
	#   x...  - the vectors to tabulate; each contributes one dimension
	#   usena - when true, every dimension gets an extra trailing slot named "NA"
	#           that counts the positions whose ref is zero; when false (default),
	#           a position with a zero ref in any argument is not counted at all
	#
	# Returns a NamedArray of counts whose names along dimension i are the levels
	# of x[i] (plus "NA" when `usena` is true) and whose dimensions are named
	# "Dim1", "Dim2", ...
	#
	# Raises an error if the arguments do not all have the same length.
	function table3(x::PooledDataVector...; usena::Bool = false)
	    n = length(x)
	    len = [length(y) for y in x]

	    for i in 1:n
	        if len[1] != len[i]
	            error(string("arguments are not of the same length: ", tuple(len...)))
	        end
	    end

	    lev = [levels(y) for y in x]

	    if usena
	        dims = ntuple(n, i -> length(lev[i]) + 1)
	        # Index of the "NA" slot along each dimension. Built from `lev` because
	        # a comprehension over the `dims` tuple gives an Any array, which hurts
	        # performance.
	        nalev = [length(lev[i]) + 1 for i in 1:n]
	        # sizes[j - 1] is the linear-index stride of dimension j
	        sizes = cumprod(nalev)
	        a = zeros(Int, dims)

	        for i in 1:len[1]
	            el = int(x[1].refs[i])::Int

	            # A zero ref in the first vector must go to the "NA" slot as well;
	            # left at zero it would shift the linear index one position too
	            # low, unchecked because of the @inbounds below.
	            if el == zero(el)
	                el = nalev[1]
	            end

	            for j in 2:n
	                val = int(x[j].refs[i])::Int

	                if val == zero(val)
	                    val = nalev[j]
	                end

	                el += int((val - 1) * sizes[j - 1])::Int
	            end

	            @inbounds a[el] += 1
	        end

	        return NamedArray(a, ntuple(n, i -> [lev[i], "NA"]), ntuple(n, i -> "Dim$i"))
	    else
	        dims = ntuple(n, i -> length(lev[i]))
	        # sizes[j - 1] is the linear-index stride of dimension j
	        sizes = cumprod([dims...])
	        a = zeros(Int, dims)

	        for i in 1:len[1]
	            # `pos` stays true only if no argument has a zero ref at position i
	            pos = (x[1].refs[i] != zero(Uint))
	            el = int(x[1].refs[i])::Int

	            for j in 2:n
	                val = x[j].refs[i]

	                if val == zero(val)
	                    pos = false
	                    break
	                end

	                el += int((val - 1) * sizes[j - 1])::Int
	            end

	            if pos
	                @inbounds a[el] += 1
	            end
	        end

	        return NamedArrays.NamedArray(a, ntuple(n, i -> lev[i]), ntuple(n, i -> "Dim$i"))
	    end
	end



	## To test
	a = PooledDataArray(rep(1:10, 100000))

	# precompile takes a tuple of argument *types*, not of values, so typeof(a) is
	# passed; the intent is to keep compilation out of the timed calls below.
	precompile(table, (typeof(a),))
	precompile(table2, (typeof(a),))
	precompile(table3, (typeof(a),))

	# One-way tables
	@time table(a)
	@time table2(a)
	@time table3(a)

	precompile(table, (typeof(a), typeof(a)))
	precompile(table2, (typeof(a), typeof(a)))
	precompile(table3, (typeof(a), typeof(a)))

	# Two-way tables
	@time table(a, a)
	@time table2(a, a)
	@time table3(a, a)