Skip to content

Instantly share code, notes, and snippets.

@xiaodaigh

xiaodaigh/0_code.jl

Last active Aug 30, 2020
Embed
What would you like to do?
Fast implementation of `nuniuqe` in a SORTED vector
x = rand(1:1_000_000, 1_000_000_000)
using SortingLab
fsort!(x)
function unroll_loop(x)
count = 0
@inbounds count += x[1] != x[2]
@inbounds count += x[2] != x[3]
@inbounds count += x[3] != x[4]
@inbounds count += x[4] != x[5]
@inbounds count += x[5] != x[6]
@inbounds count += x[6] != x[7]
@inbounds count += x[7] != x[8]
@inbounds count += x[8] != x[9]
l = length(x)
upto = 8div(l, 8) - 8
@inbounds for i in 8:8:upto
count += x[i+1] != x[i+2]
count += x[i+2] != x[i+3]
count += x[i+3] != x[i+4]
count += x[i+4] != x[i+5]
count += x[i+5] != x[i+6]
count += x[i+6] != x[i+7]
count += x[i+7] != x[i+8]
count += x[i+8] != x[i+9]
end
for i in upto+1:l
count += x[i] != x[i-1]
end
count
end
using LoopVectorization
function nunique(x)
count = 1
@avx for i in 1:length(x)-1
count += !isequal(x[i], x[i+1])
end
count
end
@time nunique(x)
function lo_hi_partition(n, parts = Threads.nthreads())
part_len = div(n, parts)
lo = collect(0:part_len:n-part_len) .+ 1
hi = similar(lo)
hi[1:end-1] .= lo[2:end] .- 1
hi[end] = n
lo, hi
end
lo_hi_partition(length(x))
function pnunique(x)
lo, hi = lo_hi_partition(length(x))
cnt = 1
nt = Threads.nthreads()
Threads.@threads for i in 1:nt
l, h = lo[i], hi[i]
@inbounds cnt += nunique(@view x[l:h])
end
for (l, h) in zip(@view(lo[2:end]), hi)
@inbounds cnt -= isequal(x[h], x[l])
end
cnt
end
@benchmark pnunique(x)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.