Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
CPU bitonic sort
shared = rand(16)
bisort(shared) = begin
NUM = UInt(length(shared))
k = UInt(2)
while (k <= NUM)
j = div(k,2)
while(j >=1)
for tid in UInt(0):UInt(NUM-1)
ixj = tid⊻j
if(ixj > tid)
if((tid & k) == 0)
if (shared[tid+1] > shared[ixj+1])
tmp = shared[ixj+1]
shared[ixj+1] = shared[tid+1]
shared[tid+1] = tmp
end
else
if (shared[tid+1] < shared[ixj+1])
tmp = shared[ixj+1]
shared[ixj+1] = shared[tid+1]
shared[tid+1] = tmp
end
end
end
end
j = div(j,2)
end
k = UInt(k*2)
end
shared
end
bisort(shared, NUM) = begin
tid = UInt(((blockIdx().x - 1) * blockDim().x + threadIdx().x)-1)
k = UInt(2)
while (k <= NUM)
j = div(k, 2)
while j >= 1
ixj = tid⊻UInt(j)
if ixj > tid
if (tid & k) == 0
if shared[tid+1] > shared[ixj+1]
tmp = shared[ixj+1]
shared[ixj+1] = shared[tid+1]
shared[tid+1] = tmp
end
else
if shared[tid+1] < shared[ixj+1]
tmp = shared[ixj+1]
shared[ixj+1] = shared[tid+1]
shared[tid+1] = tmp
end
end
end
j = div(j,2)
sync_threads()
end
k = UInt(k*2)
end
return
end
using CuArrays, CUDAnative
shared = rand(Float32, 1024)
cushared = cu(shared)
bitonicsort!(cushared) = begin
CuArrays.@sync begin
@cuda threads = 512 blocks = 2 bisort(cushared, 1024)
end
end
using BenchmarkTools
@time bitonicsort!(cushared)
using SortingAlgorithms
@time sort(shared, alg=RadixSort)
using Flux
cpu(cushared) |> issorted
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.