Skip to content

Instantly share code, notes, and snippets.

@xiaodaigh
Last active January 29, 2019 12:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xiaodaigh/3611355501353728e7c036889e0e0c5b to your computer and use it in GitHub Desktop.
Save xiaodaigh/3611355501353728e7c036889e0e0c5b to your computer and use it in GitHub Desktop.
CPU bitonic sort
shared = rand(16)
bisort(shared) = begin
NUM = UInt(length(shared))
k = UInt(2)
while (k <= NUM)
j = div(k,2)
while(j >=1)
for tid in UInt(0):UInt(NUM-1)
ixj = tid⊻j
if(ixj > tid)
if((tid & k) == 0)
if (shared[tid+1] > shared[ixj+1])
tmp = shared[ixj+1]
shared[ixj+1] = shared[tid+1]
shared[tid+1] = tmp
end
else
if (shared[tid+1] < shared[ixj+1])
tmp = shared[ixj+1]
shared[ixj+1] = shared[tid+1]
shared[tid+1] = tmp
end
end
end
end
j = div(j,2)
end
k = UInt(k*2)
end
shared
end
bisort(shared, NUM) = begin
tid = UInt(((blockIdx().x - 1) * blockDim().x + threadIdx().x)-1)
k = UInt(2)
while (k <= NUM)
j = div(k, 2)
while j >= 1
ixj = tid⊻UInt(j)
if ixj > tid
if (tid & k) == 0
if shared[tid+1] > shared[ixj+1]
tmp = shared[ixj+1]
shared[ixj+1] = shared[tid+1]
shared[tid+1] = tmp
end
else
if shared[tid+1] < shared[ixj+1]
tmp = shared[ixj+1]
shared[ixj+1] = shared[tid+1]
shared[tid+1] = tmp
end
end
end
j = div(j,2)
sync_threads()
end
k = UInt(k*2)
end
return
end
using CuArrays, CUDAnative
shared = rand(Float32, 1024)
cushared = cu(shared)
bitonicsort!(cushared) = begin
CuArrays.@sync begin
@cuda threads = 512 blocks = 2 bisort(cushared, 1024)
end
end
using BenchmarkTools
@time bitonicsort!(cushared)
using SortingAlgorithms
@time sort(shared, alg=RadixSort)
using Flux
cpu(cushared) |> issorted
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment