Last active
January 29, 2019 12:46
-
-
Save xiaodaigh/3611355501353728e7c036889e0e0c5b to your computer and use it in GitHub Desktop.
CPU bitonic sort
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
shared = rand(16) | |
bisort(shared) = begin | |
NUM = UInt(length(shared)) | |
k = UInt(2) | |
while (k <= NUM) | |
j = div(k,2) | |
while(j >=1) | |
for tid in UInt(0):UInt(NUM-1) | |
ixj = tid⊻j | |
if(ixj > tid) | |
if((tid & k) == 0) | |
if (shared[tid+1] > shared[ixj+1]) | |
tmp = shared[ixj+1] | |
shared[ixj+1] = shared[tid+1] | |
shared[tid+1] = tmp | |
end | |
else | |
if (shared[tid+1] < shared[ixj+1]) | |
tmp = shared[ixj+1] | |
shared[ixj+1] = shared[tid+1] | |
shared[tid+1] = tmp | |
end | |
end | |
end | |
end | |
j = div(j,2) | |
end | |
k = UInt(k*2) | |
end | |
shared | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bisort(shared, NUM) = begin | |
tid = UInt(((blockIdx().x - 1) * blockDim().x + threadIdx().x)-1) | |
k = UInt(2) | |
while (k <= NUM) | |
j = div(k, 2) | |
while j >= 1 | |
ixj = tid⊻UInt(j) | |
if ixj > tid | |
if (tid & k) == 0 | |
if shared[tid+1] > shared[ixj+1] | |
tmp = shared[ixj+1] | |
shared[ixj+1] = shared[tid+1] | |
shared[tid+1] = tmp | |
end | |
else | |
if shared[tid+1] < shared[ixj+1] | |
tmp = shared[ixj+1] | |
shared[ixj+1] = shared[tid+1] | |
shared[tid+1] = tmp | |
end | |
end | |
end | |
j = div(j,2) | |
sync_threads() | |
end | |
k = UInt(k*2) | |
end | |
return | |
end | |
using CuArrays, CUDAnative | |
shared = rand(Float32, 1024) | |
cushared = cu(shared) | |
bitonicsort!(cushared) = begin | |
CuArrays.@sync begin | |
@cuda threads = 512 blocks = 2 bisort(cushared, 1024) | |
end | |
end | |
using BenchmarkTools | |
@time bitonicsort!(cushared) | |
using SortingAlgorithms | |
@time sort(shared, alg=RadixSort) | |
using Flux | |
cpu(cushared) |> issorted |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment