Created
June 29, 2018 14:39
-
-
Save andreasnoack/740e11fa7cb128831f861ceff53a5c8b to your computer and use it in GitHub Desktop.
CUDAnative stencil
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function stencil(v, b, ω, i, j) | |
vij = (1 - ω)*v[i, j] + | |
ω*(v[i + 1, j] + v[i - 1, j] + v[i, j + 1] + v[i, j - 1] + | |
b[i, j])/4 | |
# if i == 2 && j == 5 | |
# @cuprintf("HALLO! %f\n", Float64(vij)) | |
# end | |
return vij | |
end | |
function stencil_inner_kernel!(v, b, ω, n) | |
li = (blockIdx().x-1) * blockDim().x + threadIdx().x | |
mm, nn = size(v) | |
i, j = ind2sub(v, li) | |
@cuprintf("li = %d, i = %d, j = %d\n", Int32(li), Int32(i), Int32(j)) | |
# even-even | |
if iseven(i) && i <= 2n && | |
iseven(j) && j <= 2n | |
vij = stencil(v, b, ω, i, j) | |
v[i, j] = vij | |
end | |
sync_threads() | |
sync_warp() | |
# odd-odd | |
if isodd(i) && 1 < i <= (2n - 1) && | |
isodd(j) && 1 < j <= (2n + 1) | |
vij = stencil(v, b, ω, i, j) | |
v[i, j] = vij | |
end | |
sync_threads() | |
sync_warp() | |
# even-odd | |
if iseven(i) && i <= 2n && | |
isodd(j) && 1 < j <= (2n + 1) | |
vij = stencil(v, b, ω, i, j) | |
v[i, j] = vij | |
end | |
sync_threads() | |
sync_warp() | |
# odd-even | |
if isodd(i) && 1 < i <= (2n - 1) && | |
iseven(j) && j <= 2n | |
vij = stencil(v, b, ω, i, j) | |
v[i, j] = vij | |
end | |
sync_threads() | |
sync_warp() | |
return nothing | |
end | |
function stencil_outer_kernel!(v, b, ω, n, iter) | |
for i in 1:iter | |
stencil_inner_kernel!(v, b, ω, n) | |
end | |
return nothing | |
end | |
function compute_resistance_gpu(n, nreps = 100) | |
# assume n and omega already defined or take | |
# the following values for the optimal omega | |
μ = (cos(π/(2*n)) + cos(π/(2*n + 1)))/2 | |
ω = 2*(1 - sqrt(1 - μ^2))/μ^2 | |
# (See page 409 of Strang Intro to Applied Math , this is equation 16) | |
# Initialize voltages | |
v = fill!(CuArray{Float32}(2*n + 1, 2*n + 2), 0) | |
# Define Input Currents | |
b = copy(v) | |
b[n + 1, (n + 1):(n + 2)] = [1 -1] | |
BLOCKSIZE = 2 | |
# Jacobi Steps | |
@show size(v) | |
@show mn = length(v) | |
@show gridsize = div(mn, BLOCKSIZE) + 1 | |
@cuda (gridsize, BLOCKSIZE) stencil_outer_kernel!(v, b, ω, n, nreps) | |
# Compute resistance = v_A - v_b = 2 v_A | |
r = 2*v[n + 1, n + 1] | |
return v, b, r | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment