Skip to content

Instantly share code, notes, and snippets.

@andreasnoack
Created June 29, 2018 14:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andreasnoack/740e11fa7cb128831f861ceff53a5c8b to your computer and use it in GitHub Desktop.
Save andreasnoack/740e11fa7cb128831f861ceff53a5c8b to your computer and use it in GitHub Desktop.
CUDAnative stencil
function stencil(v, b, ω, i, j)
vij = (1 - ω)*v[i, j] +
ω*(v[i + 1, j] + v[i - 1, j] + v[i, j + 1] + v[i, j - 1] +
b[i, j])/4
# if i == 2 && j == 5
# @cuprintf("HALLO! %f\n", Float64(vij))
# end
return vij
end
function stencil_inner_kernel!(v, b, ω, n)
li = (blockIdx().x-1) * blockDim().x + threadIdx().x
mm, nn = size(v)
i, j = ind2sub(v, li)
@cuprintf("li = %d, i = %d, j = %d\n", Int32(li), Int32(i), Int32(j))
# even-even
if iseven(i) && i <= 2n &&
iseven(j) && j <= 2n
vij = stencil(v, b, ω, i, j)
v[i, j] = vij
end
sync_threads()
sync_warp()
# odd-odd
if isodd(i) && 1 < i <= (2n - 1) &&
isodd(j) && 1 < j <= (2n + 1)
vij = stencil(v, b, ω, i, j)
v[i, j] = vij
end
sync_threads()
sync_warp()
# even-odd
if iseven(i) && i <= 2n &&
isodd(j) && 1 < j <= (2n + 1)
vij = stencil(v, b, ω, i, j)
v[i, j] = vij
end
sync_threads()
sync_warp()
# odd-even
if isodd(i) && 1 < i <= (2n - 1) &&
iseven(j) && j <= 2n
vij = stencil(v, b, ω, i, j)
v[i, j] = vij
end
sync_threads()
sync_warp()
return nothing
end
function stencil_outer_kernel!(v, b, ω, n, iter)
for i in 1:iter
stencil_inner_kernel!(v, b, ω, n)
end
return nothing
end
function compute_resistance_gpu(n, nreps = 100)
# assume n and omega already defined or take
# the following values for the optimal omega
μ = (cos(π/(2*n)) + cos(π/(2*n + 1)))/2
ω = 2*(1 - sqrt(1 - μ^2))/μ^2
# (See page 409 of Strang Intro to Applied Math , this is equation 16)
# Initialize voltages
v = fill!(CuArray{Float32}(2*n + 1, 2*n + 2), 0)
# Define Input Currents
b = copy(v)
b[n + 1, (n + 1):(n + 2)] = [1 -1]
BLOCKSIZE = 2
# Jacobi Steps
@show size(v)
@show mn = length(v)
@show gridsize = div(mn, BLOCKSIZE) + 1
@cuda (gridsize, BLOCKSIZE) stencil_outer_kernel!(v, b, ω, n, nreps)
# Compute resistance = v_A - v_b = 2 v_A
r = 2*v[n + 1, n + 1]
return v, b, r
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment