# Lattice Boltzmann method (D2Q9) — CPU and CUDAnative GPU implementation.
# (Header text was GitHub gist page chrome accidentally captured by the scrape.)
using LinearAlgebra, Statistics
using StaticArrays, Parameters
using CUDAdrv, CUDAnative
# Lattice Boltzmann state container.
# Type parameters: D = spatial dimensions, Q = number of discrete velocities
# (e.g. D=2, Q=9 for the D2Q9 model), T = floating-point element type.
struct Lattice{D,Q,T}
    δt :: T                        # time step
    δx :: T                        # grid spacing
    τ :: T                         # relaxation time (BGK collision)
    e :: Vector{SVector{D,T}}      # discrete velocity set, Q entries
    w :: Vector{T}                 # quadrature weights, Q entries (sum to 1)
    ρ :: Array{T,D}                # density field, one value per cell
    u :: Array{SVector{D,T},D}     # macroscopic velocity field, one vector per cell
end
"""
    Lattice{D,Q,T}(dims=(128,128); δt=1, δx=1, τ=0.65)

Construct a `D`-dimensional, `Q`-velocity lattice of extent `dims` with unit
density and zero velocity everywhere. `dims` may be a scalar (replicated to all
`D` dimensions) or a `D`-tuple. Only the D2Q9 velocity set (`Q == 9`) is
implemented.

Throws `ArgumentError` for inconsistent `dims` and errors for unsupported `Q`.
"""
function Lattice{D,Q,T}(dims=(128,128); δt::Real=1, δx::Real=1, τ::Real=0.65) where {D,Q,T<:AbstractFloat}
    # Normalize `dims`: a scalar (or 1-tuple) is replicated across all D axes.
    # NOTE(fix): the original if/elseif/else fell into `error(...)` whenever a
    # *valid* D-length tuple was supplied (including the default (128,128)),
    # and `(dims, dims)` would have nested a 1-tuple instead of expanding it.
    if dims isa Integer
        dims = ntuple(_ -> dims, D)
    elseif length(dims) == 1
        dims = ntuple(_ -> dims[1], D)
    elseif length(dims) != D
        throw(ArgumentError("Wrong dimensions supplied: expected $D extents, got $(length(dims))."))
    end
    if Q == 9
        D == 2 || throw(ArgumentError("Q = 9 (D2Q9) requires D = 2, got D = $D."))
        # D2Q9 velocity set scaled to physical units: rest, axis, and diagonal directions.
        e = δx/δt * Vector{SVector{2,T}}([
            [0, 0],
            [1, 0], [0, 1], [-1, 0], [0, -1],
            [1, 1], [-1, 1], [-1, -1], [1, -1]
        ])
        # Standard D2Q9 weights: 4/9 rest, 1/9 axis, 1/36 diagonal.
        w = Vector{T}([4/9, 1/9, 1/9, 1/9, 1/9, 1/36, 1/36, 1/36, 1/36])
    else
        error("Unsupported velocity set Q = $Q; only Q = 9 (D2Q9) is implemented.")
    end
    return Lattice{D,Q,T}(δt, δx, τ, e, w, ones(T, dims), zeros(SVector{D,T}, dims))
end
"""
    advance!(lattice::Lattice{D,Q,T})

Advance the lattice one time step in place (predictor/corrector form of the
lattice-Boltzmann update) with periodic boundaries. Returns `nothing`.

NOTE(review): the update is in place, so cells swept later read already-updated
neighbors (Gauss–Seidel-like ordering) — this matches the original behavior.
"""
function advance!(lattice::Lattice{D,Q,T}) where {D,Q,T<:AbstractFloat}
    @unpack δt, δx, τ, e, w, ρ, u = lattice
    NX, NY = size(u)
    cs2 = one(T) / T(3)    # lattice speed of sound squared, cs² = 1/3 (loop invariant)
    half = one(T) / T(2)   # keep arithmetic in T; bare 0.5 would promote Float32 to Float64
    @inbounds for y in 1:NY, x in 1:NX
        eq0 = zero(MVector{Q,T})
        # Predictor: accumulate equilibrium contributions streamed in from the
        # upwind neighbor of each discrete velocity.
        ρ0, u0 = zero(T), zero(SVector{D,T})
        for q in 1:Q
            # Upwind source cell, wrapped periodically.
            i, j = Int(x - δt*e[q][1]), Int(y - δt*e[q][2])
            if i < 1; i = NX; elseif i > NX; i = 1; end
            if j < 1; j = NY; elseif j > NY; j = 1; end
            uij = u[i,j]
            uu = dot(uij, uij)
            eu = dot(e[q], uij)
            # D2Q9 equilibrium: w·ρ·(1 + e·u/cs² + (e·u)²/2cs⁴ − u²/2cs²).
            # NOTE(fix): original had `cs*uu`, which is dimensionally
            # inconsistent with the standard equilibrium; corrected to cs²·uu.
            eq0[q] = ρ[i,j] * w[q] * (one(T) + eu/cs2 + half*(eu^2 - cs2*uu)/cs2^2)
            ρ0 += eq0[q]
            u0 += eq0[q] * e[q]
        end
        u0 = u0 / ρ0
        # Corrector: relax the non-equilibrium part toward equilibrium at (ρ0, u0).
        ρ[x,y] = ρ0
        # NOTE(fix): original initialized sumneq as a scalar zero(T); adding
        # SVector terms to it is a MethodError — it must be a D-vector.
        sumneq = zero(SVector{D,T})
        uu = dot(u0, u0)
        for q in 1:Q
            eu = dot(e[q], u0)
            eq1 = ρ0 * w[q] * (one(T) + eu/cs2 + half*(eu^2 - cs2*uu)/cs2^2)
            sumneq += e[q] * (eq1 - eq0[q])
        end
        u[x,y] = u0 - (one(T) - one(T)/τ) * sumneq / ρ0
    end
    return nothing
end
"""
    advance_gpu!(ρ, u, δt, δx, τ, e, w)

CUDA kernel: one predictor/corrector lattice-Boltzmann step for the cell owned
by this thread, periodic boundaries. One thread per cell; returns `nothing`.

NOTE(review): threads read neighbor cells of `ρ`/`u` while other threads write
their own — the kernel races unless double-buffered; this mirrors the original
in-place design. TODO: confirm acceptable or add a second buffer.
"""
function advance_gpu!(ρ::CuDeviceArray{T,D}, u::CuDeviceArray{SVector{D,T},D},
                      δt::T, δx::T, τ::T,
                      e::CuDeviceVector{SVector{D,T}}, w::CuDeviceVector{T}) where {D,T<:AbstractFloat}
    # NOTE(fix): original signature was garbled (missing `(ρ`) and declared a
    # type parameter Q that appeared nowhere in the argument list. Q is now the
    # runtime length of `w`, and the per-q MVector{Q,T} buffer is replaced by
    # the running moment Σ_q e_q·eq0_q, so no static Q is needed.
    x = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    y = (blockIdx().y - 1) * blockDim().y + threadIdx().y
    NX, NY = size(u)
    # Guard: the launch grid may be padded past the array extent.
    if x > NX || y > NY
        return nothing
    end
    Q = length(w)
    cs2 = one(T) / T(3)    # cs² = 1/3
    half = one(T) / T(2)   # avoid Float64 promotion from a bare 0.5 literal
    # Predictor: accumulate streamed-in equilibrium moments.
    ρ0 = zero(T)
    sum_e_eq0 = zero(SVector{D,T})   # Σ_q e_q · eq0_q (= ρ0·u0), reused by corrector
    for q in 1:Q
        i, j = Int(x - δt*e[q][1]), Int(y - δt*e[q][2])
        if i < 1; i = NX; elseif i > NX; i = 1; end
        if j < 1; j = NY; elseif j > NY; j = 1; end
        uij = u[i,j]
        uu = dot(uij, uij)
        eu = dot(e[q], uij)
        # NOTE(fix): `cs*uu` corrected to cs²·uu per the standard D2Q9 equilibrium.
        eq0 = ρ[i,j] * w[q] * (one(T) + eu/cs2 + half*(eu^2 - cs2*uu)/cs2^2)
        ρ0 += eq0
        sum_e_eq0 += eq0 * e[q]
    end
    u0 = sum_e_eq0 / ρ0
    # Corrector: relax non-equilibrium first moment toward equilibrium at (ρ0, u0).
    ρ[x,y] = ρ0
    sum_e_eq1 = zero(SVector{D,T})
    uu = dot(u0, u0)
    for q in 1:Q
        eu = dot(e[q], u0)
        eq1 = ρ0 * w[q] * (one(T) + eu/cs2 + half*(eu^2 - cs2*uu)/cs2^2)
        sum_e_eq1 += eq1 * e[q]
    end
    sumneq = sum_e_eq1 - sum_e_eq0   # Σ_q e_q·(eq1_q − eq0_q)
    u[x,y] = u0 - (one(T) - one(T)/τ) * sumneq / ρ0
    return nothing
end
"""
    advance_gpu!(lattice::Lattice{D,Q,T}, timesteps=1)

Run `timesteps` lattice-Boltzmann steps on the GPU and copy the resulting
`ρ` and `u` fields back into `lattice`. Returns `nothing`.
"""
function advance_gpu!(lattice::Lattice{D,Q,T}, timesteps=1) where {D,Q,T<:AbstractFloat}
    @unpack δt, δx, τ, e, w, ρ, u = lattice
    # Upload state once; iterate entirely on the device.
    dev_e = CuArray(e)
    dev_w = CuArray(w)
    dev_ρ = CuArray(ρ)
    dev_u = CuArray(u)
    # NOTE(fix): original launched `threads=(8,8)` with no `blocks`, i.e. a
    # single 8×8 block — only 64 cells were ever updated. Cover the whole grid.
    NX, NY = size(ρ)
    nthreads = (8, 8)
    nblocks = (cld(NX, 8), cld(NY, 8))
    for t in 1:timesteps
        @cuda threads=nthreads blocks=nblocks advance_gpu!(dev_ρ, dev_u, δt, δx, τ, dev_e, dev_w)
    end
    # copyto! synchronizes with the device before reading back.
    copyto!(lattice.ρ, dev_ρ)
    copyto!(lattice.u, dev_u)
    return nothing
end
# Build the default simulation setup: a 256×256 D2Q9 lattice in Float32 with
# unit grid spacing and time step and relaxation time τ = 0.65.
function test()
    gridsize = 256
    spacing = 1
    step = 1
    relax = 0.65
    return Lattice{2,9,Float32}(gridsize; δt=step, δx=spacing, τ=relax)
end
# Smoke test: build the default lattice and advance it one step on the GPU.
lattice = test()
advance_gpu!(lattice)
# (Trailing GitHub gist page footer removed from the scrape.)