Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Optimized Julia code for CoMD cell method variant
include("initAtoms.jl")
include("simflat.jl")
include("ljforce.jl")
using ArgParse
# Translation of CoMD to Julia
function initValidate(sim)
ke = sim.eKinetic
pe = sim.ePot
natoms = sim.atoms.nGlobal
@printf "Initial PE (cohesive energy) : %f\n" pe/natoms
@printf "Initial energy : %f atom count : %d\n" (ke+pe)/natoms natoms
end
firstCall = true
iStepPrev = -1
function printInfo(sim, iStep, elapsedTime)
global firstCall, iStepPrev
if firstCall
firstCall = false
@printf "%100s\n" "Perfomance"
@printf "#%6s %10s %18s %18s %18s %12s %12s %10s\n" "Loop" "Time(fs)" "Total Energy" "Potential Energy" "Kinetic Energy" "Temperature" "(us/atom)" "# Atoms"
end
nEval = iStep - iStepPrev
iStepPrev = iStep
simtime = iStep * sim.dt
n = sim.atoms.nGlobal
eTotal = (sim.ePot + sim.eKinetic)/n
eK = sim.eKinetic/n
eU = sim.ePot /n
Temp = (sim.eKinetic/n)/(kB_eV * 1.5)
timePerAtom = 1.0e6 * elapsedTime/(n*nEval)
@printf "%6d %10.2f %18.12f %18.12f %18.12f %12.4f %10.4f %10d\n" iStep simtime eTotal eU eK Temp timePerAtom n
end
function printPerformanceResults(sim, elapsedTime)
n = sim.atoms.nGlobal
nEval = sim.nSteps
timePerAtom = 1.0e6 * elapsedTime/(n*nEval)
@printf "\nAverage all atom update rate: %10.f us/atom\n" timePerAtom
end
function run_comd()
cmd = parseCommandLine()
sim = initSimulation(cmd)
@code_llvm sim.pot.forceFunc(sim.pot, sim)
sim.eKinetic = kineticEnergy(sim.atoms, sim.boxes, sim.species)
initValidate(sim)
timestepTimeOneIteration = 0.0
timestepTime= 0.0
iStep = 0
for jStep in 1:sim.printRate:sim.nSteps
printInfo(sim, iStep, timestepTimeOneIteration)
tic()
timestep(sim, sim.printRate, sim.dt)
timestepTimeOneIteration = toq()
timestepTime += timestepTimeOneIteration
iStep += sim.printRate
end
printInfo(sim, iStep, timestepTimeOneIteration)
printPerformanceResults(sim, timestepTime)
end
function parseCommandLine()
s = ArgParseSettings()
@add_arg_table s begin
"--nx", "-x"
help = "number of unit cells in x"
arg_type = Int
default = 10
"--ny", "-y"
help = "number of unit cells in y"
arg_type = Int
default = 10
"--nz", "-z"
help = "number of unit cells in z"
arg_type = Int
default = 10
"--nSteps", "-N"
help = "total number of time steps"
arg_type = Int
default = 100
"--printRate", "-n"
help = "number of steps between output"
arg_type = Int
default = 10
"--dt", "-D"
help = "time step (in fs)"
arg_type = Float
default = 1.0
"--lat", "-l"
help = "lattice parameter (Angstroms)"
arg_type = Float
default = -1.0
"--temp", "-T"
help = "initial temperature (K)"
arg_type = Float
default = 600.0
end
return parse_args(s)
end
run_comd()
#@profile run_comd()
#s = open("prof4.txt","w")
#Profile.print(s)
#close(s)
include("constants.jl")
type LinkCell
MAXATOMS::Int
nLocalBoxes::Int
nTotalBoxes::Int
nAtoms::Array{Int}
gridSize::Array{Int}
invBoxSize::Array{Float}
end
function initLinkCells(pot, box_size)
gridSize = zeros(Int, 3)
for i in 1:3
gridSize[i] = floor(Int, box_size[i]/pot.cutoff)
end
boxSize = [box_size[i]*1.0/gridSize[i] for i in 1:3]
nLocalBoxes = gridSize[1] * gridSize[2] * gridSize[3]
nHaloBoxes = 2*((gridSize[1] + 2) * (gridSize[2] + gridSize[3] + 2) + (gridSize[2] * gridSize[3]))
nTotalBoxes = nLocalBoxes + nHaloBoxes
invBoxSize = zeros(Float, 3)
for i in 1:3
invBoxSize[i] = 1.0/boxSize[i]
end
nAtoms = zeros(Int, nTotalBoxes)
LinkCell(64, nLocalBoxes, nTotalBoxes, nAtoms, gridSize, invBoxSize)
end
function getBoxFromTuple(boxes, ix, iy, iz)
gs = boxes.gridSize
n = boxes.nLocalBoxes
iBox = 0
# Halo in Z+
if iz == gs[3]
iBox = n + 2*gs[3]*gs[2] + 2*gs[3]*(gs[1]+2) + (gs[1]+2)*(gs[2]+2) + (gs[1]+2)*(iy+1) + (ix+1)
# Halo in Z-
elseif iz == -1
iBox = n + 2*gs[3]*gs[2] + 2*gs[3]*(gs[1]+2) + (gs[1]+2)*(iy+1) + (ix+1)
# Halo in Y+
elseif iy == gs[2]
iBox = n + 2*gs[3]*gs[2] + gs[3]*(gs[1]+2) + (gs[1]+2)*iz + (ix+1)
# Halo in Y-
elseif iy == -1
iBox = n + 2*gs[3]*gs[2] + iz*(gs[1]+2) + (ix+1)
# Halo in X+
elseif ix == gs[1]
iBox = n + gs[2]*gs[3] + iz*gs[2] + iy
# Halo in X-
elseif ix == -1
iBox = n + iz*gs[2] + iy
# Local link cell
else
iBox = ix + gs[1]*iy + gs[1]*gs[2]*iz
end
# One-based indexing
iBox += 1
if iBox > boxes.nTotalBoxes
println("iBox too large: $iBox, Total Boxes = $(boxes.nTotalBoxes)")
println(" ix,iy,iz $ix, $iy, $iz")
println(" gridSize $(boxes.gridSize)")
end
return iBox
end
function getBoxFromCoord(boxes, r)
ix = floor(Int, r[1]*boxes.invBoxSize[1])
iy = floor(Int, r[2]*boxes.invBoxSize[2])
iz = floor(Int, r[3]*boxes.invBoxSize[3])
getBoxFromTuple(boxes, ix, iy, iz)
end
function putAtomInBox(boxes, atoms, gid, iType, r, p=[0.0, 0.0, 0.0])
#println("x,y,z = $x,$y,$z")
iBox = getBoxFromCoord(boxes, r)
iOff = (iBox-1)*boxes.MAXATOMS
iOff += boxes.nAtoms[iBox] + 1
if iBox < boxes.nLocalBoxes
atoms.nLocal += 1
end
boxes.nAtoms[iBox] += 1
atoms.gid[iOff] = gid
atoms.species[iOff] = iType
for k in 1:3
atoms.r[k,iOff] = r[k]
atoms.p[k,iOff] = p[k]
end
end
function getTuple(boxes, iBox)
ix = 0
iy = 0
iz = 0
gs = boxes.gridSize
iBox = iBox-1 # One-based indexing
# local box
if iBox < boxes.nLocalBoxes
ix = iBox % gs[1]
iBox = div(iBox, gs[1])
iy = iBox % gs[2]
iz = div(iBox, gs[2])
else # halo box
ink = iBox - boxes.nLocalBoxes
if ink < 2*gs[2]*gs[3]
if ink < gs[2]*gs[3]
ix = 0
else
ink -= gs[2]*gs[3]
ix = gs[1] + 1
end
iy = 1 + ink % gs[2]
iz = 1 + div(ink, gs[2])
elseif ink < 2*gs[3]*(gs[2] + gs[1] + 2)
ink -= 2*gs[3]*gs[2]
if ink < (gs[1]+2)*gs[3]
iy = 0
else
ink -= (gs[1]+2)*gs[3]
iy = gs[2]+1
end
ix = ink % (gs[2] + 2)
iz = 1 + div(ink, gs[1] + 2)
else
ink -= 2*gs[3]*(gs[2] + gs[1] + 2)
if ink < (gs[1] + 2)*(gs[2] + 2)
iz = 0
else
ink -= (gs[1] + 2)*(gs[2] + 2)
iz = gs[3] + 1
end
ix = ink %(gs[1] + 2)
iy = div(ink, gs[1] + 2)
end
ix -= 1
iy -= 1
iz -= 1
end
return ix,iy,iz
end
function getNeighborBoxes(boxes, iBox, nbrBoxes)
ix, iy, iz = getTuple(boxes, iBox)
count = 0
for i in ix-1:ix+1
for j in iy-1:iy+1
for k in iz-1:iz+1
count += 1
nbrBoxes[count] = getBoxFromTuple(boxes, i, j, k)
end
end
end
return count
end
function copyAtom(boxes, atoms, iAtom, iBox, jAtom, jBox)
iOff = boxes.MAXATOMS * (iBox-1) + iAtom
jOff = boxes.MAXATOMS * (jBox-1) + jAtom
atoms.gid[jOff] = atoms.gid[iOff]
atoms.species[jOff] = atoms.species[iOff]
atoms.r[:,jOff] = atoms.r[:,iOff]
atoms.p[:,jOff] = atoms.p[:,iOff]
atoms.f[:,jOff] = atoms.f[:,iOff]
end
function moveAtom(boxes, atoms, iId, iBox, jBox)
nj = boxes.nAtoms[jBox] + 1
copyAtom(boxes, atoms, iId, iBox, nj, jBox)
boxes.nAtoms[jBox] += 1
boxes.nAtoms[iBox] -= 1
ni = boxes.nAtoms[iBox] + 1
if ni != 0
copyAtom(boxes, atoms, ni, iBox, iId, iBox)
end
if jBox >= boxes.nLocalBoxes
atoms.nLocal -= 1
end
end
function emptyHaloCells(boxes)
for ii in boxes.nLocalBoxes+1 : boxes.nTotalBoxes
boxes.nAtoms[ii] = 0
end
end
function updateLinkCells(atoms, boxes)
emptyHaloCells(boxes)
for iBox in 1:boxes.nLocalBoxes
iOff = (iBox-1) * boxes.MAXATOMS
ii = 1
while ii <= boxes.nAtoms[iBox]
jBox = getBoxFromCoord(boxes, atoms.r[:,iOff + ii])
if jBox != iBox
moveAtom(boxes, atoms, ii, iBox, jBox)
else
ii += 1
end
end
end
end
include("constants.jl")
using Devectorize
type LJ_Pot
sigma::Float
epsilon::Float
mass::Float
lat::Float
lattice_type::String
cutoff::Float
name::String
atomic_no::Int
forceFunc
end
function init_lj_pot()
sigma = 2.315
epsilon = 0.167
mass = 63.55 * amuToInternalMass
lat = 3.615
lattice_type = "FCC"
cutoff = 2.5*sigma
name = "Cu"
atomic_no = 29
return LJ_Pot(sigma, epsilon, mass, lat, lattice_type, cutoff, name, atomic_no, computeForce)
end
@fastmath function computeForce(pot, sim)
POT_SHIFT = 1.0
atoms = sim.atoms
rCut2 = pot.cutoff * pot.cutoff
for iBox in 1:sim.boxes.nLocalBoxes
iOff = sim.boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:sim.boxes.nAtoms[iBox]
#@devec atoms.f[:, iOff] = 0.0
for k in 1:3
atoms.f[k, iOff] = 0.0
end
iOff += 1
end
end
ePot = 0.0
s6 = pot.sigma * pot.sigma * pot.sigma * pot.sigma * pot.sigma * pot.sigma
rCut6 = s6/(rCut2 * rCut2 * rCut2)
eShift = POT_SHIFT * rCut6 * (rCut6 - 1.0)
nbrBoxes = zeros(Int, 27)
# Hoist allocation of temporary out of loop
dd = zeros(Float, 3)
for iBox in 1:sim.boxes.nLocalBoxes
nIBox = sim.boxes.nAtoms[iBox]
nNbrBoxes = getNeighborBoxes(sim.boxes, iBox, nbrBoxes)
for jTmp in 1:nNbrBoxes
jBox = nbrBoxes[jTmp]
nJBox = sim.boxes.nAtoms[jBox]
if nJBox == 0
continue
end
for ii in 1:nIBox
iOff = sim.boxes.MAXATOMS * (iBox-1) + ii
iId = atoms.gid[iOff]
for jj in 1:nJBox
jOff = sim.boxes.MAXATOMS * (jBox-1) + jj
jId = atoms.gid[jOff]
if (jBox <= sim.boxes.nLocalBoxes) && (jId <= iId)
continue
end
# improves performance, but allocation for dd is
# not hoisted out of the loop
#@devec dd = atoms.r[:, iOff] - atoms.r[:,jOff]
# Use temporary
for k in 1:3
dd[k] = atoms.r[k, iOff] - atoms.r[k,jOff]
end
# Makes function call
#r2 = dot(dd, dd)
# Loop is faster for short vector
r2 = 0.0
for k in 1:3
r2 += dd[k]*dd[k]
end
if r2 > rCut2
continue
end
ir2 = 1.0/r2
r6 = s6 * (ir2*ir2*ir2)
eLocal = r6*(r6 - 1.0) - eShift
if jBox <= sim.boxes.nLocalBoxes
ePot += eLocal
else
ePot += 0.5*eLocal
end
fr = -4.0 * pot.epsilon * r6 * ir2*(12.0*r6 - 6.0)
# Slowest
#atoms.f[:,iOff] -= dd*fr
#atoms.f[:,jOff] += dd*fr
# Faster. The operation must be '.*' instead of '*'
#@devec atoms.f[:,iOff] -= dd.*fr
#@devec atoms.f[:,jOff] += dd.*fr
# Fastest
for k in 1:3
atoms.f[k,iOff] -= dd[k]*fr
atoms.f[k,jOff] += dd[k]*fr
end
end
end
end
end
ePot = ePot*4.0*pot.epsilon
sim.ePot = ePot
end
include("initAtoms.jl")
include("constants.jl")
include("ljforce.jl")
include("linkcell.jl")
include("halo.jl")
type Species
mass::Float
end
Species() = Species(1.0)
function initSpecies(pot)
[Species(pot.mass)]
end
type SimFlat
nSteps::Int
printRate::Int
dt::Float
atoms::Atoms
species::Array{Species}
pot::LJ_Pot
ePot::Float
eKinetic::Float
boxes::LinkCell
halo::HaloExchange
end
function dumpPos(atoms, boxes)
for iBox in 1:boxes.nLocalBoxes
iOff = boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:boxes.nAtoms[iBox]
@printf "%d: %20.10f %20.10f %20.10f\n" iOff atoms.r[1, iOff] atoms.r[2,iOff] atoms.r[3,iOff]
iOff += 1
end
end
end
function dumpVel(atoms, boxes)
for iBox in 1:boxes.nLocalBoxes
iOff = boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:boxes.nAtoms[iBox]
@printf "%d: %20.10f %20.10f %20.10f\n" iOff atoms.p[1, iOff] atoms.p[2,iOff] atoms.p[3,iOff]
iOff += 1
end
end
end
function dumpForce(atoms, boxes)
for iBox in 1:boxes.nLocalBoxes
iOff = boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:boxes.nAtoms[iBox]
@printf "%d: %20.10f %20.10f %20.10f\n" iOff atoms.f[1, iOff] atoms.f[2,iOff] atoms.f[3,iOff]
iOff += 1
end
end
end
function initSimulation(cmd)
nSteps = cmd["nSteps"]
dt = cmd["dt"]
printRate = cmd["printRate"]
nx = cmd["nx"]
ny = cmd["ny"]
nz = cmd["nz"]
temp = cmd["temp"]
pot = init_lj_pot()
species = initSpecies(pot)
latticeConstant = cmd["lat"]
if latticeConstant < 0.0
latticeConstant = pot.lat
end
box_size = [nx, ny, nz]*latticeConstant
boxes = initLinkCells(pot, box_size)
halo = initHalo(boxes)
atoms = initAtoms(boxes.nTotalBoxes * boxes.MAXATOMS)
setBounds(atoms, nx, ny, nz, latticeConstant)
createFccLattice(nx, ny, nz, latticeConstant, atoms, boxes)
setTemperature(atoms, boxes, species, temp)
redistributeAtoms(atoms, boxes, halo)
return SimFlat(nSteps, printRate, dt, atoms, species, pot, 0.0, 0.0, boxes, halo)
end
function kineticEnergy(atoms, boxes, species)
ke = 0.0
for iBox in 1:boxes.nLocalBoxes
iOff = boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:boxes.nAtoms[iBox]
iSpecies = atoms.species[iOff]
invMass = 0.5/species[iSpecies].mass
ke += invMass * dot(atoms.p[:,iOff], atoms.p[:,iOff])
iOff += 1
end
end
return ke
end
function computeVcm(atoms, boxes, species)
vcm = zeros(Float, 3)
totalMass = 0.0
for iBox in 1:boxes.nLocalBoxes
iOff = boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:boxes.nAtoms[iBox]
vcm += atoms.p[:,iOff]
iSpecies = atoms.species[iOff]
totalMass += species[iSpecies].mass
iOff += 1
end
end
vcm = vcm/totalMass
end
function setVcm(atoms, boxes, species, newVcm)
oldVcm = computeVcm(atoms, boxes, species)
shift = newVcm - oldVcm
for iBox in 1:boxes.nLocalBoxes
iOff = boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:boxes.nAtoms[iBox]
iSpecies = atoms.species[iOff]
mass = species[iSpecies].mass
atoms.p[:,iOff] += mass*shift
iOff += 1
end
end
end
function setTemperature(atoms, boxes, species, temperature)
for iBox in 1:boxes.nLocalBoxes
iOff = boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:boxes.nAtoms[iBox]
iSpecies = atoms.species[iOff]
mass = species[iSpecies].mass
sigma = sqrt(kB_eV * temperature/mass)
atoms.p[:,iOff] = mass * sigma * randn(3)
iOff += 1
end
end
if temperature == 0.0
return
end
vZero = zeros(Float, 3)
setVcm(atoms, boxes, species, vZero)
ke = kineticEnergy(atoms, boxes, species)
temp = (ke/atoms.nGlobal)/kB_eV/1.5
scaleFactor = sqrt(temperature/temp)
for iBox in 1:boxes.nLocalBoxes
iOff = boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:boxes.nAtoms[iBox]
atoms.p[:,iOff] *= scaleFactor
iOff += 1
end
end
end
function advanceVelocity(atoms, boxes, dt)
for iBox in 1:boxes.nLocalBoxes
iOff = boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:boxes.nAtoms[iBox]
#atoms.p[:,iOff] += dt*atoms.f[:,iOff]
for k in 1:3
atoms.p[k,iOff] += dt*atoms.f[k,iOff]
end
iOff += 1
end
end
end
function advancePosition(sim, dt)
for iBox in 1:sim.boxes.nLocalBoxes
iOff = sim.boxes.MAXATOMS * (iBox-1) + 1
for ii in 1:sim.boxes.nAtoms[iBox]
iSpecies = sim.atoms.species[iOff]
invMass = 1.0/sim.species[iSpecies].mass
for k in 1:3
sim.atoms.r[k,iOff] += dt*sim.atoms.p[k,iOff] * invMass
end
iOff += 1
end
end
end
function timestep(sim, nSteps, dt)
for ii in 1:nSteps
advanceVelocity(sim.atoms, sim.boxes, 0.5*dt)
advancePosition(sim, dt)
redistributeAtoms(sim.atoms, sim.boxes, sim.halo)
sim.pot.forceFunc(sim.pot, sim)
advanceVelocity(sim.atoms, sim.boxes, 0.5*dt)
end
sim.eKinetic = kineticEnergy(sim.atoms, sim.boxes, sim.species)
end
function redistributeAtoms(atoms, boxes, halo)
updateLinkCells(atoms, boxes)
haloExchange(halo, atoms, boxes)
natom = 0
for iBox in 1:boxes.nLocalBoxes
natom += boxes.nAtoms[iBox]
end
if natom != atoms.nGlobal
println("Number of atoms incorrect = $natom, should be $(atoms.nGlobal)")
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment