Created
October 2, 2015 16:26
-
-
Save markdewing/54709a0fd6a17348a7cb to your computer and use it in GitHub Desktop.
Optimized Julia code for CoMD cell method variant
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
include("initAtoms.jl") | |
include("simflat.jl") | |
include("ljforce.jl") | |
using ArgParse | |
# Translation of CoMD to Julia | |
function initValidate(sim) | |
ke = sim.eKinetic | |
pe = sim.ePot | |
natoms = sim.atoms.nGlobal | |
@printf "Initial PE (cohesive energy) : %f\n" pe/natoms | |
@printf "Initial energy : %f atom count : %d\n" (ke+pe)/natoms natoms | |
end | |
firstCall = true | |
iStepPrev = -1 | |
function printInfo(sim, iStep, elapsedTime) | |
global firstCall, iStepPrev | |
if firstCall | |
firstCall = false | |
@printf "%100s\n" "Perfomance" | |
@printf "#%6s %10s %18s %18s %18s %12s %12s %10s\n" "Loop" "Time(fs)" "Total Energy" "Potential Energy" "Kinetic Energy" "Temperature" "(us/atom)" "# Atoms" | |
end | |
nEval = iStep - iStepPrev | |
iStepPrev = iStep | |
simtime = iStep * sim.dt | |
n = sim.atoms.nGlobal | |
eTotal = (sim.ePot + sim.eKinetic)/n | |
eK = sim.eKinetic/n | |
eU = sim.ePot /n | |
Temp = (sim.eKinetic/n)/(kB_eV * 1.5) | |
timePerAtom = 1.0e6 * elapsedTime/(n*nEval) | |
@printf "%6d %10.2f %18.12f %18.12f %18.12f %12.4f %10.4f %10d\n" iStep simtime eTotal eU eK Temp timePerAtom n | |
end | |
function printPerformanceResults(sim, elapsedTime) | |
n = sim.atoms.nGlobal | |
nEval = sim.nSteps | |
timePerAtom = 1.0e6 * elapsedTime/(n*nEval) | |
@printf "\nAverage all atom update rate: %10.f us/atom\n" timePerAtom | |
end | |
function run_comd() | |
cmd = parseCommandLine() | |
sim = initSimulation(cmd) | |
@code_llvm sim.pot.forceFunc(sim.pot, sim) | |
sim.eKinetic = kineticEnergy(sim.atoms, sim.boxes, sim.species) | |
initValidate(sim) | |
timestepTimeOneIteration = 0.0 | |
timestepTime= 0.0 | |
iStep = 0 | |
for jStep in 1:sim.printRate:sim.nSteps | |
printInfo(sim, iStep, timestepTimeOneIteration) | |
tic() | |
timestep(sim, sim.printRate, sim.dt) | |
timestepTimeOneIteration = toq() | |
timestepTime += timestepTimeOneIteration | |
iStep += sim.printRate | |
end | |
printInfo(sim, iStep, timestepTimeOneIteration) | |
printPerformanceResults(sim, timestepTime) | |
end | |
function parseCommandLine() | |
s = ArgParseSettings() | |
@add_arg_table s begin | |
"--nx", "-x" | |
help = "number of unit cells in x" | |
arg_type = Int | |
default = 10 | |
"--ny", "-y" | |
help = "number of unit cells in y" | |
arg_type = Int | |
default = 10 | |
"--nz", "-z" | |
help = "number of unit cells in z" | |
arg_type = Int | |
default = 10 | |
"--nSteps", "-N" | |
help = "total number of time steps" | |
arg_type = Int | |
default = 100 | |
"--printRate", "-n" | |
help = "number of steps between output" | |
arg_type = Int | |
default = 10 | |
"--dt", "-D" | |
help = "time step (in fs)" | |
arg_type = Float | |
default = 1.0 | |
"--lat", "-l" | |
help = "lattice parameter (Angstroms)" | |
arg_type = Float | |
default = -1.0 | |
"--temp", "-T" | |
help = "initial temperature (K)" | |
arg_type = Float | |
default = 600.0 | |
end | |
return parse_args(s) | |
end | |
run_comd() | |
#@profile run_comd() | |
#s = open("prof4.txt","w") | |
#Profile.print(s) | |
#close(s) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
include("constants.jl") | |
type LinkCell | |
MAXATOMS::Int | |
nLocalBoxes::Int | |
nTotalBoxes::Int | |
nAtoms::Array{Int} | |
gridSize::Array{Int} | |
invBoxSize::Array{Float} | |
end | |
function initLinkCells(pot, box_size) | |
gridSize = zeros(Int, 3) | |
for i in 1:3 | |
gridSize[i] = floor(Int, box_size[i]/pot.cutoff) | |
end | |
boxSize = [box_size[i]*1.0/gridSize[i] for i in 1:3] | |
nLocalBoxes = gridSize[1] * gridSize[2] * gridSize[3] | |
nHaloBoxes = 2*((gridSize[1] + 2) * (gridSize[2] + gridSize[3] + 2) + (gridSize[2] * gridSize[3])) | |
nTotalBoxes = nLocalBoxes + nHaloBoxes | |
invBoxSize = zeros(Float, 3) | |
for i in 1:3 | |
invBoxSize[i] = 1.0/boxSize[i] | |
end | |
nAtoms = zeros(Int, nTotalBoxes) | |
LinkCell(64, nLocalBoxes, nTotalBoxes, nAtoms, gridSize, invBoxSize) | |
end | |
function getBoxFromTuple(boxes, ix, iy, iz) | |
gs = boxes.gridSize | |
n = boxes.nLocalBoxes | |
iBox = 0 | |
# Halo in Z+ | |
if iz == gs[3] | |
iBox = n + 2*gs[3]*gs[2] + 2*gs[3]*(gs[1]+2) + (gs[1]+2)*(gs[2]+2) + (gs[1]+2)*(iy+1) + (ix+1) | |
# Halo in Z- | |
elseif iz == -1 | |
iBox = n + 2*gs[3]*gs[2] + 2*gs[3]*(gs[1]+2) + (gs[1]+2)*(iy+1) + (ix+1) | |
# Halo in Y+ | |
elseif iy == gs[2] | |
iBox = n + 2*gs[3]*gs[2] + gs[3]*(gs[1]+2) + (gs[1]+2)*iz + (ix+1) | |
# Halo in Y- | |
elseif iy == -1 | |
iBox = n + 2*gs[3]*gs[2] + iz*(gs[1]+2) + (ix+1) | |
# Halo in X+ | |
elseif ix == gs[1] | |
iBox = n + gs[2]*gs[3] + iz*gs[2] + iy | |
# Halo in X- | |
elseif ix == -1 | |
iBox = n + iz*gs[2] + iy | |
# Local link cell | |
else | |
iBox = ix + gs[1]*iy + gs[1]*gs[2]*iz | |
end | |
# One-based indexing | |
iBox += 1 | |
if iBox > boxes.nTotalBoxes | |
println("iBox too large: $iBox, Total Boxes = $(boxes.nTotalBoxes)") | |
println(" ix,iy,iz $ix, $iy, $iz") | |
println(" gridSize $(boxes.gridSize)") | |
end | |
return iBox | |
end | |
function getBoxFromCoord(boxes, r) | |
ix = floor(Int, r[1]*boxes.invBoxSize[1]) | |
iy = floor(Int, r[2]*boxes.invBoxSize[2]) | |
iz = floor(Int, r[3]*boxes.invBoxSize[3]) | |
getBoxFromTuple(boxes, ix, iy, iz) | |
end | |
function putAtomInBox(boxes, atoms, gid, iType, r, p=[0.0, 0.0, 0.0]) | |
#println("x,y,z = $x,$y,$z") | |
iBox = getBoxFromCoord(boxes, r) | |
iOff = (iBox-1)*boxes.MAXATOMS | |
iOff += boxes.nAtoms[iBox] + 1 | |
if iBox < boxes.nLocalBoxes | |
atoms.nLocal += 1 | |
end | |
boxes.nAtoms[iBox] += 1 | |
atoms.gid[iOff] = gid | |
atoms.species[iOff] = iType | |
for k in 1:3 | |
atoms.r[k,iOff] = r[k] | |
atoms.p[k,iOff] = p[k] | |
end | |
end | |
function getTuple(boxes, iBox) | |
ix = 0 | |
iy = 0 | |
iz = 0 | |
gs = boxes.gridSize | |
iBox = iBox-1 # One-based indexing | |
# local box | |
if iBox < boxes.nLocalBoxes | |
ix = iBox % gs[1] | |
iBox = div(iBox, gs[1]) | |
iy = iBox % gs[2] | |
iz = div(iBox, gs[2]) | |
else # halo box | |
ink = iBox - boxes.nLocalBoxes | |
if ink < 2*gs[2]*gs[3] | |
if ink < gs[2]*gs[3] | |
ix = 0 | |
else | |
ink -= gs[2]*gs[3] | |
ix = gs[1] + 1 | |
end | |
iy = 1 + ink % gs[2] | |
iz = 1 + div(ink, gs[2]) | |
elseif ink < 2*gs[3]*(gs[2] + gs[1] + 2) | |
ink -= 2*gs[3]*gs[2] | |
if ink < (gs[1]+2)*gs[3] | |
iy = 0 | |
else | |
ink -= (gs[1]+2)*gs[3] | |
iy = gs[2]+1 | |
end | |
ix = ink % (gs[2] + 2) | |
iz = 1 + div(ink, gs[1] + 2) | |
else | |
ink -= 2*gs[3]*(gs[2] + gs[1] + 2) | |
if ink < (gs[1] + 2)*(gs[2] + 2) | |
iz = 0 | |
else | |
ink -= (gs[1] + 2)*(gs[2] + 2) | |
iz = gs[3] + 1 | |
end | |
ix = ink %(gs[1] + 2) | |
iy = div(ink, gs[1] + 2) | |
end | |
ix -= 1 | |
iy -= 1 | |
iz -= 1 | |
end | |
return ix,iy,iz | |
end | |
function getNeighborBoxes(boxes, iBox, nbrBoxes) | |
ix, iy, iz = getTuple(boxes, iBox) | |
count = 0 | |
for i in ix-1:ix+1 | |
for j in iy-1:iy+1 | |
for k in iz-1:iz+1 | |
count += 1 | |
nbrBoxes[count] = getBoxFromTuple(boxes, i, j, k) | |
end | |
end | |
end | |
return count | |
end | |
function copyAtom(boxes, atoms, iAtom, iBox, jAtom, jBox) | |
iOff = boxes.MAXATOMS * (iBox-1) + iAtom | |
jOff = boxes.MAXATOMS * (jBox-1) + jAtom | |
atoms.gid[jOff] = atoms.gid[iOff] | |
atoms.species[jOff] = atoms.species[iOff] | |
atoms.r[:,jOff] = atoms.r[:,iOff] | |
atoms.p[:,jOff] = atoms.p[:,iOff] | |
atoms.f[:,jOff] = atoms.f[:,iOff] | |
end | |
function moveAtom(boxes, atoms, iId, iBox, jBox) | |
nj = boxes.nAtoms[jBox] + 1 | |
copyAtom(boxes, atoms, iId, iBox, nj, jBox) | |
boxes.nAtoms[jBox] += 1 | |
boxes.nAtoms[iBox] -= 1 | |
ni = boxes.nAtoms[iBox] + 1 | |
if ni != 0 | |
copyAtom(boxes, atoms, ni, iBox, iId, iBox) | |
end | |
if jBox >= boxes.nLocalBoxes | |
atoms.nLocal -= 1 | |
end | |
end | |
function emptyHaloCells(boxes) | |
for ii in boxes.nLocalBoxes+1 : boxes.nTotalBoxes | |
boxes.nAtoms[ii] = 0 | |
end | |
end | |
function updateLinkCells(atoms, boxes) | |
emptyHaloCells(boxes) | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = (iBox-1) * boxes.MAXATOMS | |
ii = 1 | |
while ii <= boxes.nAtoms[iBox] | |
jBox = getBoxFromCoord(boxes, atoms.r[:,iOff + ii]) | |
if jBox != iBox | |
moveAtom(boxes, atoms, ii, iBox, jBox) | |
else | |
ii += 1 | |
end | |
end | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
include("constants.jl") | |
using Devectorize | |
type LJ_Pot | |
sigma::Float | |
epsilon::Float | |
mass::Float | |
lat::Float | |
lattice_type::String | |
cutoff::Float | |
name::String | |
atomic_no::Int | |
forceFunc | |
end | |
function init_lj_pot() | |
sigma = 2.315 | |
epsilon = 0.167 | |
mass = 63.55 * amuToInternalMass | |
lat = 3.615 | |
lattice_type = "FCC" | |
cutoff = 2.5*sigma | |
name = "Cu" | |
atomic_no = 29 | |
return LJ_Pot(sigma, epsilon, mass, lat, lattice_type, cutoff, name, atomic_no, computeForce) | |
end | |
@fastmath function computeForce(pot, sim) | |
POT_SHIFT = 1.0 | |
atoms = sim.atoms | |
rCut2 = pot.cutoff * pot.cutoff | |
for iBox in 1:sim.boxes.nLocalBoxes | |
iOff = sim.boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:sim.boxes.nAtoms[iBox] | |
#@devec atoms.f[:, iOff] = 0.0 | |
for k in 1:3 | |
atoms.f[k, iOff] = 0.0 | |
end | |
iOff += 1 | |
end | |
end | |
ePot = 0.0 | |
s6 = pot.sigma * pot.sigma * pot.sigma * pot.sigma * pot.sigma * pot.sigma | |
rCut6 = s6/(rCut2 * rCut2 * rCut2) | |
eShift = POT_SHIFT * rCut6 * (rCut6 - 1.0) | |
nbrBoxes = zeros(Int, 27) | |
# Hoist allocation of temporary out of loop | |
dd = zeros(Float, 3) | |
for iBox in 1:sim.boxes.nLocalBoxes | |
nIBox = sim.boxes.nAtoms[iBox] | |
nNbrBoxes = getNeighborBoxes(sim.boxes, iBox, nbrBoxes) | |
for jTmp in 1:nNbrBoxes | |
jBox = nbrBoxes[jTmp] | |
nJBox = sim.boxes.nAtoms[jBox] | |
if nJBox == 0 | |
continue | |
end | |
for ii in 1:nIBox | |
iOff = sim.boxes.MAXATOMS * (iBox-1) + ii | |
iId = atoms.gid[iOff] | |
for jj in 1:nJBox | |
jOff = sim.boxes.MAXATOMS * (jBox-1) + jj | |
jId = atoms.gid[jOff] | |
if (jBox <= sim.boxes.nLocalBoxes) && (jId <= iId) | |
continue | |
end | |
# improves performance, but allocation for dd is | |
# not hoisted out of the loop | |
#@devec dd = atoms.r[:, iOff] - atoms.r[:,jOff] | |
# Use temporary | |
for k in 1:3 | |
dd[k] = atoms.r[k, iOff] - atoms.r[k,jOff] | |
end | |
# Makes function call | |
#r2 = dot(dd, dd) | |
# Loop is faster for short vector | |
r2 = 0.0 | |
for k in 1:3 | |
r2 += dd[k]*dd[k] | |
end | |
if r2 > rCut2 | |
continue | |
end | |
ir2 = 1.0/r2 | |
r6 = s6 * (ir2*ir2*ir2) | |
eLocal = r6*(r6 - 1.0) - eShift | |
if jBox <= sim.boxes.nLocalBoxes | |
ePot += eLocal | |
else | |
ePot += 0.5*eLocal | |
end | |
fr = -4.0 * pot.epsilon * r6 * ir2*(12.0*r6 - 6.0) | |
# Slowest | |
#atoms.f[:,iOff] -= dd*fr | |
#atoms.f[:,jOff] += dd*fr | |
# Faster. The operation must be '.*' instead of '*' | |
#@devec atoms.f[:,iOff] -= dd.*fr | |
#@devec atoms.f[:,jOff] += dd.*fr | |
# Fastest | |
for k in 1:3 | |
atoms.f[k,iOff] -= dd[k]*fr | |
atoms.f[k,jOff] += dd[k]*fr | |
end | |
end | |
end | |
end | |
end | |
ePot = ePot*4.0*pot.epsilon | |
sim.ePot = ePot | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
include("initAtoms.jl") | |
include("constants.jl") | |
include("ljforce.jl") | |
include("linkcell.jl") | |
include("halo.jl") | |
type Species | |
mass::Float | |
end | |
Species() = Species(1.0) | |
function initSpecies(pot) | |
[Species(pot.mass)] | |
end | |
type SimFlat | |
nSteps::Int | |
printRate::Int | |
dt::Float | |
atoms::Atoms | |
species::Array{Species} | |
pot::LJ_Pot | |
ePot::Float | |
eKinetic::Float | |
boxes::LinkCell | |
halo::HaloExchange | |
end | |
function dumpPos(atoms, boxes) | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:boxes.nAtoms[iBox] | |
@printf "%d: %20.10f %20.10f %20.10f\n" iOff atoms.r[1, iOff] atoms.r[2,iOff] atoms.r[3,iOff] | |
iOff += 1 | |
end | |
end | |
end | |
function dumpVel(atoms, boxes) | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:boxes.nAtoms[iBox] | |
@printf "%d: %20.10f %20.10f %20.10f\n" iOff atoms.p[1, iOff] atoms.p[2,iOff] atoms.p[3,iOff] | |
iOff += 1 | |
end | |
end | |
end | |
function dumpForce(atoms, boxes) | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:boxes.nAtoms[iBox] | |
@printf "%d: %20.10f %20.10f %20.10f\n" iOff atoms.f[1, iOff] atoms.f[2,iOff] atoms.f[3,iOff] | |
iOff += 1 | |
end | |
end | |
end | |
function initSimulation(cmd) | |
nSteps = cmd["nSteps"] | |
dt = cmd["dt"] | |
printRate = cmd["printRate"] | |
nx = cmd["nx"] | |
ny = cmd["ny"] | |
nz = cmd["nz"] | |
temp = cmd["temp"] | |
pot = init_lj_pot() | |
species = initSpecies(pot) | |
latticeConstant = cmd["lat"] | |
if latticeConstant < 0.0 | |
latticeConstant = pot.lat | |
end | |
box_size = [nx, ny, nz]*latticeConstant | |
boxes = initLinkCells(pot, box_size) | |
halo = initHalo(boxes) | |
atoms = initAtoms(boxes.nTotalBoxes * boxes.MAXATOMS) | |
setBounds(atoms, nx, ny, nz, latticeConstant) | |
createFccLattice(nx, ny, nz, latticeConstant, atoms, boxes) | |
setTemperature(atoms, boxes, species, temp) | |
redistributeAtoms(atoms, boxes, halo) | |
return SimFlat(nSteps, printRate, dt, atoms, species, pot, 0.0, 0.0, boxes, halo) | |
end | |
function kineticEnergy(atoms, boxes, species) | |
ke = 0.0 | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:boxes.nAtoms[iBox] | |
iSpecies = atoms.species[iOff] | |
invMass = 0.5/species[iSpecies].mass | |
ke += invMass * dot(atoms.p[:,iOff], atoms.p[:,iOff]) | |
iOff += 1 | |
end | |
end | |
return ke | |
end | |
function computeVcm(atoms, boxes, species) | |
vcm = zeros(Float, 3) | |
totalMass = 0.0 | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:boxes.nAtoms[iBox] | |
vcm += atoms.p[:,iOff] | |
iSpecies = atoms.species[iOff] | |
totalMass += species[iSpecies].mass | |
iOff += 1 | |
end | |
end | |
vcm = vcm/totalMass | |
end | |
function setVcm(atoms, boxes, species, newVcm) | |
oldVcm = computeVcm(atoms, boxes, species) | |
shift = newVcm - oldVcm | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:boxes.nAtoms[iBox] | |
iSpecies = atoms.species[iOff] | |
mass = species[iSpecies].mass | |
atoms.p[:,iOff] += mass*shift | |
iOff += 1 | |
end | |
end | |
end | |
function setTemperature(atoms, boxes, species, temperature) | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:boxes.nAtoms[iBox] | |
iSpecies = atoms.species[iOff] | |
mass = species[iSpecies].mass | |
sigma = sqrt(kB_eV * temperature/mass) | |
atoms.p[:,iOff] = mass * sigma * randn(3) | |
iOff += 1 | |
end | |
end | |
if temperature == 0.0 | |
return | |
end | |
vZero = zeros(Float, 3) | |
setVcm(atoms, boxes, species, vZero) | |
ke = kineticEnergy(atoms, boxes, species) | |
temp = (ke/atoms.nGlobal)/kB_eV/1.5 | |
scaleFactor = sqrt(temperature/temp) | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:boxes.nAtoms[iBox] | |
atoms.p[:,iOff] *= scaleFactor | |
iOff += 1 | |
end | |
end | |
end | |
function advanceVelocity(atoms, boxes, dt) | |
for iBox in 1:boxes.nLocalBoxes | |
iOff = boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:boxes.nAtoms[iBox] | |
#atoms.p[:,iOff] += dt*atoms.f[:,iOff] | |
for k in 1:3 | |
atoms.p[k,iOff] += dt*atoms.f[k,iOff] | |
end | |
iOff += 1 | |
end | |
end | |
end | |
function advancePosition(sim, dt) | |
for iBox in 1:sim.boxes.nLocalBoxes | |
iOff = sim.boxes.MAXATOMS * (iBox-1) + 1 | |
for ii in 1:sim.boxes.nAtoms[iBox] | |
iSpecies = sim.atoms.species[iOff] | |
invMass = 1.0/sim.species[iSpecies].mass | |
for k in 1:3 | |
sim.atoms.r[k,iOff] += dt*sim.atoms.p[k,iOff] * invMass | |
end | |
iOff += 1 | |
end | |
end | |
end | |
function timestep(sim, nSteps, dt) | |
for ii in 1:nSteps | |
advanceVelocity(sim.atoms, sim.boxes, 0.5*dt) | |
advancePosition(sim, dt) | |
redistributeAtoms(sim.atoms, sim.boxes, sim.halo) | |
sim.pot.forceFunc(sim.pot, sim) | |
advanceVelocity(sim.atoms, sim.boxes, 0.5*dt) | |
end | |
sim.eKinetic = kineticEnergy(sim.atoms, sim.boxes, sim.species) | |
end | |
function redistributeAtoms(atoms, boxes, halo) | |
updateLinkCells(atoms, boxes) | |
haloExchange(halo, atoms, boxes) | |
natom = 0 | |
for iBox in 1:boxes.nLocalBoxes | |
natom += boxes.nAtoms[iBox] | |
end | |
if natom != atoms.nGlobal | |
println("Number of atoms incorrect = $natom, should be $(atoms.nGlobal)") | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment