Skip to content

Instantly share code, notes, and snippets.

@foota
Created May 14, 2012 18:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save foota/2695505 to your computer and use it in GitHub Desktop.
Save foota/2695505 to your computer and use it in GitHub Desktop.
Simple molecular dynamics using GPU devices
////////////////////////////////////////////////////////////////
// Simple Molecular Dynamics using GPU by nox, 2009.12.10.
//
// Perform molecular dynamics for pseudo-atoms without internal
// interactions, such as bonds, angles, dihedrals. Only vdW and
// Coulomb interactions.
#include <iostream>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <numeric>
#include <cmath>
#include <cutil_inline.h>
using namespace std;
const int BLOCK = 256;
__device__ float Distance(float* crd, int i, int j)
{
float dx = crd[i*3] - crd[j*3];
float dy = crd[i*3+1] - crd[j*3+1];
float dz = crd[i*3+2] - crd[j*3+2];
return sqrtf(dx * dx + dy * dy + dz * dz);
}
__global__ void Center(float* crd, float cx, float cy, float cz, int num_atoms)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i >= num_atoms) return;
crd[i*3] -= cx;
crd[i*3+1] -= cy;
crd[i*3+2] -= cz;
}
// Calculate energies and forces
// Total energy = vdW + Coulomb
// vdW
// U = eps * [(Rij / r[i])^12 - 2 * (Rij / r[i])^6]
// F = -12 * eps / Rij * [(Rij / r[i])^13 - (Rij / r[i])^7] * r_xyz / r[i]
// Coulomb
// U = SUM_i>j qiqj / r[i]
// F = SUM_j qiqj / r[i]^3 * r_xyz
__global__ void Calc(float* crd, float* f, float* ene, float* chg, int num_atoms)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i >= num_atoms) return;
const float eps = 0.2f;
const float Rij = 2.5f;
float r, rij, r12, r6, r3, r_, q, x;
float e0, f0, f1, f2;
e0 = f0 = f1 = f2 = 0.0f;
float c0 = crd[i*3];
float c1 = crd[i*3+1];
float c2 = crd[i*3+2];
float q0 = chg[i];
for (int j = 0; j < num_atoms; j++) {
if (i == j) continue;
r = Distance(crd, i, j);
r_ = 1.0f / r;
q = q0 * chg[j];
rij = Rij * r_;
r3 = rij * rij * rij;
r6 = r3 * r3;
r12 = r6 * r6;
x = ((12 * eps) * (r12 - r6) + q * r_) * r_ * r_;
e0 += eps * (r12 - 2.0f * r6) + q * r_;
f0 += x * (c0 - crd[j*3]);
f1 += x * (c1 - crd[j*3+1]);
f2 += x * (c2 - crd[j*3+2]);
}
ene[i] = e0;
f[i*3] = f0;
f[i*3+1] = f1;
f[i*3+2] = f2;
}
template<unsigned int blockSize>
__global__ void Energy(float* ene, float* oene, unsigned int n)
{
extern __shared__ float sdata[];
unsigned int tid = threadIdx.x;
unsigned int i = blockIdx.x * (blockSize * 2) + tid;
unsigned int gridSize = blockSize * 2 * gridDim.x;
sdata[tid] = 0;
while (i < n) { sdata[tid] += ene[i] + ene[i+blockSize]; i += gridSize; }
__syncthreads();
if (blockSize >= 512) { if (tid < 256) { sdata[tid] += sdata[tid+256]; } __syncthreads(); }
if (blockSize >= 256) { if (tid < 128) { sdata[tid] += sdata[tid+128]; } __syncthreads(); }
if (blockSize >= 128) { if (tid < 64) { sdata[tid] += sdata[tid+ 64]; } __syncthreads(); }
if (tid < 32) {
if (blockSize >= 64) sdata[tid] += sdata[tid+32];
if (blockSize >= 32) sdata[tid] += sdata[tid+16];
if (blockSize >= 16) sdata[tid] += sdata[tid+ 8];
if (blockSize >= 8) sdata[tid] += sdata[tid+ 4];
if (blockSize >= 4) sdata[tid] += sdata[tid+ 2];
if (blockSize >= 2) sdata[tid] += sdata[tid+ 1];
}
if (tid == 0) oene[blockIdx.x] = sdata[0];
}
__global__ void Move(float* crd, float* f, int num_atoms, float cap_range)
{
int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i >= num_atoms) return;
crd[i*3] += f[i*3] * 0.01f;
crd[i*3+1] += f[i*3+1] * 0.01f;
crd[i*3+2] += f[i*3+2] * 0.01f;
float r = crd[i*3] * crd[i*3] + crd[i*3+1] * crd[i*3+1] + crd[i*3+2] * crd[i*3+2];
float dr = r - cap_range * cap_range;
if (dr > 0.0f) {
f[i*3] = -crd[i*3] / cap_range * dr * 0.01f;
f[i*3+1] = -crd[i*3+1] / cap_range * dr * 0.01f;
f[i*3+2] = -crd[i*3+2] / cap_range * dr * 0.01f;
}
}
class SimpleMD {
private:
dim3 grid;
dim3 threads;
int width;
int num_steps;
int num_atoms;
int num_center;
float cap_range;
float *h_crd, *h_f, *h_ene, *h_chg;
float *d_crd, *d_f, *d_ene, *d_chg, *d_oene;
float tene;
public:
SimpleMD(int n, int nc, char*);
~SimpleMD();
void ReadCrd(char*);
void PrintCrd();
void CenterPosition();
unsigned int NextPow2(unsigned int x);
void GetBlocksAndThreads(int n, int& blocks, int& threads);
void TotalEnergyReduce(int threads, int blocks);
void TotalEnergy();
void Run();
};
SimpleMD::SimpleMD(int n, int nc, char* fname) : num_steps(n), num_center(nc)
{
ReadCrd(fname);
h_f = new float[num_atoms * 3];
fill(h_f, h_f + num_atoms * 3, 0.0f);
h_ene = new float[NextPow2(num_atoms)];
fill(h_ene, h_ene + NextPow2(num_atoms), 0.0f);
width = (num_atoms - 1) / BLOCK + 1;
grid.x = width;
grid.y = 1;
grid.z = 1;
threads.x = BLOCK;
threads.y = 1;
threads.z = 1;
cudaMalloc((void**)&d_crd, sizeof(float) * num_atoms * 3);
cudaMalloc((void**)&d_f, sizeof(float) * num_atoms * 3);
cudaMalloc((void**)&d_oene, sizeof(float) * num_atoms);
cudaMalloc((void**)&d_ene, sizeof(float) * NextPow2(num_atoms));
cudaMalloc((void**)&d_chg, sizeof(float) * num_atoms);
}
SimpleMD::~SimpleMD()
{
cudaFree(d_chg);
cudaFree(d_ene);
cudaFree(d_f);
cudaFree(d_crd);
delete[] h_ene;
delete[] h_f;
delete[] h_chg;
delete[] h_crd;
}
void SimpleMD::ReadCrd(char* fname)
{
fstream fs(fname, ios_base::in);
string line;
stringstream ss;
if (!fs.is_open()) {
cerr << "File open error: " << fname << endl;
exit(1);
}
getline(fs, line); cout << line << endl;
getline(fs, line); ss.str(line); ss >> num_atoms; ss.clear();
getline(fs, line); ss.str(line); ss >> cap_range; ss.clear();
h_crd = new float[num_atoms * 3];
h_chg = new float[num_atoms];
for (int i = 0; i < num_atoms; i++) {
getline(fs, line);
ss.str(line);
ss >> h_crd[i*3] >> h_crd[i*3+1] >> h_crd[i*3+2] >> h_chg[i];
ss.clear();
ss.str("");
}
fs.close();
}
void SimpleMD::PrintCrd()
{
cout << endl;
cout << num_atoms << endl;
cout << cap_range << endl;
for (int i = 0; i < num_atoms; i++) {
for (int j = 0; j < 3; j++) cout << " " << fixed << setw(10) << setprecision(6) << h_crd[i*3+j];
cout << " " << fixed << setw(8) << setprecision(4) << h_chg[i];
cout << endl;
}
}
void SimpleMD::CenterPosition()
{
float d[3];
cudaMemcpy(h_crd, d_crd, sizeof(float) * num_atoms * 3, cudaMemcpyDeviceToHost);
for (int i = 0; i < num_atoms; i++)
for (int j = 0; j < 3; j++) d[j] += h_crd[i*3+j];
for (int i = 0; i < 3; i++) d[i] /= num_atoms;
cudaMemcpy(d_crd, h_crd, sizeof(float) * num_atoms * 3, cudaMemcpyHostToDevice);
Center<<<grid, threads>>>(d_crd, d[0], d[1], d[2], num_atoms);
cudaThreadSynchronize();
}
unsigned int SimpleMD::NextPow2(unsigned int x)
{
--x;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
return ++x;
}
void SimpleMD::GetBlocksAndThreads(int n, int& blocks, int& threads)
{
threads = (n < BLOCK * 2) ? NextPow2((n + 1) / 2) : BLOCK;
blocks = (n + (threads * 2 - 1)) / (threads * 2);
blocks = min(width, blocks);
}
void SimpleMD::TotalEnergyReduce(int threads, int blocks)
{
switch (threads) {
case 512:
Energy<512><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
case 256:
Energy<256><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
case 128:
Energy<128><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
case 64:
Energy< 64><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
case 32:
Energy< 32><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
case 16:
Energy< 16><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
case 8:
Energy< 8><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
case 4:
Energy< 4><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
case 2:
Energy< 2><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
case 1:
Energy< 1><<<blocks, threads, sizeof(float) * threads>>>(d_ene, d_oene, num_atoms); break;
}
}
void SimpleMD::TotalEnergy()
{
if (num_atoms > 1024) {
int th, bl;
GetBlocksAndThreads(num_atoms, bl, th);
int s = bl;
while (s > 1) {
GetBlocksAndThreads(s, bl, th);
TotalEnergyReduce(th, bl);
s = (s + (th * 2 - 1)) / (th * 2);
}
cudaThreadSynchronize();
cudaMemcpy(&tene, d_oene, sizeof(float), cudaMemcpyDeviceToHost);
tene /= 2.0f;
} else {
cudaMemcpy(h_ene, d_ene, sizeof(float) * num_atoms, cudaMemcpyDeviceToHost);
tene = accumulate(h_ene, h_ene + num_atoms, 0.0f) / 2.0f;
}
}
void SimpleMD::Run()
{
cudaMemcpy(d_crd, h_crd, sizeof(float) * num_atoms * 3, cudaMemcpyHostToDevice);
cudaMemcpy(d_f, h_f, sizeof(float) * num_atoms * 3, cudaMemcpyHostToDevice);
cudaMemcpy(d_ene, h_ene, sizeof(float) * NextPow2(num_atoms), cudaMemcpyHostToDevice);
cudaMemcpy(d_chg, h_chg, sizeof(float) * num_atoms, cudaMemcpyHostToDevice);
for (int i = 0; i < num_steps; i++) {
if (i % num_center == 0) CenterPosition();
Calc<<<grid, threads>>>(d_crd, d_f, d_ene, d_chg, num_atoms);
cudaThreadSynchronize();
TotalEnergy();
Move<<<grid, threads>>>(d_crd, d_f, num_atoms, cap_range);
cudaThreadSynchronize();
cout << "Energy (" << setw(7) << i + 1 << "): ";
cout << fixed << setw(15) << setprecision(5) << tene << endl;
}
cudaMemcpy(h_crd, d_crd, sizeof(float) * num_atoms * 3, cudaMemcpyDeviceToHost);
cudaMemcpy(h_f, d_f, sizeof(float) * num_atoms * 3, cudaMemcpyDeviceToHost);
cudaMemcpy(h_ene, d_ene, sizeof(float) * num_atoms, cudaMemcpyDeviceToHost);
}
int main(int argc, char** argv)
{
if (argc != 3) {
cerr << "Usage: " << argv[0] << " input_file number_of_steps" << endl;
cerr << "Input: line 1 : title" << endl;
cerr << " line 2 : number of atoms" << endl;
cerr << " line 3 : radius of droplet" << endl;
cerr << " line 4-: x-crd y-crd z-crd charge" << endl;
exit(1);
}
stringstream ss;
int nstep;
cutilDeviceInit(1, argv);
ss.str(argv[2]); ss >> nstep;
int ncenter = 100;
SimpleMD md(nstep, ncenter, argv[1]);
md.PrintCrd();
md.Run();
md.PrintCrd();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment