Skip to content

Instantly share code, notes, and snippets.

@ivan-pi
Last active February 14, 2024 15:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ivan-pi/010c98f9344fd6f0a543f3a753b13365 to your computer and use it in GitHub Desktop.
Save ivan-pi/010c98f9344fd6f0a543f3a753b13365 to your computer and use it in GitHub Desktop.
Lattice Boltzmann in 2D on a periodic domain
// example.cpp
//
// Compile with:
// icpx -fsycl -O3 -march=native example.cpp
//
#include <vector>
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <string>
#include <iostream>
#include <chrono>
#include <CL/sycl.hpp>
const int N = 512;
const int NX = N;
const int NY = N;
const int num_steps = 50000;
const int num_steps_io = 10000;
const double U0 = 0.1 / std::sqrt(3);
const double nu = U0 * (double) N / 3000;
const double tau = 3*nu + 0.5;
const double omega = 1./tau;
const double w0 = 4./9.;
const double ws = 1./9.;
const double wd = 1./36.;
const std::array<double,9> w{w0,ws,ws,ws,ws,wd,wd,wd,wd};
const double PI = 4*std::atan(1.0);
struct shearlayer {
double length;
double kappa;
double delta;
double umax;
std::array<double,2> operator()(std::array<double,2> xy) {
const double x = xy[0]/length;
const double y = xy[1]/length;
const double ux = y < 0.5 ?
umax*std::tanh(kappa*(y - 0.25)) : umax*std::tanh(kappa*(0.75 - y));
const double uy = umax*delta*std::sin(2.0*PI*(x + 0.25));
return {ux,uy};
}
};
struct shearlayer layer{N,80.,0.05,U0};
template<typename T>
void writeVTK_rectilinear(
const char* filepath,
int nx, int ny,
const T rho[],
const T ux[], const T uy[]) {
std::ofstream vtkfile(filepath);
vtkfile << "# vtk DataFile Version 3.0\n";
vtkfile << "lbm grid output\n";
vtkfile << "ASCII\n";
#if 0
vtkfile << "DATASET RECTILINEAR_GRID\n";
vtkfile << "DIMENSIONS " << nx+1 << ' ' << ny+1 << " 2\n";
vtkfile << "X_COORDINATES " << nx+1 << " float\n";
for (int ix = 0; ix <= nx; ++ix) {
vtkfile << (T) ix << '\n';
}
vtkfile << "Y_COORDINATES " << ny+1 << " float\n";
for (int iy = 0; iy <= ny; ++iy) {
vtkfile << (T) iy << '\n';
}
vtkfile << "Z_COORDINATES 2 float\n";
vtkfile << (T) 0.0 << '\n';
vtkfile << (T) 1.0 << '\n';
#else
vtkfile << "DATASET STRUCTURED_POINTS\n";
vtkfile << "DIMENSIONS " << nx << ' ' << ny << " 1\n";
vtkfile << "ORIGIN 0.5 0.5 0.5\n";
vtkfile << "SPACING 1.0 1.0 0.0\n";
#endif
vtkfile << '\n';
vtkfile << "POINT_DATA " << nx*ny << '\n';
vtkfile << "SCALARS Density float 1\n";
vtkfile << "LOOKUP_TABLE default\n";
for (int iy = 0; iy < ny; ++iy) {
for (int ix = 0; ix < nx; ++ix) {
vtkfile << rho[ix*ny + iy] << '\n';
}
}
vtkfile << "VECTORS Velocity float\n";
for (int iy = 0; iy < ny; ++iy) {
for (int ix = 0; ix < nx; ++ix) {
vtkfile << ux[ix*ny+iy] << ' ' << uy[ix*nx+iy] << " 0\n";
}
}
vtkfile.close();
}
void set_equilibrium(sycl::queue &queue,
sycl::buffer<double,1> brho,
sycl::buffer<double,2> bvel,
sycl::buffer<double,2> bf) {
queue.submit([&](sycl::handler& h){
sycl::accessor f(bf,h,sycl::write_only);
sycl::accessor rho(brho,h,sycl::read_only);
sycl::accessor vel(bvel,h,sycl::read_only);
h.parallel_for(sycl::range{NX,NY},[=](sycl::id<2> id){
const std::size_t i = id[0]*NY + id[1];
const double ux = vel[0][i];
const double uy = vel[1][i];
const double usqr = ux*ux + uy*uy;
double cu[9];
cu[0] = 0;
cu[1] = ux;
cu[2] = uy;
cu[3] = -ux;
cu[4] = -uy;
cu[5] = ux + uy;
cu[6] = -ux + uy;
cu[7] = -ux - uy;
cu[8] = ux - uy;
for (int q = 0; q < 9; ++q) {
f[q][i] = rho[i]*w[q]*(1 + 3*cu[q] + 4.5*cu[q]*cu[q] - 1.5*usqr);
}
});
});
queue.wait();
}
void macros(sycl::queue &Q,
sycl::buffer<double,2> b_f,
sycl::buffer<double,1> b_rho,
sycl::buffer<double,2> b_vel) {
Q.submit([&](sycl::handler& h) {
sycl::accessor f(b_f,h,sycl::read_only);
sycl::accessor rho(b_rho,h,sycl::write_only);
sycl::accessor vel(b_vel,h,sycl::write_only);
h.parallel_for<class macro>(NX*NY,[=](sycl::id<1> i) {
const double rho_ =
((f[5][i] + f[7][i]) + (f[6][i] + f[8][i])) +
((f[1][i] + f[3][i]) + (f[2][i] + f[4][i])) + f[0][i];
rho[i] = rho_;
const double invrho = 1.0/rho_;
vel[0][i] = invrho * (((f[5][i] - f[7][i]) + (f[8][i] - f[6][i])) + (f[1][i] - f[3][i]));
vel[1][i] = invrho * (((f[5][i] - f[7][i]) + (f[6][i] - f[8][i])) + (f[2][i] - f[4][i]));
});
});
}
void collide(sycl::queue &Q,
sycl::buffer<double,2> b_f,
sycl::buffer<double,1> b_rho,
sycl::buffer<double,2> b_vel,
double omega) {
Q.submit([&](sycl::handler &h) {
//sycl::accessor rho(b_rho,h,sycl::read_only);
//sycl::accessor vel(b_vel,h,sycl::read_only);
sycl::accessor f(b_f,h,sycl::read_write);
const double w[3] = {w0, ws, wd};
const double omegabar = 1.0 - omega;
const double one_third = 1.0 / 3.0;
const double one_half = 1.0 / 2.0;
const double omega_w0 = 3.0 * omega * w[0];
const double omega_ws = 3.0 * omega * w[1];
const double omega_wd = 3.0 * omega * w[2];
h.parallel_for<class collision>(NX*NY,[=](sycl::id<1> i) {
// --- constants ---
// --- common terms and direction 0 ---
const double rho =
((f[5][i] + f[7][i]) + (f[6][i] + f[8][i])) +
((f[1][i] + f[3][i]) + (f[2][i] + f[4][i])) + f[0][i];
const double invrho = 1.0/rho;
const double velx = invrho * (((f[5][i] - f[7][i]) + (f[8][i] - f[6][i])) + (f[1][i] - f[3][i]));
const double vely = invrho * (((f[5][i] - f[7][i]) + (f[6][i] - f[8][i])) + (f[2][i] - f[4][i]));
const double velxx = velx*velx;
const double velyy = vely*vely;
const double indp = one_third - one_half * (velxx + velyy);
f[0][i] = omegabar * f[0][i] + omega_w0 * rho * indp;
// ------
const double vel_trm_13 = indp + (3./2.)*velxx;
f[1][i] = omegabar * f[1][i] + omega_ws*rho*(vel_trm_13 + velx);
f[3][i] = omegabar * f[3][i] + omega_ws*rho*(vel_trm_13 - velx);
// ------
const double vel_trm_24 = indp + (3./2.)*velyy;
f[2][i] = omegabar * f[2][i] + omega_ws * rho * (vel_trm_24 + vely);
f[4][i] = omegabar * f[4][i] + omega_ws * rho * (vel_trm_24 - vely);
// ------
const double velxpy = velx + vely;
const double vel_trm_57 = indp + (3./2.) * velxpy * velxpy;
f[5][i] = omegabar * f[5][i] + omega_wd * rho * (vel_trm_57 + velxpy);
f[7][i] = omegabar * f[7][i] + omega_wd * rho * (vel_trm_57 - velxpy);
// ------
const double velxmy = velx - vely;
const double vel_trm_68 = indp + (3./2.) * velxmy * velxmy;
f[6][i] = omegabar * f[6][i] + omega_wd * rho * (vel_trm_68 - velxmy);
f[8][i] = omegabar * f[8][i] + omega_wd * rho * (vel_trm_68 + velxmy);
});
});
}
void stream(sycl::queue &Q,
sycl::buffer<double,2> bfold,
sycl::buffer<double,2> bfnew) {
Q.submit([&](sycl::handler& h) {
sycl::accessor fold(bfold,h,sycl::read_only);
sycl::accessor fnew(bfnew,h,sycl::write_only);
h.parallel_for(sycl::range{NX,NY},[=](sycl::id<2> id) {
auto ix = id[0];
auto iy = id[1];
size_t xp1 = (ix + 1) % NX;
size_t xm1 = (ix + NX - 1) % NX;
size_t yp1 = (iy + 1) % NY;
size_t ym1 = (iy + NY - 1) % NY;
size_t curr = ix*NY + iy;
fnew[0][curr] = fold[0][curr]; /* central cell, no movement */
#if 1
fnew[1][curr] = fold[1][iy + xp1*NY]; /* east */
fnew[2][curr] = fold[2][yp1 + ix*NY]; /* north */
fnew[3][curr] = fold[3][iy + xm1*NY]; /* west */
fnew[4][curr] = fold[4][ym1 + ix*NY]; /* south */
fnew[5][curr] = fold[5][yp1 + xp1*NY]; /* north-east */
fnew[6][curr] = fold[6][yp1 + xm1*NY]; /* north-west */
fnew[7][curr] = fold[7][ym1 + xm1*NY]; /* south-west */
fnew[8][curr] = fold[8][ym1 + xp1*NY]; /* south-east */
#else
fnew[1][iy + xm1*NY] = fold[1][curr]; /* east */
fnew[2][ym1 + ix*NY] = fold[2][curr]; /* north */
fnew[3][iy + xp1*NY] = fold[3][curr]; /* west */
fnew[4][yp1 + ix*NY] = fold[4][curr]; /* south */
fnew[5][ym1 + ym1*NY] = fold[5][curr]; /* north-east */
fnew[6][ym1 + xp1*NY] = fold[6][curr]; /* north-west */
fnew[7][yp1 + xp1*NY] = fold[7][curr]; /* south-west */
fnew[8][yp1 + xm1*NY] = fold[8][curr]; /* south-east */
#endif
});
});
}
std::string filename(int n, std::string basename) {
char buffer[13];
std::snprintf(buffer, sizeof(buffer), "%08d.vtk", n);
return basename.append(buffer);
}
int main(int argc, char const *argv[])
{
sycl::device my_dev{sycl::default_selector{}};
std::cout << "Running tests on " << my_dev.get_info<sycl::info::device::name>() << ".\n";
std::vector<double> fold(NX*NY*9);
std::vector<double> fnew(NX*NY*9);
std::vector<double> rho(NX*NY);
std::vector<double> vel(NX*NY*2);
// Initial density
std::fill(rho.begin(),rho.end(),1.0);
// Initial velocity
{
double *vx = &vel[0];
double *vy = &vel[NX*NY];
for (int ix = 0; ix < NX; ++ix) {
for (int iy = 0; iy < NY; ++iy) {
const double x = (double) ix + 0.5;
const double y = (double) iy + 0.5;
auto vxy = layer({x,y});
vx[ix*NY + iy] = vxy[0];
vy[ix*NY + iy] = vxy[1];
}
}
//writeVTK_rectilinear("begin.vtk",NX,NY,rho.data(),vx,vy);
}
// Catch asynchronous exceptions
auto exception_handler = [](sycl::exception_list exceptions) {
for (std::exception_ptr const &e : exceptions) {
try {
std::rethrow_exception(e);
}
catch (sycl::exception const &e) {
std::cout << "Caught asynchronous SYCL "
"exception during execution:\n"
<< e.what() << std::endl;
}
}
};
auto start = std::chrono::steady_clock::now();
{ /* kernel region */
sycl::queue Q(my_dev, exception_handler);
sycl::buffer<double,2> b_fold{fold.data(),{9,NX*NY}};
sycl::buffer<double,2> b_fnew{fnew.data(),{9,NX*NY}};
sycl::buffer<double,1> b_rho{rho.data(),{NX*NY}};
sycl::buffer<double,2> b_vel{vel.data(),{2,NX*NY}};
set_equilibrium(Q,b_rho,b_vel,b_fold);
for (int step = 0; step < num_steps; ++step) {
if (step % 2 == 0) {
//macros(Q,b_fold,b_rho,b_vel);
collide(Q,b_fold,b_rho,b_vel,omega);
stream(Q,b_fold,b_fnew);
} else {
//macros(Q,b_fnew,b_rho,b_vel);
collide(Q,b_fnew,b_rho,b_vel,omega);
stream(Q,b_fnew,b_fold);
}
if (step % num_steps_io == 0 && num_steps_io > 0) {
if (step % 2 == 0) {
macros(Q,b_fold,b_rho,b_vel);
} else {
macros(Q,b_fnew,b_rho,b_vel);
}
sycl::host_accessor h_rho(b_rho);
sycl::host_accessor h_vel(b_vel);
double *hvx = h_vel.get_pointer();
double *hvy = hvx + NX*NY;
auto file = filename(step,"lbm");
writeVTK_rectilinear(file.c_str(),NX,NY,h_rho.get_pointer(),hvx,hvy);
}
}
} /* end kernel region */
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed_seconds = end - start;
std::cout << "MLUPS = " << double(NX)*double(NY)*double(num_steps)*1.e-6/elapsed_seconds.count() << '\n';
#if 0
{
double *r = rho.data();
double *vx = &vel[0];
double *vy = &vel[NX*NY];
writeVTK_rectilinear("end.vtk",NX,NY,r,vx,vy);
}
#endif
return 0;
}
CXX=icpx
CXXFLAGS=-Wall -g -O2 -march=native
.PHONY: all
all: example
example: example.cpp
$(CXX) -o $@ $(CXXFLAGS) -fsycl -fsycl-targets=spir64_x86_64 $<
.PHONY: clean
clean:
rm -f example
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment