Skip to content

Instantly share code, notes, and snippets.

@itzmeanjan
Last active September 13, 2021 14:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save itzmeanjan/6ee0ec55723f8df04b7260f65bc71ff6 to your computer and use it in GitHub Desktop.
Save itzmeanjan/6ee0ec55723f8df04b7260f65bc71ff6 to your computer and use it in GitHub Desktop.
Sequential + Parallel Root Finding using Bisection Method, powered by SYCL DPC++
#include <CL/sycl.hpp>
#include <array>
#include <chrono>
#include <cmath>
#include <iostream>
using namespace sycl;
constexpr uint N = 32;
float C = powf(10.0, -5.0);
float f(float x) { return x * x * x - 4.0 * x - 5.0; }
float bisect(queue &Q, buffer<float, 1> &args, buffer<float, 1> &evals) {
while (true) {
Q.submit([&](handler &h) {
accessor dacc_args{args, h, read_write};
accessor dacc_evals{evals, h, read_write};
h.parallel_for(nd_range<1>{{N}, {N}}, [=](nd_item<1> item) {
int idx = item.get_global_id()[0];
float idx_f = static_cast<float>(idx);
if (idx == 0) {
dacc_evals[idx] = f(dacc_args[0]);
} else if (idx == N - 1) {
dacc_evals[idx] = f(dacc_args[1]);
} else {
dacc_evals[idx] = f(dacc_args[0] + idx_f * dacc_args[2]);
}
item.barrier();
if (idx != 0 && dacc_evals[idx - 1] * dacc_evals[idx] < 0.0) {
float a = dacc_args[0] + (idx_f - 1.0) * dacc_args[2];
float b = a + dacc_args[2];
dacc_args[0] = a;
dacc_args[1] = b;
dacc_args[2] = (b - a) / float(N - 1);
}
});
});
host_accessor hacc_args{args, read_only};
float a = hacc_args[0];
float b = hacc_args[1];
if (std::abs(b - a) < C) {
break;
}
}
host_accessor hacc_args{args, read_only};
return hacc_args[0];
}
int main(int argc, char **argv) {
device D{default_selector{}};
queue Q{D};
std::cout << "running on: " << D.get_info<info::device::name>() << std::endl;
float a = -10.0;
float b = 10.0;
float s = (b - a) / static_cast<float>(N - 1);
std::array<float, 3> arg_arr = {a, b, s};
buffer<float, 1> args{arg_arr};
buffer<float, 1> evals{range<1>{N}};
Q.submit([&](handler &h) {
accessor dacc{args, h, write_only, noinit};
h.single_task([=]() {
dacc[0] = a;
dacc[1] = b;
dacc[2] = s;
});
});
auto start = std::chrono::steady_clock::now();
float root = bisect(Q, args, evals);
auto end = std::chrono::steady_clock::now();
std::cout << "root: " << root << ", in "
<< std::chrono::duration_cast<std::chrono::milliseconds>(end -
start)
.count()
<< "ms" << std::endl;
return 0;
}
use std::time::Instant;
// equation whose root being searched
fn f(x: f32) -> f32 {
x.powi(3) - 4f32 * x - 5f32
}
// sequential bisection method implementation
fn bisect(f: &dyn Fn(f32) -> f32, mut a: f32, mut b: f32, c: f32) -> f32 {
while (b - a).abs() >= c {
let m = (a + b) / 2f32;
if f(a) * f(m) < 0f32 {
b = m;
} else {
a = m;
}
}
a
}
fn main() {
// approximate with max error 10 ** -5
let c = 10f32.powi(-5);
let start = Instant::now();
let a_ = bisect(&f1, -3f32, 3f32, c);
println!("{}, in {:?}", a_, start.elapsed());
}
#!/usr/bin/python3
from matplotlib import pyplot as plt
import numpy as np
f = lambda x: x ** 3 - 4 * x - 5
x = np.arange(-3, 3, 0.01)
y = f(x)
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.spines['left'].set_position('zero')
ax.spines['bottom'].set_position('zero')
ax.spines['top'].set_color('none')
ax.spines['right'].set_color('none')
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
plt.plot(x, y)
plt.legend(['f(x) = x^3 - 4x - 5'], loc='upper left')
plt.savefig('plot.png')
@itzmeanjan
Copy link
Author

Introduction

If you've not yet read https://itzmeanjan.in/pages/parallel-root-finding-using-sycl-dpcpp.html, you should probably consider doing so, given this gist accompanies that document.

Here I keep both sequential & parallel implementation of bisection method, used for finding root of non-linear equation of single variable. SIMD version of it is written in DPC++.

Usage

  • Make sure you've built DPC++ compiler. I found this one helpful.
  • Now compile bisection.parallel.cpp with
clang++ -fsycl bisection.parallel.cpp -o a.out
  • Run it, you'll see something like 👇
./a.out

running on: Intel Xeon Processor (Skylake, IBRS)
root: 2.45668, in 26ms
  • For running sequential version bisection.sequential.rs, you should copy/ paste it in cargo project's main.rs.
mkdir bisection_sequential
pushd bisection_sequential
cargo init

# copy content of bisection.sequential.rs to main.rs

cargo run
  • You may also run python script for plotting function f(x) = x ** 3 - 4 * x - 5
python3 plot.py

Generated plot looks like 👇

plot

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment