Sequential + Parallel Root Finding using Bisection Method, powered by SYCL DPC++
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <CL/sycl.hpp> | |
#include <array> | |
#include <chrono> | |
#include <cmath> | |
#include <iostream> | |
using namespace sycl; | |
constexpr uint N = 32; | |
float C = powf(10.0, -5.0); | |
float f(float x) { return x * x * x - 4.0 * x - 5.0; } | |
float bisect(queue &Q, buffer<float, 1> &args, buffer<float, 1> &evals) { | |
while (true) { | |
Q.submit([&](handler &h) { | |
accessor dacc_args{args, h, read_write}; | |
accessor dacc_evals{evals, h, read_write}; | |
h.parallel_for(nd_range<1>{{N}, {N}}, [=](nd_item<1> item) { | |
int idx = item.get_global_id()[0]; | |
float idx_f = static_cast<float>(idx); | |
if (idx == 0) { | |
dacc_evals[idx] = f(dacc_args[0]); | |
} else if (idx == N - 1) { | |
dacc_evals[idx] = f(dacc_args[1]); | |
} else { | |
dacc_evals[idx] = f(dacc_args[0] + idx_f * dacc_args[2]); | |
} | |
item.barrier(); | |
if (idx != 0 && dacc_evals[idx - 1] * dacc_evals[idx] < 0.0) { | |
float a = dacc_args[0] + (idx_f - 1.0) * dacc_args[2]; | |
float b = a + dacc_args[2]; | |
dacc_args[0] = a; | |
dacc_args[1] = b; | |
dacc_args[2] = (b - a) / float(N - 1); | |
} | |
}); | |
}); | |
host_accessor hacc_args{args, read_only}; | |
float a = hacc_args[0]; | |
float b = hacc_args[1]; | |
if (std::abs(b - a) < C) { | |
break; | |
} | |
} | |
host_accessor hacc_args{args, read_only}; | |
return hacc_args[0]; | |
} | |
int main(int argc, char **argv) { | |
device D{default_selector{}}; | |
queue Q{D}; | |
std::cout << "running on: " << D.get_info<info::device::name>() << std::endl; | |
float a = -10.0; | |
float b = 10.0; | |
float s = (b - a) / static_cast<float>(N - 1); | |
std::array<float, 3> arg_arr = {a, b, s}; | |
buffer<float, 1> args{arg_arr}; | |
buffer<float, 1> evals{range<1>{N}}; | |
Q.submit([&](handler &h) { | |
accessor dacc{args, h, write_only, noinit}; | |
h.single_task([=]() { | |
dacc[0] = a; | |
dacc[1] = b; | |
dacc[2] = s; | |
}); | |
}); | |
auto start = std::chrono::steady_clock::now(); | |
float root = bisect(Q, args, evals); | |
auto end = std::chrono::steady_clock::now(); | |
std::cout << "root: " << root << ", in " | |
<< std::chrono::duration_cast<std::chrono::milliseconds>(end - | |
start) | |
.count() | |
<< "ms" << std::endl; | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::time::Instant; | |
// equation whose root being searched | |
fn f(x: f32) -> f32 { | |
x.powi(3) - 4f32 * x - 5f32 | |
} | |
// sequential bisection method implementation | |
fn bisect(f: &dyn Fn(f32) -> f32, mut a: f32, mut b: f32, c: f32) -> f32 { | |
while (b - a).abs() >= c { | |
let m = (a + b) / 2f32; | |
if f(a) * f(m) < 0f32 { | |
b = m; | |
} else { | |
a = m; | |
} | |
} | |
a | |
} | |
fn main() { | |
// approximate with max error 10 ** -5 | |
let c = 10f32.powi(-5); | |
let start = Instant::now(); | |
let a_ = bisect(&f1, -3f32, 3f32, c); | |
println!("{}, in {:?}", a_, start.elapsed()); | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
from matplotlib import pyplot as plt | |
import numpy as np | |
f = lambda x: x ** 3 - 4 * x - 5 | |
x = np.arange(-3, 3, 0.01) | |
y = f(x) | |
fig = plt.figure() | |
ax = fig.add_subplot(1, 1, 1) | |
ax.spines['left'].set_position('zero') | |
ax.spines['bottom'].set_position('zero') | |
ax.spines['top'].set_color('none') | |
ax.spines['right'].set_color('none') | |
ax.xaxis.set_ticks_position('bottom') | |
ax.yaxis.set_ticks_position('left') | |
plt.plot(x, y) | |
plt.legend(['f(x) = x^3 - 4x - 5'], loc='upper left') | |
plt.savefig('plot.png') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Introduction
If you've not yet read https://itzmeanjan.in/pages/parallel-root-finding-using-sycl-dpcpp.html, you should probably consider doing so, given this gist accompanies that document.
Here I keep both sequential & parallel implementation of bisection method, used for finding root of non-linear equation of single variable. SIMD version of it is written in DPC++.
Usage
bisection.parallel.cpp
with./a.out running on: Intel Xeon Processor (Skylake, IBRS) root: 2.45668, in 26ms
bisection.sequential.rs
, you should copy/ paste it in cargo project'smain.rs
.f(x) = x ** 3 - 4 * x - 5