Skip to content

Instantly share code, notes, and snippets.

@mdakin
Last active August 29, 2015 13:56
Show Gist options
  • Save mdakin/9232849 to your computer and use it in GitHub Desktop.
Save mdakin/9232849 to your computer and use it in GitHub Desktop.
Approximate cos with simd
import "dart:typed_data";
import "dart:math";
/// Evaluates a polynomial cosine approximation on four packed
/// single-precision lanes at once.
///
/// The fields [a], [b], [c] and [d] hold the polynomial coefficients, each
/// broadcast to all four lanes of a [Float32x4].
class AMath {
  /// Constant term of the polynomial.
  final Float32x4 a = Float32x4.splat(0.9999932946);

  /// Coefficient of `x^2`.
  final Float32x4 b = Float32x4.splat(-0.4999124376);

  /// Coefficient of `x^4`.
  final Float32x4 c = Float32x4.splat(0.0414877472);

  /// Coefficient of `x^6`.
  final Float32x4 d = Float32x4.splat(-0.0012712095);

  /// Returns `a + b*x^2 + c*x^4 + d*x^6` computed lane-wise for [x].
  Float32x4 apcos(Float32x4 x) {
    final squared = x * x;
    // Horner's scheme in x^2, evaluated from the innermost term outwards.
    final inner = c + squared * d;
    final middle = b + squared * inner;
    return a + squared * middle;
  }
}
/// Sums `cos` of every element of [rnd], repeating the full sweep [iter]
/// times, and returns the accumulated total.
///
/// Returns `0.0` when [rnd] is empty or [iter] is not positive.
double test(Float64List rnd, int iter) {
  final count = rnd.length;
  var sum = 0.0;
  for (var remaining = iter; remaining > 0; remaining--) {
    // Indexed access, front to back, keeps the summation order fixed.
    for (var k = 0; k < count; k++) {
      sum += cos(rnd[k]);
    }
  }
  return sum;
}
/// Sums `m.apcos` of every packed vector in [rndp], repeating the full sweep
/// [iter] times, and returns the lane-wise total.
///
/// Returns an all-zero vector when [rndp] is empty or [iter] is not positive.
Float32x4 testp(AMath m, Float32x4List rndp, int iter) {
  final count = rndp.length;
  var sum = Float32x4.zero();
  for (var remaining = iter; remaining > 0; remaining--) {
    // Indexed access, front to back, keeps the accumulation order fixed.
    for (var k = 0; k < count; k++) {
      sum += m.apcos(rndp[k]);
    }
  }
  return sum;
}
/// Benchmarks scalar `dart:math` `cos` against the packed approximation
/// `AMath.apcos`, then reports how far the two disagree.
///
/// Prints three lines: the elapsed milliseconds of each timed run and the
/// largest absolute per-lane difference over the whole input.
void main() {
  // Number of scalar samples; a multiple of 4 so it packs evenly.
  const rs = 1 << 20;
  // Number of sweeps over the data in each timed run.
  const iter = 100;

  // Fixed seed so every run sees the same inputs.
  final r = Random(1);

  // Scalar inputs: pseudo-random values in [0, pi/4).
  // `pi` replaces `PI`, which was removed from dart:math in Dart 2.
  final rnd = Float64List(rs);
  for (var i = 0; i < rs; i++) {
    rnd[i] = r.nextDouble() * pi / 4;
  }

  // Packed copy of the same inputs, four consecutive values per vector.
  final rndp = Float32x4List(rs >> 2);
  for (var i = 0, j = 0; i < rs; i += 4, j++) {
    rndp[j] = Float32x4(rnd[i], rnd[i + 1], rnd[i + 2], rnd[i + 3]);
  }

  // Scalar math.cos: one untimed warm-up sweep, then the timed run.
  test(rnd, 1);
  final sw = Stopwatch();
  sw.start();
  test(rnd, iter);
  // Stop before reading so the measurement excludes the print below.
  sw.stop();
  var e = sw.elapsedMilliseconds;
  print("math.cos time: $e");

  // Approximate SIMD cos: one untimed warm-up sweep, then the timed run.
  final m = AMath();
  testp(m, rndp, 1);
  sw
    ..reset()
    ..start();
  testp(m, rndp, iter);
  sw.stop();
  e = sw.elapsedMilliseconds;
  print("Approx cos time: $e");

  // Largest absolute difference between the approximation and math.cos,
  // compared lane by lane after rounding math.cos to single precision.
  var maxErr = 0.0;
  for (var i = 0; i < rndp.length; i++) {
    final base = i * 4;
    final ap = m.apcos(rndp[i]);
    final mv = Float32x4(cos(rnd[base]), cos(rnd[base + 1]),
        cos(rnd[base + 2]), cos(rnd[base + 3]));
    final diff = (ap - mv).abs();
    maxErr = max(maxErr, max(max(diff.x, diff.y), max(diff.z, diff.w)));
  }
  print("Max err: $maxErr");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment