Last active
August 29, 2015 13:56
-
-
Save mdakin/9232849 to your computer and use it in GitHub Desktop.
Approximate cos with simd
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import "dart:typed_data"; | |
import "dart:math"; | |
/// Approximates cosine on four packed single-precision lanes at once.
///
/// The approximation is the even polynomial
/// `a + b*x^2 + c*x^4 + d*x^6`, evaluated in Horner form on `x^2`.
///
/// NOTE(review): the valid input range is not stated in this file; the
/// accompanying benchmark only feeds values in [0, pi/4) -- confirm the
/// accuracy before using wider ranges.
class AMath {
  /// Constant term, broadcast to all four lanes.
  final Float32x4 a = Float32x4.splat(0.9999932946);

  /// Coefficient of `x^2`, broadcast to all four lanes.
  final Float32x4 b = Float32x4.splat(-0.4999124376);

  /// Coefficient of `x^4`, broadcast to all four lanes.
  final Float32x4 c = Float32x4.splat(0.0414877472);

  /// Coefficient of `x^6`, broadcast to all four lanes.
  final Float32x4 d = Float32x4.splat(-0.0012712095);

  /// Returns the lane-wise polynomial approximation of `cos` for [x].
  Float32x4 apcos(Float32x4 x) {
    final squared = x * x;
    // Horner steps, innermost first; the operation order matches the
    // single-expression form a + x2 * (b + x2 * (c + x2 * d)).
    final inner = c + squared * d;
    final middle = b + squared * inner;
    return a + squared * middle;
  }
}
/// Sums `cos` of every element of [rnd], repeating the full pass [iter] times.
///
/// Returns the accumulated sum; it is `0.0` when [rnd] is empty or [iter] is
/// not positive. This is the scalar `dart:math` baseline of the benchmark.
double test(Float64List rnd, int iter) {
  final count = rnd.length;
  var sum = 0.0;
  // A single running accumulator is kept across all passes so the
  // floating-point summation order stays element-by-element, pass-by-pass.
  for (var remaining = iter; remaining > 0; remaining--) {
    for (var k = 0; k < count; k++) {
      sum = sum + cos(rnd[k]);
    }
  }
  return sum;
}
/// Sums [AMath.apcos] of every packed element of [rndp], repeating the full
/// pass [iter] times.
///
/// Returns the lane-wise accumulated sum; it is all zeros when [rndp] is
/// empty or [iter] is not positive. This is the SIMD side of the benchmark.
Float32x4 testp(AMath m, Float32x4List rndp, int iter) {
  final count = rndp.length;
  var sum = Float32x4.zero();
  // One accumulator across all passes keeps the per-lane summation order
  // element-by-element, pass-by-pass.
  for (var remaining = iter; remaining > 0; remaining--) {
    for (var k = 0; k < count; k++) {
      sum = sum + m.apcos(rndp[k]);
    }
  }
  return sum;
}
/// Benchmarks `dart:math`'s `cos` against the packed approximation
/// [AMath.apcos] and prints both timings plus the largest absolute
/// difference observed between them.
///
/// Both benchmarks run over the same pseudo-random inputs (fixed seed), once
/// as a warm-up and then [iter] times under a stopwatch. Times are printed in
/// milliseconds.
void main() {
  // Number of scalar samples; a multiple of 4 so it packs evenly into
  // Float32x4 lanes below.
  int rs = 1 << 20;
  int iter = 100;
  Float64List rnd = Float64List(rs);
  // Fixed seed so every run uses the same inputs.
  Random r = Random(1);
  // Create a random array with values in [0, pi/4).
  for (int i = 0; i < rs; i++) {
    // `pi` replaces the upper-case `PI` constant, which was removed from
    // dart:math in Dart 2 and no longer compiles.
    rnd[i] = r.nextDouble() * pi / 4;
  }
  // Create packed version of the same array (values are narrowed to
  // single precision when stored in the lanes).
  Float32x4List rndp = Float32x4List(rs >> 2);
  int j = 0;
  for (int i = 0; i < rs; i += 4) {
    rndp[j++] = Float32x4(rnd[i], rnd[i + 1], rnd[i + 2], rnd[i + 3]);
  }

  // math.cos
  // Warm up
  test(rnd, 1);
  Stopwatch sw = Stopwatch();
  sw.start();
  // Run the test
  test(rnd, iter);
  var e = sw.elapsedMilliseconds;
  print("math.cos time: $e");

  // Approximate simd cos
  // Warm up
  AMath m = AMath();
  testp(m, rndp, 1);
  // The stopwatch is still running here; reset() zeroes the elapsed time so
  // the setup and warm-up above are not counted.
  sw.reset();
  sw.start();
  // Run the test
  testp(m, rndp, iter);
  e = sw.elapsedMilliseconds;
  print("Approx cos time: $e");

  // Compare the approximation lane-by-lane against math.cos of the original
  // double-precision inputs and keep the largest absolute difference.
  double maxErr = 0.0;
  for (int i = 0; i < rndp.length; i++) {
    Float32x4 ap = m.apcos(rndp[i]);
    j = i * 4;
    Float32x4 mv = Float32x4(
        cos(rnd[j]), cos(rnd[j + 1]), cos(rnd[j + 2]), cos(rnd[j + 3]));
    Float32x4 d = (ap - mv).abs();
    maxErr = max(maxErr, max(max(d.x, d.y), max(d.z, d.w)));
  }
  print("Max err: $maxErr");
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment