Skip to content

Instantly share code, notes, and snippets.

@Veedrac
Created December 27, 2015 20:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Veedrac/dbb0c07994bc7882098e to your computer and use it in GitHub Desktop.
Save Veedrac/dbb0c07994bc7882098e to your computer and use it in GitHub Desktop.
Naïve (inaccurate, prone to overflow) floating point parser vs. Rust builtin parser benchmark
#![feature(test)]
extern crate test;
use std::f64;
pub fn parse_builtin(string: &str) -> Option<f64> {
string.parse().ok()
}
const CACHED_POWERS_10: [f64; 29] = [
1.0e-14, 1.0e-13, 1.0e-12, 1.0e-11, 1.0e-10, 1.0e-9,
1.0e-8, 1.0e-7, 1.0e-6, 1.0e-5, 1.0e-4, 1.0e-3, 1.0e-2,
1.0e-1, 1.0e0, 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1.0e6,
1.0e7, 1.0e8, 1.0e9, 1.0e10, 1.0e11, 1.0e12, 1.0e13, 1.0e14
];
fn is_digit(byte: u8) -> bool {
b'0' <= byte && byte <= b'9'
}
fn as_digit(byte: u8) -> u8 {
byte - b'0'
}
pub fn parse_custom(string: &str) -> Option<f64> {
let mut integer_part = 0;
let mut exponent_sign = 1;
let mut exponent = 0;
let mut decimal_exponent_offset = 0;
let bytes = string.as_bytes();
// Remove sign, if any.
let (sign, mut bytes) = match bytes.split_first() {
Some((&b'+', rest)) => (1, rest),
Some((&b'-', rest)) => (-1, rest),
Some(_) => (1, bytes),
None => return None,
};
if bytes.is_empty() {
return None;
}
// Now the string (if valid) starts
// with an integer or decimal point.
let mut has_decimals = false;
if bytes[0] == b'.' {
bytes = &bytes[1..];
has_decimals = true;
// We need at least one decimal value
match bytes.first() {
Some(&b'0'...b'9') => (),
_ => return None,
}
}
else {
loop {
if bytes[0] == b'.' {
bytes = &bytes[1..];
has_decimals = true;
break;
}
if !is_digit(bytes[0]) {
break;
}
integer_part *= 10;
integer_part += as_digit(bytes[0]) as i64;
bytes = &bytes[1..];
if bytes.is_empty() {
break;
}
}
}
if has_decimals {
while let Some(&byte @ b'0'...b'9') = bytes.first() {
bytes = &bytes[1..];
integer_part *= 10;
integer_part += as_digit(byte) as i64;
decimal_exponent_offset += 1;
}
}
// Now we might have the exponent
if !bytes.is_empty() {
// An exponent only starts with 'e' or 'E'
if !(bytes[0] == b'e' || bytes[0] == b'E') {
return None;
}
bytes = &bytes[1..];
if bytes.is_empty() {
return None;
}
else if bytes[0] == b'+' {
bytes = &bytes[1..];
}
else if bytes[0] == b'-' {
bytes = &bytes[1..];
exponent_sign = -1;
}
if bytes.is_empty() {
return None;
}
// The rest of the string is just a natural number
for &byte in bytes {
if !is_digit(byte) {
return None;
}
exponent *= 10;
exponent += as_digit(byte) as i32;
}
}
let mut as_float = (sign * integer_part) as f64;
let exponent = exponent_sign * exponent - decimal_exponent_offset;
if exponent.abs() <= 14 {
as_float *= CACHED_POWERS_10[(exponent + 14) as usize];
}
else {
as_float *= f64::powi(10.0, exponent);
}
Some(as_float)
}
#[cfg(test)]
mod tests {
use super::*;
use test::{Bencher, black_box};
#[bench]
fn parse_builtin_exponent(b: &mut Bencher) {
b.iter(|| assert!({
let t = parse_builtin(black_box("3.1453326e-4")).unwrap();
3.1453325e-4 < t && t < 3.1453327e-4
}));
}
#[bench]
fn parse_custom_exponent(b: &mut Bencher) {
b.iter(|| assert!({
let t = parse_custom(black_box("3.1453326e-4")).unwrap();
3.1453325e-4 < t && t < 3.1453327e-4
}));
}
#[bench]
fn parse_builtin_simple(b: &mut Bencher) {
b.iter(|| assert!({
let t = parse_builtin(black_box("31234.14533")).unwrap();
31234.14532 < t && t < 31234.14534
}));
}
#[bench]
fn parse_custom_simple(b: &mut Bencher) {
b.iter(|| assert!({
let t = parse_custom(black_box("31234.14533")).unwrap();
31234.14532 < t && t < 31234.14534
}));
}
#[bench]
fn parse_builtin_integer(b: &mut Bencher) {
b.iter(|| assert!({
let t = parse_builtin(black_box("3123414533")).unwrap();
3123414532.0 < t && t < 3123414534.0
}));
}
#[bench]
fn parse_custom_integer(b: &mut Bencher) {
b.iter(|| assert!({
let t = parse_custom(black_box("3123414533")).unwrap();
3123414532.0 < t && t < 3123414534.0
}));
}
#[bench]
fn parse_builtin_decimal(b: &mut Bencher) {
b.iter(|| assert!({
let t = parse_builtin(black_box(".3123414533")).unwrap();
0.3123414532 < t && t < 0.3123414534
}));
}
#[bench]
fn parse_custom_decimal(b: &mut Bencher) {
b.iter(|| assert!({
let t = parse_custom(black_box(".3123414533")).unwrap();
0.3123414532 < t && t < 0.3123414534
}));
}
}
@Veedrac
Copy link
Author

Veedrac commented Dec 27, 2015

test tests::parse_builtin_decimal  ... bench:          67 ns/iter (+/- 25)
test tests::parse_builtin_exponent ... bench:          68 ns/iter (+/- 10)
test tests::parse_builtin_integer  ... bench:          40 ns/iter (+/- 2)
test tests::parse_builtin_simple   ... bench:          67 ns/iter (+/- 8)
test tests::parse_custom_decimal   ... bench:          13 ns/iter (+/- 0)
test tests::parse_custom_exponent  ... bench:          14 ns/iter (+/- 1)
test tests::parse_custom_integer   ... bench:          14 ns/iter (+/- 3)
test tests::parse_custom_simple    ... bench:          12 ns/iter (+/- 0)

I thought I could get it down to ~10ns, but that was before I'd finished it.

@hanna-kruppe
Copy link

Improved by rust-lang/rust#30639. Before:

test tests::parse_builtin_decimal  ... bench:          95 ns/iter (+/- 8)
test tests::parse_builtin_exponent ... bench:         100 ns/iter (+/- 6)
test tests::parse_builtin_integer  ... bench:          62 ns/iter (+/- 7)
test tests::parse_builtin_simple   ... bench:          97 ns/iter (+/- 1)
test tests::parse_custom_decimal   ... bench:          20 ns/iter (+/- 1)
test tests::parse_custom_exponent  ... bench:          20 ns/iter (+/- 1)
test tests::parse_custom_integer   ... bench:          19 ns/iter (+/- 1)
test tests::parse_custom_simple    ... bench:          18 ns/iter (+/- 1)

After:

test tests::parse_builtin_decimal  ... bench:          64 ns/iter (+/- 4)
test tests::parse_builtin_exponent ... bench:          69 ns/iter (+/- 2)
test tests::parse_builtin_integer  ... bench:          60 ns/iter (+/- 5)
test tests::parse_builtin_simple   ... bench:          63 ns/iter (+/- 6)
test tests::parse_custom_decimal   ... bench:          20 ns/iter (+/- 1)
test tests::parse_custom_exponent  ... bench:          21 ns/iter (+/- 1)
test tests::parse_custom_integer   ... bench:          18 ns/iter (+/- 1)
test tests::parse_custom_simple    ... bench:          19 ns/iter (+/- 2)

More improvements incoming but from now on it will probably consist of "shuffling code around to help the compiler" :(

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment