Created
December 27, 2015 20:21
-
-
Save Veedrac/dbb0c07994bc7882098e to your computer and use it in GitHub Desktop.
Naïve (inaccurate, prone to overflow) floating point parser vs. Rust builtin parser benchmark
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(test)] | |
extern crate test; | |
use std::f64; | |
pub fn parse_builtin(string: &str) -> Option<f64> { | |
string.parse().ok() | |
} | |
const CACHED_POWERS_10: [f64; 29] = [ | |
1.0e-14, 1.0e-13, 1.0e-12, 1.0e-11, 1.0e-10, 1.0e-9, | |
1.0e-8, 1.0e-7, 1.0e-6, 1.0e-5, 1.0e-4, 1.0e-3, 1.0e-2, | |
1.0e-1, 1.0e0, 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1.0e6, | |
1.0e7, 1.0e8, 1.0e9, 1.0e10, 1.0e11, 1.0e12, 1.0e13, 1.0e14 | |
]; | |
fn is_digit(byte: u8) -> bool { | |
b'0' <= byte && byte <= b'9' | |
} | |
fn as_digit(byte: u8) -> u8 { | |
byte - b'0' | |
} | |
pub fn parse_custom(string: &str) -> Option<f64> { | |
let mut integer_part = 0; | |
let mut exponent_sign = 1; | |
let mut exponent = 0; | |
let mut decimal_exponent_offset = 0; | |
let bytes = string.as_bytes(); | |
// Remove sign, if any. | |
let (sign, mut bytes) = match bytes.split_first() { | |
Some((&b'+', rest)) => (1, rest), | |
Some((&b'-', rest)) => (-1, rest), | |
Some(_) => (1, bytes), | |
None => return None, | |
}; | |
if bytes.is_empty() { | |
return None; | |
} | |
// Now the string (if valid) starts | |
// with an integer or decimal point. | |
let mut has_decimals = false; | |
if bytes[0] == b'.' { | |
bytes = &bytes[1..]; | |
has_decimals = true; | |
// We need at least one decimal value | |
match bytes.first() { | |
Some(&b'0'...b'9') => (), | |
_ => return None, | |
} | |
} | |
else { | |
loop { | |
if bytes[0] == b'.' { | |
bytes = &bytes[1..]; | |
has_decimals = true; | |
break; | |
} | |
if !is_digit(bytes[0]) { | |
break; | |
} | |
integer_part *= 10; | |
integer_part += as_digit(bytes[0]) as i64; | |
bytes = &bytes[1..]; | |
if bytes.is_empty() { | |
break; | |
} | |
} | |
} | |
if has_decimals { | |
while let Some(&byte @ b'0'...b'9') = bytes.first() { | |
bytes = &bytes[1..]; | |
integer_part *= 10; | |
integer_part += as_digit(byte) as i64; | |
decimal_exponent_offset += 1; | |
} | |
} | |
// Now we might have the exponent | |
if !bytes.is_empty() { | |
// An exponent only starts with 'e' or 'E' | |
if !(bytes[0] == b'e' || bytes[0] == b'E') { | |
return None; | |
} | |
bytes = &bytes[1..]; | |
if bytes.is_empty() { | |
return None; | |
} | |
else if bytes[0] == b'+' { | |
bytes = &bytes[1..]; | |
} | |
else if bytes[0] == b'-' { | |
bytes = &bytes[1..]; | |
exponent_sign = -1; | |
} | |
if bytes.is_empty() { | |
return None; | |
} | |
// The rest of the string is just a natural number | |
for &byte in bytes { | |
if !is_digit(byte) { | |
return None; | |
} | |
exponent *= 10; | |
exponent += as_digit(byte) as i32; | |
} | |
} | |
let mut as_float = (sign * integer_part) as f64; | |
let exponent = exponent_sign * exponent - decimal_exponent_offset; | |
if exponent.abs() <= 14 { | |
as_float *= CACHED_POWERS_10[(exponent + 14) as usize]; | |
} | |
else { | |
as_float *= f64::powi(10.0, exponent); | |
} | |
Some(as_float) | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
use test::{Bencher, black_box}; | |
#[bench] | |
fn parse_builtin_exponent(b: &mut Bencher) { | |
b.iter(|| assert!({ | |
let t = parse_builtin(black_box("3.1453326e-4")).unwrap(); | |
3.1453325e-4 < t && t < 3.1453327e-4 | |
})); | |
} | |
#[bench] | |
fn parse_custom_exponent(b: &mut Bencher) { | |
b.iter(|| assert!({ | |
let t = parse_custom(black_box("3.1453326e-4")).unwrap(); | |
3.1453325e-4 < t && t < 3.1453327e-4 | |
})); | |
} | |
#[bench] | |
fn parse_builtin_simple(b: &mut Bencher) { | |
b.iter(|| assert!({ | |
let t = parse_builtin(black_box("31234.14533")).unwrap(); | |
31234.14532 < t && t < 31234.14534 | |
})); | |
} | |
#[bench] | |
fn parse_custom_simple(b: &mut Bencher) { | |
b.iter(|| assert!({ | |
let t = parse_custom(black_box("31234.14533")).unwrap(); | |
31234.14532 < t && t < 31234.14534 | |
})); | |
} | |
#[bench] | |
fn parse_builtin_integer(b: &mut Bencher) { | |
b.iter(|| assert!({ | |
let t = parse_builtin(black_box("3123414533")).unwrap(); | |
3123414532.0 < t && t < 3123414534.0 | |
})); | |
} | |
#[bench] | |
fn parse_custom_integer(b: &mut Bencher) { | |
b.iter(|| assert!({ | |
let t = parse_custom(black_box("3123414533")).unwrap(); | |
3123414532.0 < t && t < 3123414534.0 | |
})); | |
} | |
#[bench] | |
fn parse_builtin_decimal(b: &mut Bencher) { | |
b.iter(|| assert!({ | |
let t = parse_builtin(black_box(".3123414533")).unwrap(); | |
0.3123414532 < t && t < 0.3123414534 | |
})); | |
} | |
#[bench] | |
fn parse_custom_decimal(b: &mut Bencher) { | |
b.iter(|| assert!({ | |
let t = parse_custom(black_box(".3123414533")).unwrap(); | |
0.3123414532 < t && t < 0.3123414534 | |
})); | |
} | |
} |
Improved by rust-lang/rust#30639. Before:
test tests::parse_builtin_decimal ... bench: 95 ns/iter (+/- 8)
test tests::parse_builtin_exponent ... bench: 100 ns/iter (+/- 6)
test tests::parse_builtin_integer ... bench: 62 ns/iter (+/- 7)
test tests::parse_builtin_simple ... bench: 97 ns/iter (+/- 1)
test tests::parse_custom_decimal ... bench: 20 ns/iter (+/- 1)
test tests::parse_custom_exponent ... bench: 20 ns/iter (+/- 1)
test tests::parse_custom_integer ... bench: 19 ns/iter (+/- 1)
test tests::parse_custom_simple ... bench: 18 ns/iter (+/- 1)
After:
test tests::parse_builtin_decimal ... bench: 64 ns/iter (+/- 4)
test tests::parse_builtin_exponent ... bench: 69 ns/iter (+/- 2)
test tests::parse_builtin_integer ... bench: 60 ns/iter (+/- 5)
test tests::parse_builtin_simple ... bench: 63 ns/iter (+/- 6)
test tests::parse_custom_decimal ... bench: 20 ns/iter (+/- 1)
test tests::parse_custom_exponent ... bench: 21 ns/iter (+/- 1)
test tests::parse_custom_integer ... bench: 18 ns/iter (+/- 1)
test tests::parse_custom_simple ... bench: 19 ns/iter (+/- 2)
More improvements incoming but from now on it will probably consist of "shuffling code around to help the compiler" :(
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I thought I could get it down to ~10ns, but that was before I'd finished it.