-
-
Save ljedrz/c5d949c01097bb76c1cbe68b019ad6ea to your computer and use it in GitHub Desktop.
String::from_utf16 using pre-allocated push loops and different capacities vs. collect()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(test)] | |
extern crate test; | |
use test::{Bencher, black_box}; | |
use std::string::FromUtf16Error; | |
use std::char::decode_utf16; | |
/// Short input (16 UTF-16 code units) made only of non-surrogate units.
///
/// Every unit is below `0x0080`, so the input decodes successfully and each
/// unit becomes exactly one byte of UTF-8.
const SHORT_ALL_LOW: &[u16] = &[
    0x0000, 0x0045, 0x0000, 0x003f, 0x0000, 0x003b, 0x0000, 0x0046, 0x0000, 0x0039,
    0x0000, 0x003b, 0x0000, 0x0030, 0x000a, 0x0001,
];
/// Medium input (32 UTF-16 code units) made only of non-surrogate units.
///
/// The input decodes successfully. Some units are at or above `0x0080` and
/// therefore need two bytes of UTF-8 each, so the decoded string is longer in
/// bytes than the input is in code units.
const MEDIUM_ALL_LOW: &[u16] = &[
    0x0001, 0x008b, 0x0001, 0x0098, 0x0001, 0x0088, 0x0001, 0x0091, 0x0001, 0x009b,
    0x0001, 0x0092, 0x0020, 0x0001, 0x0095, 0x0001, 0x0093, 0x0020, 0x0001, 0x0088,
    0x0001, 0x009a, 0x0001, 0x008d, 0x0020, 0x0001, 0x008f, 0x0001, 0x009c, 0x0001,
    0x0092, 0x0001,
];
/// Long input (64 UTF-16 code units) made only of non-surrogate units.
///
/// The input decodes successfully. Some units are at or above `0x0080` and
/// therefore need two bytes of UTF-8 each, so the decoded string is longer in
/// bytes than the input is in code units.
const LONG_ALL_LOW: &[u16] = &[
    0x0001, 0x008b, 0x0001, 0x0098, 0x0001, 0x0088, 0x0001, 0x0091, 0x0001, 0x009b,
    0x0001, 0x0092, 0x0020, 0x0001, 0x0095, 0x0001, 0x0093, 0x0020, 0x0001, 0x0088,
    0x0001, 0x009a, 0x0001, 0x008d, 0x0020, 0x0001, 0x008f, 0x0001, 0x009c, 0x0001,
    0x0001, 0x008b, 0x0001, 0x0098, 0x0001, 0x0088, 0x0001, 0x0091, 0x0001, 0x009b,
    0x0001, 0x0092, 0x0020, 0x0001, 0x0095, 0x0001, 0x0093, 0x0020, 0x0001, 0x0088,
    0x0001, 0x009a, 0x0001, 0x008d, 0x0020, 0x0001, 0x008f, 0x0001, 0x009c, 0x0001,
    0x0092, 0x0001, 0x0092, 0x0001,
];
/// Short input (16 UTF-16 code units) mixing non-surrogate units with
/// surrogate units.
///
/// NOTE: the surrogates are unpaired low surrogates (the first one is at
/// code-unit index 2), so this is not valid UTF-16 and decoding it returns an
/// error after two characters. Benchmarks using this input measure the
/// early-exit error path.
const SHORT_SOME_HIGH: &[u16] = &[
    0x0000, 0x0045, 0xdf00, 0x003f, 0xdf00, 0x003b, 0xdf00, 0x0046, 0x0000, 0x0039,
    0x0000, 0xdf3b, 0x0000, 0x0030, 0x000a, 0x0001,
];
/// Medium input (32 UTF-16 code units) mixing non-surrogate units with
/// surrogate units.
///
/// NOTE: the surrogates are unpaired low surrogates and the very first unit is
/// one of them, so this is not valid UTF-16 and decoding it fails immediately.
/// Benchmarks using this input measure the early-exit error path.
const MEDIUM_SOME_HIGH: &[u16] = &[
    0xdf01, 0x008b, 0x0001, 0x0098, 0x0001, 0x0088, 0x0001, 0x0091, 0xdf01, 0x009b,
    0x0001, 0xdf92, 0x0020, 0xdf01, 0x0095, 0xdf01, 0x0093, 0x0020, 0x0001, 0x0088,
    0xdf01, 0x009a, 0xdf01, 0x008d, 0x0020, 0x0001, 0xdf8f, 0x0001, 0x009c, 0xdf01,
    0x0092, 0xdf01,
];
/// Long input (64 UTF-16 code units) mixing non-surrogate units with
/// surrogate units.
///
/// NOTE: the surrogates are unpaired low surrogates (the first one is at
/// code-unit index 3), so this is not valid UTF-16 and decoding it returns an
/// error after three characters. Benchmarks using this input measure the
/// early-exit error path.
const LONG_SOME_HIGH: &[u16] = &[
    0x0001, 0x008b, 0x0001, 0xdf98, 0x0001, 0x0088, 0xdf01, 0x0091, 0xdf01, 0x009b,
    0xdf01, 0x0092, 0xdf20, 0x0001, 0xdf95, 0x0001, 0x0093, 0xdf20, 0x0001, 0x0088,
    0x0001, 0xdf9a, 0x0001, 0x008d, 0x0020, 0xdf01, 0x008f, 0x0001, 0x009c, 0x0001,
    0xdf01, 0x008b, 0x0001, 0x0098, 0x0001, 0x0088, 0xdf01, 0x0091, 0x0001, 0x009b,
    0x0001, 0xdf92, 0x0020, 0x0001, 0x0095, 0x0001, 0x0093, 0xdf20, 0xdf01, 0x0088,
    0xdf01, 0x009a, 0xdf01, 0x008d, 0x0020, 0x0001, 0xdf8f, 0x0001, 0x009c, 0xdf01,
    0x0092, 0xdf01, 0x0092, 0xdf01,
];
/// Short input (16 UTF-16 code units) made only of surrogate units.
///
/// The first 14 units form seven valid high/low surrogate pairs.
///
/// NOTE: the unit at index 14 (`0xdc0a`) is a low surrogate with no preceding
/// high surrogate, so this is not valid UTF-16 and decoding it returns an
/// error after seven characters.
const SHORT_ALL_HIGH: &[u16] = &[
    0xd800, 0xdf45, 0xd800, 0xdf3f, 0xd800, 0xdf3b, 0xd800, 0xdf46, 0xd800, 0xdf39,
    0xd800, 0xdf3b, 0xd800, 0xdf30, 0xdc0a, 0xd801,
];
/// Medium input (32 UTF-16 code units) made only of surrogate units.
///
/// The first 12 units form six valid high/low surrogate pairs.
///
/// NOTE: the unit at index 12 (`0xdc20`) is a low surrogate with no preceding
/// high surrogate, so this is not valid UTF-16 and decoding it returns an
/// error after six characters.
const MEDIUM_ALL_HIGH: &[u16] = &[
    0xd801, 0xdc8b, 0xd801, 0xdc98, 0xd801, 0xdc88, 0xd801, 0xdc91, 0xd801, 0xdc9b,
    0xd801, 0xdc92, 0xdc20, 0xd801, 0xdc95, 0xd801, 0xdc93, 0xdc20, 0xd801, 0xdc88,
    0xd801, 0xdc9a, 0xd801, 0xdc8d, 0xdc20, 0xd801, 0xdc8f, 0xd801, 0xdc9c, 0xd801,
    0xdc92, 0xd801,
];
/// Long input (64 UTF-16 code units) made only of surrogate units.
///
/// The first 12 units form six valid high/low surrogate pairs.
///
/// NOTE: the unit at index 12 (`0xdc20`) is a low surrogate with no preceding
/// high surrogate, so this is not valid UTF-16 and decoding it returns an
/// error after six characters.
const LONG_ALL_HIGH: &[u16] = &[
    0xd801, 0xdc8b, 0xd801, 0xdc98, 0xd801, 0xdc88, 0xd801, 0xdc91, 0xd801, 0xdc9b,
    0xd801, 0xdc92, 0xdc20, 0xd801, 0xdc95, 0xd801, 0xdc93, 0xdc20, 0xd801, 0xdc88,
    0xd801, 0xdc9a, 0xd801, 0xdc8d, 0xdc20, 0xd801, 0xdc8f, 0xd801, 0xdc9c, 0xd801,
    0xd801, 0xdc8b, 0xd801, 0xdc98, 0xd801, 0xdc88, 0xd801, 0xdc91, 0xd801, 0xdc9b,
    0xd801, 0xdc92, 0xdc20, 0xd801, 0xdc95, 0xd801, 0xdc93, 0xdc20, 0xd801, 0xdc88,
    0xd801, 0xdc9a, 0xd801, 0xdc8d, 0xdc20, 0xd801, 0xdc8f, 0xd801, 0xdc9c, 0xd801,
    0xdc92, 0xd801, 0xdc92, 0xd801,
];
/// Baseline implementation: decodes `str_u16` with the standard library's
/// [`String::from_utf16`].
///
/// # Errors
///
/// Returns [`FromUtf16Error`] if `str_u16` is not valid UTF-16 (for example,
/// if it contains an unpaired surrogate).
fn old(str_u16: &[u16]) -> Result<String, FromUtf16Error> {
    String::from_utf16(str_u16)
}
/// Decodes `str_u16` into a `String` pre-allocated with one byte of capacity
/// per input code unit, pushing the decoded characters one at a time.
///
/// The capacity is only an initial reservation: the string still grows if the
/// UTF-8 output needs more than one byte per code unit.
///
/// # Errors
///
/// Returns `Err(())` as soon as an unpaired surrogate is encountered; the
/// partially built string is discarded.
fn new_len(str_u16: &[u16]) -> Result<String, ()> {
    let mut ret = String::with_capacity(str_u16.len());
    for c in decode_utf16(str_u16.iter().cloned()) {
        // Stop at the first invalid unit, dropping the decoder's error detail.
        ret.push(c.map_err(|_| ())?);
    }
    Ok(ret)
}
/// Decodes `str_u16` into a `String` pre-allocated with 1.5 bytes of capacity
/// per input code unit (rounded up), pushing the decoded characters one at a
/// time.
///
/// The capacity is only an initial reservation: the string still grows if the
/// UTF-8 output needs more space.
///
/// # Errors
///
/// Returns `Err(())` as soon as an unpaired surrogate is encountered; the
/// partially built string is discarded.
fn new_len15(str_u16: &[u16]) -> Result<String, ()> {
    let units = str_u16.len();
    // ceil(1.5 * units) in exact integer arithmetic, avoiding a round-trip
    // through f64.
    let mut ret = String::with_capacity(units + (units + 1) / 2);
    for c in decode_utf16(str_u16.iter().cloned()) {
        // Stop at the first invalid unit, dropping the decoder's error detail.
        ret.push(c.map_err(|_| ())?);
    }
    Ok(ret)
}
/// Decodes `str_u16` into a `String` pre-allocated with two bytes of capacity
/// per input code unit, pushing the decoded characters one at a time.
///
/// The capacity is only an initial reservation: the string still grows if the
/// UTF-8 output needs more space.
///
/// # Errors
///
/// Returns `Err(())` as soon as an unpaired surrogate is encountered; the
/// partially built string is discarded.
fn new_len2(str_u16: &[u16]) -> Result<String, ()> {
    let mut ret = String::with_capacity(str_u16.len() * 2);
    for c in decode_utf16(str_u16.iter().cloned()) {
        // Stop at the first invalid unit, dropping the decoder's error detail.
        ret.push(c.map_err(|_| ())?);
    }
    Ok(ret)
}
/// Decodes `str_u16` into a `String` pre-allocated with three bytes of
/// capacity per input code unit, pushing the decoded characters one at a time.
///
/// Three bytes per code unit covers the largest UTF-8 expansion of any valid
/// UTF-16 input (a single non-surrogate unit encodes to at most three bytes,
/// a surrogate pair to four bytes for two units).
///
/// # Errors
///
/// Returns `Err(())` as soon as an unpaired surrogate is encountered; the
/// partially built string is discarded.
fn new_len3(str_u16: &[u16]) -> Result<String, ()> {
    let mut ret = String::with_capacity(str_u16.len() * 3);
    for c in decode_utf16(str_u16.iter().cloned()) {
        // Stop at the first invalid unit, dropping the decoder's error detail.
        ret.push(c.map_err(|_| ())?);
    }
    Ok(ret)
}
#[bench] | |
fn bench_short_old(b: &mut Bencher) { | |
b.iter(|| (old(black_box(SHORT_ALL_LOW)), old(black_box(SHORT_SOME_HIGH)), old(black_box(SHORT_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_medium_old(b: &mut Bencher) { | |
b.iter(|| (old(black_box(MEDIUM_ALL_LOW)), old(black_box(MEDIUM_SOME_HIGH)), old(black_box(MEDIUM_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_long_old(b: &mut Bencher) { | |
b.iter(|| (old(black_box(LONG_ALL_LOW)), old(black_box(LONG_SOME_HIGH)), old(black_box(LONG_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_short_new_len(b: &mut Bencher) { | |
b.iter(|| (new_len(black_box(SHORT_ALL_LOW)), new_len(black_box(SHORT_SOME_HIGH)), new_len(black_box(SHORT_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_medium_new_len(b: &mut Bencher) { | |
b.iter(|| (new_len(black_box(MEDIUM_ALL_LOW)), new_len(black_box(MEDIUM_SOME_HIGH)), new_len(black_box(MEDIUM_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_long_new_len(b: &mut Bencher) { | |
b.iter(|| (new_len(black_box(LONG_ALL_LOW)), new_len(black_box(LONG_SOME_HIGH)), new_len(black_box(LONG_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_short_new_len15(b: &mut Bencher) { | |
b.iter(|| (new_len15(black_box(SHORT_ALL_LOW)), new_len15(black_box(SHORT_SOME_HIGH)), new_len15(black_box(SHORT_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_medium_new_len15(b: &mut Bencher) { | |
b.iter(|| (new_len15(black_box(MEDIUM_ALL_LOW)), new_len15(black_box(MEDIUM_SOME_HIGH)), new_len15(black_box(MEDIUM_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_long_new_len15(b: &mut Bencher) { | |
b.iter(|| (new_len15(black_box(LONG_ALL_LOW)), new_len15(black_box(LONG_SOME_HIGH)), new_len15(black_box(LONG_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_short_new_len2(b: &mut Bencher) { | |
b.iter(|| (new_len2(black_box(SHORT_ALL_LOW)), new_len2(black_box(SHORT_SOME_HIGH)), new_len2(black_box(SHORT_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_medium_new_len2(b: &mut Bencher) { | |
b.iter(|| (new_len2(black_box(MEDIUM_ALL_LOW)), new_len2(black_box(MEDIUM_SOME_HIGH)), new_len2(black_box(MEDIUM_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_long_new_len2(b: &mut Bencher) { | |
b.iter(|| (new_len2(black_box(LONG_ALL_LOW)), new_len2(black_box(LONG_SOME_HIGH)), new_len2(black_box(LONG_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_short_new_len3(b: &mut Bencher) { | |
b.iter(|| (new_len3(black_box(SHORT_ALL_LOW)), new_len3(black_box(SHORT_SOME_HIGH)), new_len3(black_box(SHORT_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_medium_new_len3(b: &mut Bencher) { | |
b.iter(|| (new_len3(black_box(MEDIUM_ALL_LOW)), new_len3(black_box(MEDIUM_SOME_HIGH)), new_len3(black_box(MEDIUM_ALL_HIGH)))) | |
} | |
#[bench] | |
fn bench_long_new_len3(b: &mut Bencher) { | |
b.iter(|| (new_len3(black_box(LONG_ALL_LOW)), new_len3(black_box(LONG_SOME_HIGH)), new_len3(black_box(LONG_ALL_HIGH)))) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment