-
-
Save thomcc/a39c9bf5c7c50b0db1e5f1d4f92429a7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
pub use bstr::ByteSlice; | |
/// Builds a 256-entry membership table for `bytes`.
///
/// Entry `b` of the returned array is `1` when the byte value `b` occurs
/// anywhere in `bytes`, and `0` otherwise.
#[inline]
fn make_byte_set(bytes: &[u8]) -> [u8; 256] {
    // TODO: is it worth using `[0u64; 4]` and bit-manipulation?
    let mut table = [0u8; 256];
    for &byte in bytes {
        // A `u8` always indexes within the 256-entry table.
        table[usize::from(byte)] = 1;
    }
    table
}
pub fn skip_while<'a>(bytes: &'a [u8], set: &[u8]) -> &'a [u8] { | |
if set.len() == 0 || bytes.len() == 0 { | |
return bytes; | |
} | |
// TODO: for small sets it seems likely that nested loop is faster. | |
let matching = make_byte_set(set); | |
for (i, &b) in bytes.iter().enumerate() { | |
if matching[b as usize] == 0 { | |
return &bytes[i..]; | |
} | |
} | |
b"" | |
} | |
pub fn skip_until<'a>(bytes: &'a [u8], set: &[u8]) -> &'a [u8] { | |
if bytes.len() == 0 { | |
return bytes; | |
} | |
match set.len() { | |
0 => &[], | |
1 => memchr::memchr(set[0], bytes).map(|i| &bytes[i..]).unwrap_or_default(), | |
2 => memchr::memchr2(set[0], set[1], bytes).map(|i| &bytes[i..]).unwrap_or_default(), | |
3 => memchr::memchr3(set[0], set[2], set[3], bytes).map(|i| &bytes[i..]).unwrap_or_default(), | |
n => { | |
let matching = make_byte_set(set); | |
for (i, &b) in bytes.iter().enumerate() { | |
if matching[b as usize] != 0 { | |
return &bytes[i..]; | |
} | |
} | |
&[] | |
} | |
} | |
} | |
pub fn split_at_byte(s: &[u8], c: u8) -> Option<(&[u8], &[u8])> { | |
let idx = s.find_byte(c)?; | |
let (l, r) = s.split_at(idx); | |
Some((l, &r[1..])) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment