Skip to content

Instantly share code, notes, and snippets.

@thomcc
Created July 15, 2019 05:39
Show Gist options
  • Save thomcc/a39c9bf5c7c50b0db1e5f1d4f92429a7 to your computer and use it in GitHub Desktop.
Save thomcc/a39c9bf5c7c50b0db1e5f1d4f92429a7 to your computer and use it in GitHub Desktop.
pub use bstr::ByteSlice;
/// Builds a 256-entry lookup table describing which byte values occur in `bytes`.
///
/// Entry `b` of the returned array is `1` when the byte value `b` appears at
/// least once in `bytes`, and `0` otherwise. Duplicate bytes in the input are
/// harmless; an empty input yields an all-zero table.
#[inline]
fn make_byte_set(bytes: &[u8]) -> [u8; 256] {
    // TODO: is it worth using `[0u64; 4]` and bit-manipulation?
    let mut table = [0u8; 256];
    for &byte in bytes {
        // A `u8` is always a valid index into a 256-element array.
        table[usize::from(byte)] = 1;
    }
    table
}
/// Drops the leading run of `bytes` whose values all belong to `set`.
///
/// Returns the sub-slice of `bytes` beginning at the first byte that is *not*
/// a member of `set`. When `bytes` or `set` is empty, `bytes` is returned
/// unchanged. When every byte of `bytes` is in `set`, an empty slice is
/// returned.
pub fn skip_while<'a>(bytes: &'a [u8], set: &[u8]) -> &'a [u8] {
    if bytes.is_empty() || set.is_empty() {
        return bytes;
    }
    // TODO: for small sets it seems likely that nested loop is faster.
    let in_set = make_byte_set(set);
    // Index of the first byte that falls outside the set, if there is one.
    let first_outside = bytes
        .iter()
        .position(|&byte| in_set[usize::from(byte)] == 0);
    match first_outside {
        Some(start) => &bytes[start..],
        None => b"",
    }
}
/// Drops leading bytes of `bytes` until one that belongs to `set` is found.
///
/// Returns the sub-slice of `bytes` beginning at the first byte that *is* a
/// member of `set` (that byte is included in the result). Returns an empty
/// slice when `bytes` is empty, when `set` is empty, or when no byte of
/// `bytes` is in `set`.
///
/// Sets of one, two, or three bytes are searched with `memchr`, `memchr2`,
/// and `memchr3` respectively; larger sets fall back to a 256-entry lookup
/// table built by `make_byte_set`.
pub fn skip_until<'a>(bytes: &'a [u8], set: &[u8]) -> &'a [u8] {
    if bytes.is_empty() {
        return bytes;
    }
    // Destructure the set with slice patterns so each needle is bound by
    // position; indexing by hand previously read `set[2]` and `set[3]` for a
    // three-element set, which skipped `set[1]` and panicked out of bounds.
    let first_match = match *set {
        [] => None,
        [a] => memchr::memchr(a, bytes),
        [a, b] => memchr::memchr2(a, b, bytes),
        [a, b, c] => memchr::memchr3(a, b, c, bytes),
        _ => {
            let matching = make_byte_set(set);
            bytes
                .iter()
                .position(|&byte| matching[usize::from(byte)] != 0)
        }
    };
    match first_match {
        Some(start) => &bytes[start..],
        None => &[],
    }
}
/// Splits `s` into the parts before and after the byte `c`.
///
/// The split position is the index reported by `find_byte(c)`. On a match,
/// returns `Some((before, after))`, where `before` is everything preceding
/// that index and `after` is everything following it; the matched byte itself
/// appears in neither half. Returns `None` when `find_byte` finds no match.
pub fn split_at_byte(s: &[u8], c: u8) -> Option<(&[u8], &[u8])> {
    s.find_byte(c).map(|pos| (&s[..pos], &s[pos + 1..]))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment