Skip to content

Instantly share code, notes, and snippets.

@connorskees
Created January 13, 2023 23:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save connorskees/955439d1ad62a4dcbe4f594a293c6187 to your computer and use it in GitHub Desktop.
Save connorskees/955439d1ad62a4dcbe4f594a293c6187 to your computer and use it in GitHub Desktop.
/// Fills `out_slice` with back-to-back copies of the pattern `base`.
///
/// Tiling starts at the beginning of `out_slice`. When the output length is not a
/// multiple of `base.len()`, the final partial tile receives the leading bytes of `base`.
/// An empty `out_slice` is a no-op regardless of `base`.
///
/// # Panics
///
/// Panics if `base` is empty while `out_slice` is not, since there is no pattern to repeat.
#[inline]
fn memcpy(
    out_slice: &mut [u8],
    base: &[u8],
) {
    // Nothing to write. Returning here also keeps `chunks_exact_mut` from panicking on a
    // zero chunk size when both slices are empty.
    if out_slice.is_empty() {
        return;
    }
    assert!(
        !base.is_empty(),
        "cannot tile a non-empty output with an empty pattern"
    );

    let mut tiles = out_slice.chunks_exact_mut(base.len());
    // `by_ref` keeps ownership of the iterator so the remainder can be taken afterwards.
    for tile in tiles.by_ref() {
        tile.copy_from_slice(base);
    }

    // Whatever did not fit a whole tile gets the matching prefix of the pattern.
    let tail = tiles.into_remainder();
    let tail_len = tail.len();
    tail.copy_from_slice(&base[..tail_len]);
}
/// Copies `match_len` bytes within `out_slice` from `source_pos` to `out_pos`.
///
/// The copy proceeds in ascending index order, so when the source range overlaps the
/// destination the bytes just written are read back and the span between `source_pos`
/// and `out_pos` repeats. Each source index is ANDed with `out_buf_size_mask`; destination
/// indices are never masked. A mask of `usize::MAX` leaves source indices untouched and
/// enables the bulk fast paths below.
///
/// # Panics
///
/// Panics if a source or destination index falls outside `out_slice`.
#[inline]
fn transfer(
    out_slice: &mut [u8],
    mut source_pos: usize,
    mut out_pos: usize,
    match_len: usize,
    out_buf_size_mask: usize,
) {
    // The bulk paths only reproduce the byte-by-byte copy when the mask is a no-op and the
    // source lies strictly behind the destination. With a wrapping mask the source index may
    // legitimately sit ahead of `out_pos`, so this is a branch condition, not an assertion;
    // anything else takes the general loop.
    if out_buf_size_mask == usize::MAX && out_pos > source_pos {
        let dist = out_pos - source_pos;
        // Only whole groups of four bytes are handled here; the `match` below finishes
        // the remaining 0..=3 bytes.
        let bulk_len = (match_len >> 2) * 4;
        let end = bulk_len + out_pos;

        if dist == 1 {
            // special case that comes up surprisingly often. in the case that `source_pos`
            // is 1 less than `out_pos`, we can say that the entire range will be the same
            // value and optimize this to be a simple `memset`
            //
            // using `memset` here is significantly (~3x) faster than using the general case
            // memcpy, since the general case is effectively a slow loop that goes 1 byte at
            // a time for this case
            let init = out_slice[out_pos - 1];
            out_slice[out_pos..end].fill(init);
        } else {
            // The output is the `dist` bytes preceding `out_pos`, repeated; tile them.
            let (start, out) = out_slice.split_at_mut(out_pos);
            memcpy(&mut out[..bulk_len], &start[source_pos..]);
        }

        // Keep the source `dist` bytes behind the destination for the tail copy.
        source_pos = end - dist;
        out_pos = end;
    } else {
        for _ in 0..match_len >> 2 {
            out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask];
            out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask];
            out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask];
            out_slice[out_pos + 3] = out_slice[(source_pos + 3) & out_buf_size_mask];
            source_pos += 4;
            out_pos += 4;
        }
    }

    // Copy the trailing `match_len % 4` bytes that the grouped copy above left over.
    match match_len & 3 {
        0 => (),
        1 => out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask],
        2 => {
            out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask];
            out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask];
        }
        3 => {
            out_slice[out_pos] = out_slice[source_pos & out_buf_size_mask];
            out_slice[out_pos + 1] = out_slice[(source_pos + 1) & out_buf_size_mask];
            out_slice[out_pos + 2] = out_slice[(source_pos + 2) & out_buf_size_mask];
        }
        _ => unreachable!(),
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment