Skip to content

Instantly share code, notes, and snippets.

@dmgolembiowski
Created June 4, 2024 16:45
Show Gist options
  • Save dmgolembiowski/fde007758b7b9ed2dae171dd53804ac8 to your computer and use it in GitHub Desktop.
Save dmgolembiowski/fde007758b7b9ed2dae171dd53804ac8 to your computer and use it in GitHub Desktop.
The standard library's `File::read_to_end` method only accepts a `Vec<u8>` backed by the Global allocator; it cannot fill a vector that uses a non-Global allocator. This recipe addresses that.
#![feature(allocator_api, core_io_borrowed_buf, read_buf)]
#![allow(unused)]
#[rustfmt::skip]
use {
tmfalloc as tm,
std::{
cmp,
io::{prelude::*, BorrowedRef, ErrorKind},
path::Path,
},
};
/// Allocator type used for the destination buffer: `Allocator` from the
/// `tmfalloc` crate (imported above as `tm`).
type RawAlloc = tm::Allocator;
/// Byte vector whose storage is obtained from [`RawAlloc`] rather than the
/// default allocator; this is the buffer type `read_file_into` appends to.
type Vector = Vec<u8, RawAlloc>;
/// Opens the file at `path` and appends its entire contents to `buf`.
///
/// Existing bytes in `buf` are left untouched; new bytes are written after
/// them. Data is read directly into the vector's spare capacity, and the
/// per-read size limit starts at `DEFAULT_BUF_SIZE` and doubles whenever a
/// read fills the whole window it was offered.
///
/// # Returns
///
/// The number of bytes appended to `buf` (i.e. `buf.len()` after the call
/// minus `buf.len()` before it).
///
/// # Errors
///
/// Returns the underlying `std::io::Error` if the file cannot be opened, if a
/// read fails with anything other than `ErrorKind::Interrupted` (interrupted
/// reads are retried), or if growing `buf` via `try_reserve` fails. Bytes that
/// were received before a failing read are kept in `buf`.
fn read_file_into<P: AsRef<Path>>(path: P, buf: &mut Vector) -> std::io::Result<usize> {
    const DEFAULT_BUF_SIZE: usize = 8_192;
    const PROBE_SIZE: usize = 32;

    /// Reads at most `PROBE_SIZE` bytes into a stack buffer and appends them
    /// to `buf`, retrying on `ErrorKind::Interrupted`. Returns the number of
    /// bytes read; `0` means end-of-file.
    fn small_probe_read<R: Read + ?Sized>(r: &mut R, buf: &mut Vector) -> std::io::Result<usize> {
        let mut probe = [0_u8; PROBE_SIZE];
        loop {
            match r.read(&mut probe) {
                Ok(n) => {
                    buf.extend_from_slice(&probe[..n]);
                    return Ok(n);
                }
                Err(ref e) if e.kind() == ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
    }

    // Propagate open failures to the caller instead of panicking.
    let mut file = std::fs::File::open(path)?;

    let start_len = buf.len();
    let start_cap = buf.capacity();

    // Number of bytes at the front of the next spare-capacity window that a
    // previous iteration already initialized but did not fill.
    let mut initialized = 0;
    let mut max_read_size = DEFAULT_BUF_SIZE;

    // With almost no spare room, probe through a small stack buffer first so
    // that an empty (or already exhausted) file does not force the vector to
    // grow.
    if buf.capacity() - buf.len() < PROBE_SIZE {
        let read = small_probe_read(&mut file, buf)?;
        if read == 0 {
            return Ok(0);
        }
    }

    loop {
        // The buffer is exactly full and has never been reallocated: the file
        // may have fit precisely, so check for EOF before growing.
        if buf.len() == buf.capacity() && buf.capacity() == start_cap {
            let read = small_probe_read(&mut file, buf)?;
            if read == 0 {
                return Ok(buf.len() - start_len);
            }
        }

        if buf.len() == buf.capacity() {
            buf.try_reserve(PROBE_SIZE)?;
        }

        let spare = buf.spare_capacity_mut();
        let buf_len = cmp::min(spare.len(), max_read_size);
        let spare = &mut spare[..buf_len];
        // Named by full path: the file-level `use` list does not bring
        // `BorrowedBuf` into scope.
        let mut read_buf: std::io::BorrowedBuf<'_> = spare.into();

        // SAFETY: `initialized` is the count of bytes that the previous
        // iteration left initialized-but-unfilled, and those bytes sit at the
        // start of this iteration's spare capacity. It is reset to 0 by the
        // bookkeeping below whenever the previous window was filled entirely.
        unsafe {
            read_buf.set_init(initialized);
        }

        let mut cursor = read_buf.unfilled();
        let read_result = loop {
            match file.read_buf(cursor.reborrow()) {
                Err(e) if e.kind() == ErrorKind::Interrupted => continue,
                // Do not bail out yet on error: the reader may have produced
                // data before failing, and that data must be committed first.
                other => break other,
            }
        };

        let unfilled_but_initialized = cursor.init_ref().len();
        let bytes_read = cursor.written();

        // SAFETY: `bytes_read` bytes at the start of the spare capacity were
        // written through the cursor, so they are initialized, and
        // `bytes_read <= buf_len <= capacity - len`.
        unsafe {
            buf.set_len(buf.len() + bytes_read);
        }

        // All received data is now part of `buf`; failing here loses nothing.
        read_result?;

        if bytes_read == 0 {
            return Ok(buf.len() - start_len);
        }

        initialized = unfilled_but_initialized;

        // The reader filled the whole window it was offered: offer a larger
        // one next time.
        if buf_len >= max_read_size && bytes_read == buf_len {
            max_read_size = max_read_size.saturating_mul(2);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment