Skip to content

Instantly share code, notes, and snippets.

@gwenn
Created September 5, 2017 20:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gwenn/801d1d808bf653770bcb5e1b09e8b7e7 to your computer and use it in GitHub Desktop.
Save gwenn/801d1d808bf653770bcb5e1b09e8b7e7 to your computer and use it in GitHub Desktop.
Bypass Rust borrow checker with a transmute
#![feature(read_initializer)]
use std::fs::File;
use std::io::{self, BufRead, Read};
// Like a `BufReader` but with a growable buffer.
#[derive(Debug)]
struct Scanner<R: Read> {
inner: R,
buf: Vec<u8>,
pos: usize,
cap: usize,
eof: bool,
}
impl<R: Read> Scanner<R> {
fn new(inner: R) -> Scanner<R> {
Self::with_capacity(inner, 4096)
}
fn with_capacity(inner: R, capacity: usize) -> Scanner<R> {
let mut buf = Vec::with_capacity(capacity);
unsafe {
buf.set_len(capacity);
inner.initializer().initialize(&mut buf);
}
Scanner {
inner: inner,
buf: buf,
pos: 0,
cap: 0,
eof: false,
}
}
}
impl<R: Read> Scanner<R> {
//
fn scan_unshifted(_data: &[u8], _eof: bool) -> (Option<&[u8]>, usize) {
(None, 0)
}
}
impl<R: Read> Scanner<R> {
// Advance the Scanner to next token.
// Return the token as a byte slice.
// Return `None` when the end of the input is reached.
// Return any error that occurs while reading the input. (TODO impl our own Result)
fn scan(&mut self) -> io::Result<Option<&[u8]>> {
// Loop until we have a token.
loop {
// See if we can get a token with what we already have.
if self.cap > self.pos || self.eof {
// TODO: I don't know how to make the borrow checker happy!
let data = unsafe { std::mem::transmute(&self.buf[self.pos..self.cap]) };
match Self::scan_unshifted(data, self.eof) {
(None, 0) => {}
(None, amt) => {
self.consume(amt);
continue;
}
(tok, amt) => {
self.consume(amt);
return Ok(tok);
}
}
}
// We cannot generate a token with what we are holding.
// If we've already hit EOF, we are done.
if self.eof {
self.pos = 0;
self.cap = 0;
return Ok(None);
}
self.fill_buf()?;
}
}
}
impl<R: Read> BufRead for Scanner<R> {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
// First, shift data to beginning of buffer if there's lots of empty space
// or space is needed.
if self.pos > 0 && (self.cap == self.buf.len() || self.pos > self.buf.len() / 2) {
unsafe {
use std::ptr;
ptr::copy(
self.buf.as_mut_ptr().offset(self.pos as isize),
self.buf.as_mut_ptr(),
self.cap - self.pos,
);
}
self.cap -= self.pos;
self.pos = 0
}
// Is the buffer full? If so, resize.
if self.cap == self.buf.len() {
// TODO maxTokenSize
let additional = self.buf.capacity();
self.buf.reserve(additional);
let cap = self.buf.capacity();
unsafe {
self.buf.set_len(cap);
self.inner
.initializer()
.initialize(&mut self.buf[self.cap..])
}
self.cap -= self.pos;
self.pos = 0;
}
// Finally we can read some input.
loop {
match self.inner.read(&mut self.buf[self.cap..]) {
Ok(0) => {
self.eof = true;
break;
}
Ok(n) => {
self.cap += n;
break;
}
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
Err(e) => {
return Err(e);
}
}
}
Ok(&self.buf[self.pos..self.cap])
}
fn consume(&mut self, amt: usize) {
assert!(self.pos + amt <= self.cap);
self.pos += amt;
}
}
impl<R: Read> Read for Scanner<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let nread = {
let mut rem = self.fill_buf()?;
rem.read(buf)?
};
self.consume(nread);
Ok(nread)
}
}
fn main() {
let f = File::open("abc.txt").unwrap();
let mut s = Scanner::new(f);
{
// field must be consumed before next call to scan
let field = s.scan();
println!("{:?}", field);
}
let field = s.scan();
println!("{:?}", field);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment