Created
September 5, 2017 20:09
-
-
Save gwenn/801d1d808bf653770bcb5e1b09e8b7e7 to your computer and use it in GitHub Desktop.
Bypass Rust borrow checker with a transmute
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(read_initializer)]
use std::fs::File;
use std::io::{self, BufRead, Read};
use std::ops::Range;
/// Like a `BufReader` but with a growable buffer.
///
/// The bytes currently buffered and not yet consumed are `buf[pos..cap]`.
/// No trait bound is placed on `R` here; the `impl` blocks that actually
/// read from `inner` carry the `Read` bound themselves.
#[derive(Debug)]
struct Scanner<R> {
    /// Underlying source the buffer is filled from.
    inner: R,
    /// Backing storage; only `buf[..cap]` holds data obtained from `inner`.
    buf: Vec<u8>,
    /// Index in `buf` of the first byte that has not been consumed yet.
    pos: usize,
    /// Index in `buf` one past the last byte obtained from `inner`.
    cap: usize,
    /// Set once a read from `inner` returned zero bytes.
    eof: bool,
}
impl<R: Read> Scanner<R> { | |
fn new(inner: R) -> Scanner<R> { | |
Self::with_capacity(inner, 4096) | |
} | |
fn with_capacity(inner: R, capacity: usize) -> Scanner<R> { | |
let mut buf = Vec::with_capacity(capacity); | |
unsafe { | |
buf.set_len(capacity); | |
inner.initializer().initialize(&mut buf); | |
} | |
Scanner { | |
inner: inner, | |
buf: buf, | |
pos: 0, | |
cap: 0, | |
eof: false, | |
} | |
} | |
} | |
impl<R: Read> Scanner<R> { | |
// | |
fn scan_unshifted(_data: &[u8], _eof: bool) -> (Option<&[u8]>, usize) { | |
(None, 0) | |
} | |
} | |
impl<R: Read> Scanner<R> { | |
// Advance the Scanner to next token. | |
// Return the token as a byte slice. | |
// Return `None` when the end of the input is reached. | |
// Return any error that occurs while reading the input. (TODO impl our own Result) | |
fn scan(&mut self) -> io::Result<Option<&[u8]>> { | |
// Loop until we have a token. | |
loop { | |
// See if we can get a token with what we already have. | |
if self.cap > self.pos || self.eof { | |
// TODO: I don't know how to make the borrow checker happy! | |
let data = unsafe { std::mem::transmute(&self.buf[self.pos..self.cap]) }; | |
match Self::scan_unshifted(data, self.eof) { | |
(None, 0) => {} | |
(None, amt) => { | |
self.consume(amt); | |
continue; | |
} | |
(tok, amt) => { | |
self.consume(amt); | |
return Ok(tok); | |
} | |
} | |
} | |
// We cannot generate a token with what we are holding. | |
// If we've already hit EOF, we are done. | |
if self.eof { | |
self.pos = 0; | |
self.cap = 0; | |
return Ok(None); | |
} | |
self.fill_buf()?; | |
} | |
} | |
} | |
impl<R: Read> BufRead for Scanner<R> { | |
fn fill_buf(&mut self) -> io::Result<&[u8]> { | |
// First, shift data to beginning of buffer if there's lots of empty space | |
// or space is needed. | |
if self.pos > 0 && (self.cap == self.buf.len() || self.pos > self.buf.len() / 2) { | |
unsafe { | |
use std::ptr; | |
ptr::copy( | |
self.buf.as_mut_ptr().offset(self.pos as isize), | |
self.buf.as_mut_ptr(), | |
self.cap - self.pos, | |
); | |
} | |
self.cap -= self.pos; | |
self.pos = 0 | |
} | |
// Is the buffer full? If so, resize. | |
if self.cap == self.buf.len() { | |
// TODO maxTokenSize | |
let additional = self.buf.capacity(); | |
self.buf.reserve(additional); | |
let cap = self.buf.capacity(); | |
unsafe { | |
self.buf.set_len(cap); | |
self.inner | |
.initializer() | |
.initialize(&mut self.buf[self.cap..]) | |
} | |
self.cap -= self.pos; | |
self.pos = 0; | |
} | |
// Finally we can read some input. | |
loop { | |
match self.inner.read(&mut self.buf[self.cap..]) { | |
Ok(0) => { | |
self.eof = true; | |
break; | |
} | |
Ok(n) => { | |
self.cap += n; | |
break; | |
} | |
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {} | |
Err(e) => { | |
return Err(e); | |
} | |
} | |
} | |
Ok(&self.buf[self.pos..self.cap]) | |
} | |
fn consume(&mut self, amt: usize) { | |
assert!(self.pos + amt <= self.cap); | |
self.pos += amt; | |
} | |
} | |
impl<R: Read> Read for Scanner<R> { | |
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { | |
let nread = { | |
let mut rem = self.fill_buf()?; | |
rem.read(buf)? | |
}; | |
self.consume(nread); | |
Ok(nread) | |
} | |
} | |
fn main() { | |
let f = File::open("abc.txt").unwrap(); | |
let mut s = Scanner::new(f); | |
{ | |
// field must be consumed before next call to scan | |
let field = s.scan(); | |
println!("{:?}", field); | |
} | |
let field = s.scan(); | |
println!("{:?}", field); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment