colin-kiegel/self_contained_reference.rs

## self_contained_reference.rs
use std::mem;

/// This is an attempt to reduce allocations in the Twig-Rust template engine.
///
/// During compilation the initial template string is transformed
/// - to a token stream during lexing
/// - to a node tree during parsing
///
/// The tokens and nodes mostly contain slices of the original string. But the
/// current implementation uses new allocations and copy-by-value instead of
/// references. The reason is that Rust's notion of lifetimes cannot express a
/// guarantee for owned objects. However it seems necessary to pass ownership
/// of the original template string *along* with its references, to make sure
/// it really survives the references. `Rc`ing the template string in each of
/// its slices does not seem to be a good fit, because it would introduce
/// its own overhead ~O(n).
///
/// There might be a way to construct a new object taking care of the following
/// - ownership of the template string
/// - management of all slice references
/// - ensuring the template string to survive all references
///
/// This approach has some drawbacks, too
/// - the object's API should be Rust-safe, which greatly constrains its
///     flexibility. It seems as if any operation that operates on slice
///     references needs to be managed by this new object in one way or
///     the other. E.g. constructing a new `Token` referencing a match
///     in the template string must be done by this object, because this
///     object would be solely responsible for the pointers' validity.
///     Rust's lifetimes cannot help here anymore, because they cannot
///     express this concept of self-contained-reference. At this point
///     there seem to be three options
///         (a) runtime-overhead = status quo
///         (b) architectural restrictions, but static safety guarantee
///         (c) unsafe API
///     The consequences of (b) and (c) seem to be far-reaching. E.g.
///     (b) means to move a lot of very different logic into this object
///     most likely violating separation of concerns. And (c) would
///     require `unsafe{}` code at other places, too - which looks
///     like a last resort, only.
/// - need to refactor *a lot* of code of lexer + parser. Especially if
///     road (b) is taken, a lot of logic must move into this new object.
///
/// It seems reasonable to leave things as they are and continue with (a).
/// However this could be a starting point for further investigations.

///
/// A token that is either an integer or a string payload of type `T`.
///
/// The payload type is selected through the aliases `RefToken`,
/// `UnsafeToken` and `OwnedToken`.
//
// `#[repr(C)]` is load-bearing: `Stream` reinterprets `&Token<*const str>` as
// `&Token<&str>`. With the default representation the compiler is free to lay
// out the two instantiations differently (`&str` has a null niche that
// `*const str` lacks, so the discriminant may be folded into the reference in
// one instantiation only), which would make that reinterpretation undefined
// behaviour. `repr(C)` pins both to the same tag-plus-union layout.
#[allow(dead_code)]
#[derive(Debug)]
#[repr(C)]
pub enum Token<T> {
    /// Carries an unsigned 64-bit integer and no string data.
    Integer(u64),
    /// Carries a string payload of type `T`.
    Value(T),
}

/// Token whose string payload is borrowed for the lifetime `'a`.
pub type RefToken<'a> = Token<&'a str>;
/// Token whose string payload is a raw pointer without any lifetime.
pub type UnsafeToken = Token<*const str>;
/// Token that owns its string payload.
pub type OwnedToken = Token<String>;

/// Owns a string buffer together with raw-pointer slices that may point
/// into it.
///
/// Invariant relied upon by every `unsafe` block in `impl Stream`: each raw
/// pointer stored in this struct refers either to a `'static` string literal
/// or to a slice of `unsafe_buffer`, and `unsafe_buffer` is never mutated
/// after construction.
#[allow(dead_code)]
struct Stream {
    /// Backing storage for the token string data.
    ///
    /// `Stream` must never mutate `unsafe_buffer` as long as any raw pointer
    /// in this struct may reference a slice of it. The `unsafe_` prefix marks
    /// this implicit contract.
    unsafe_buffer: String,
    /// Raw string slice; exposed only through `Stream::safe_str`, which ties
    /// the resulting reference to a borrow of `self`.
    unsafe_str: *const str,
    /// Single token with a raw-pointer payload; exposed only through
    /// `Stream::safe_token`.
    unsafe_token: UnsafeToken,
    /// Tokens with raw-pointer payloads; exposed only through
    /// `Stream::safe_token_vec`.
    unsafe_token_vec: Vec<UnsafeToken>,
}

/// Borrowed view of a `Stream` in which every reference is bounded by the
/// lifetime `'a` of the borrow it was created from.
#[allow(dead_code)]
#[derive(Debug)]
struct StreamHandle<'a> {
    /// Read-only view of the stream's backing buffer.
    safe_buffer: &'a str,
    /// The stream's current string slice.
    safe_str: &'a str,
    /// The stream's single token, viewed with a borrowed payload.
    safe_token: &'a RefToken<'a>,
    /// The stream's token vector, viewed with borrowed payloads.
    safe_token_vec: &'a Vec<RefToken<'a>>,
}

#[allow(dead_code)]
impl Stream {
    /// Creates a stream that takes ownership of `buffer`.
    ///
    /// The string slice and the single token start out pointing at `'static`
    /// literals; the token vector starts out empty.
    pub fn new(buffer: String) -> Stream {
        Stream {
            unsafe_buffer: buffer,
            unsafe_str: "Hello World!" as *const str,
            unsafe_token: Token::Value("Good morning!" as *const str),
            unsafe_token_vec: Vec::new(),
        }
    }

    /// *Read-only* access to `unsafe_buffer` is safe.
    pub fn safe_buffer(&self) -> &str {
        &self.unsafe_buffer
    }

    /// Points the stored string slice at the first whitespace-separated word
    /// of the buffer. Leaves it untouched when the buffer has no such word.
    pub fn update_str(&mut self) {
        if let Some(word) = self.unsafe_buffer.split_whitespace().next() {
            // The borrow of `unsafe_buffer` ends here; from now on validity
            // rests on the struct invariant instead of the borrow checker.
            self.unsafe_str = word as *const str;
        }
    }

    /// Appends one `Token::Value` per whitespace-separated word of the
    /// buffer. Existing entries are kept, so repeated calls append again.
    pub fn update_vec(&mut self) {
        let words = self.unsafe_buffer.split_whitespace();
        self.unsafe_token_vec
            .extend(words.map(|word: &str| Token::Value(word as *const str)));
    }

    /// Returns the stored string slice, borrowed for as long as `self` is.
    pub fn safe_str<'a>(&'a self) -> &'a str {
        // SAFETY: `unsafe_str` points either at a `'static` literal or into
        // `unsafe_buffer`, which is owned by `self` and never mutated, so the
        // pointee is valid UTF-8 that outlives `'a`.
        unsafe { &*self.unsafe_str }
    }

    /// Returns the stored token with its payload viewed as `&'a str`.
    pub fn safe_token<'a>(&'a self) -> &'a RefToken<'a> {
        // SAFETY: `Token` is `repr(C)` and `*const str` / `&str` have the
        // same size and alignment, so both instantiations share one layout.
        // The payload pointer satisfies the struct invariant and is therefore
        // a valid `&'a str`.
        unsafe { mem::transmute::<&'a UnsafeToken, &'a RefToken<'a>>(&self.unsafe_token) }
    }

    /// Returns the stored token vector with payloads viewed as `&'a str`.
    pub fn safe_token_vec<'a>(&'a self) -> &'a Vec<RefToken<'a>> {
        // SAFETY: the element types share one layout (see `safe_token`) and
        // every payload pointer satisfies the struct invariant.
        // NOTE(review): the field layout of `Vec<T>` itself is not formally
        // specified; this relies on two instantiations with equally sized and
        // aligned elements being laid out identically. Returning a slice
        // built with `slice::from_raw_parts` would avoid that assumption if
        // the signature is ever allowed to change.
        unsafe {
            mem::transmute::<&'a Vec<UnsafeToken>, &'a Vec<RefToken<'a>>>(&self.unsafe_token_vec)
        }
    }

    /// Bundles all safe views of this stream into one `StreamHandle`.
    pub fn safe_handle<'a>(&'a self) -> StreamHandle<'a> {
        StreamHandle {
            safe_buffer: self.safe_buffer(),
            safe_str: self.safe_str(),
            safe_token: self.safe_token(),
            safe_token_vec: self.safe_token_vec(),
        }
    }
}

/// Demo entry point: builds a `Stream` over a fixed string, refreshes its
/// raw slices from the buffer and prints every safe view in `Debug` form.
#[allow(dead_code)]
fn main() {
    let mut stream = Stream::new(String::from("Hello World!"));
    stream.update_str();
    stream.update_vec();

    let first_word = stream.safe_str();
    println!("safe_str() = {:?}", first_word);

    let token = stream.safe_token();
    println!("safe_token() = {:?}", token);

    let tokens = stream.safe_token_vec();
    println!("safe_token_vec() = {:?}", tokens);

    let handle = stream.safe_handle();
    println!("safe_handle() = {:?}", handle);
}
	use std::mem;

	/// This is an attempt to reduce allocations in the Twig-Rust template engine.
	///
	/// During compilation the initial template string is transformed
	/// - to a token stream during lexing
	/// - to a node tree during parsing
	///
	/// The tokens and nodes mostly contain slices of the original string. But the
	/// current implementation uses new allocations and copy-by-value instead of
	/// references. The reason is that Rust's notion of lifetimes cannot express a
	/// guarantee for owned objects. However it seems necessary to pass ownership
	/// of the original template string along with its references, to make sure
	/// it really survives the references. `Rc`ing the template string in each of
	/// its slices does not seem to be a good fit, because it would introduce
	/// its own overhead ~O(n).
	///
	/// There might be a way to construct a new object taking care of the following
	/// - ownership of the template string
	/// - management of all slice references
	/// - ensuring the template string to survive all references
	///
	/// This approach has some drawbacks, too
	/// - the object's API should be Rust-safe, which greatly constrains its
	/// flexibility. It seems as if any operation that operates on slice
	/// references needs to be managed by this new object in one way or
	/// the other. E.g. constructing a new `Token` referencing a match
	/// in the template string must be done by this object, because this
	/// object would be solely responsible for the pointers' validity.
	/// Rust's lifetimes cannot help here anymore, because they cannot
	/// express this concept of self-contained-reference. At this point
	/// there seem to be three options
	/// (a) runtime-overhead = status quo
	/// (b) architectural restrictions, but static safety guarantee
	/// (c) unsafe API
	/// The consequences of (b) and (c) seem to be far-reaching. E.g.
	/// (b) means to move a lot of very different logic into this object
	/// most likely violating separation of concerns. And (c) would
	/// require `unsafe{}` code at other places, too - which looks
	/// like a last resort, only.
	/// - need to refactor a lot of code of lexer + parser. Especially if
	/// road (b) is taken, a lot of logic must move into this new object.
	///
	/// It seems reasonable to leave things as they are and continue with (a).
	/// However this could be a starting point for further investigations.

	///
	/// A token that is either an integer or a string payload of type `T`.
	///
	/// The payload type is selected through the aliases `RefToken`,
	/// `UnsafeToken` and `OwnedToken`.
	//
	// `#[repr(C)]` is load-bearing: `Stream` reinterprets `&Token<*const str>` as
	// `&Token<&str>`. With the default representation the compiler is free to lay
	// out the two instantiations differently (`&str` has a null niche that
	// `*const str` lacks, so the discriminant may be folded into the reference in
	// one instantiation only), which would make that reinterpretation undefined
	// behaviour. `repr(C)` pins both to the same tag-plus-union layout.
	#[allow(dead_code)]
	#[derive(Debug)]
	#[repr(C)]
	pub enum Token<T> {
	    /// Carries an unsigned 64-bit integer and no string data.
	    Integer(u64),
	    /// Carries a string payload of type `T`.
	    Value(T),
	}

	/// Token whose string payload is borrowed for the lifetime `'a`.
	pub type RefToken<'a> = Token<&'a str>;
	/// Token whose string payload is a raw pointer without any lifetime.
	pub type UnsafeToken = Token<*const str>;
	/// Token that owns its string payload.
	pub type OwnedToken = Token<String>;

	/// Owns a string buffer together with raw-pointer slices that may point
	/// into it.
	///
	/// Invariant relied upon by every `unsafe` block in `impl Stream`: each raw
	/// pointer stored in this struct refers either to a `'static` string literal
	/// or to a slice of `unsafe_buffer`, and `unsafe_buffer` is never mutated
	/// after construction.
	#[allow(dead_code)]
	struct Stream {
	    /// Backing storage for the token string data.
	    ///
	    /// `Stream` must never mutate `unsafe_buffer` as long as any raw pointer
	    /// in this struct may reference a slice of it. The `unsafe_` prefix marks
	    /// this implicit contract.
	    unsafe_buffer: String,
	    /// Raw string slice; exposed only through `Stream::safe_str`, which ties
	    /// the resulting reference to a borrow of `self`.
	    unsafe_str: *const str,
	    /// Single token with a raw-pointer payload; exposed only through
	    /// `Stream::safe_token`.
	    unsafe_token: UnsafeToken,
	    /// Tokens with raw-pointer payloads; exposed only through
	    /// `Stream::safe_token_vec`.
	    unsafe_token_vec: Vec<UnsafeToken>,
	}

	/// Borrowed view of a `Stream` in which every reference is bounded by the
	/// lifetime `'a` of the borrow it was created from.
	#[allow(dead_code)]
	#[derive(Debug)]
	struct StreamHandle<'a> {
	    /// Read-only view of the stream's backing buffer.
	    safe_buffer: &'a str,
	    /// The stream's current string slice.
	    safe_str: &'a str,
	    /// The stream's single token, viewed with a borrowed payload.
	    safe_token: &'a RefToken<'a>,
	    /// The stream's token vector, viewed with borrowed payloads.
	    safe_token_vec: &'a Vec<RefToken<'a>>,
	}

	#[allow(dead_code)]
	impl Stream {
	    /// Creates a stream that takes ownership of `buffer`.
	    ///
	    /// The string slice and the single token start out pointing at `'static`
	    /// literals; the token vector starts out empty.
	    pub fn new(buffer: String) -> Stream {
	        Stream {
	            unsafe_buffer: buffer,
	            unsafe_str: "Hello World!" as *const str,
	            unsafe_token: Token::Value("Good morning!" as *const str),
	            unsafe_token_vec: Vec::new(),
	        }
	    }

	    /// *Read-only* access to `unsafe_buffer` is safe.
	    pub fn safe_buffer(&self) -> &str {
	        &self.unsafe_buffer
	    }

	    /// Points the stored string slice at the first whitespace-separated word
	    /// of the buffer. Leaves it untouched when the buffer has no such word.
	    pub fn update_str(&mut self) {
	        if let Some(word) = self.unsafe_buffer.split_whitespace().next() {
	            // The borrow of `unsafe_buffer` ends here; from now on validity
	            // rests on the struct invariant instead of the borrow checker.
	            self.unsafe_str = word as *const str;
	        }
	    }

	    /// Appends one `Token::Value` per whitespace-separated word of the
	    /// buffer. Existing entries are kept, so repeated calls append again.
	    pub fn update_vec(&mut self) {
	        let words = self.unsafe_buffer.split_whitespace();
	        self.unsafe_token_vec
	            .extend(words.map(|word: &str| Token::Value(word as *const str)));
	    }

	    /// Returns the stored string slice, borrowed for as long as `self` is.
	    pub fn safe_str<'a>(&'a self) -> &'a str {
	        // SAFETY: `unsafe_str` points either at a `'static` literal or into
	        // `unsafe_buffer`, which is owned by `self` and never mutated, so the
	        // pointee is valid UTF-8 that outlives `'a`.
	        unsafe { &*self.unsafe_str }
	    }

	    /// Returns the stored token with its payload viewed as `&'a str`.
	    pub fn safe_token<'a>(&'a self) -> &'a RefToken<'a> {
	        // SAFETY: `Token` is `repr(C)` and `*const str` / `&str` have the
	        // same size and alignment, so both instantiations share one layout.
	        // The payload pointer satisfies the struct invariant and is therefore
	        // a valid `&'a str`.
	        unsafe { mem::transmute::<&'a UnsafeToken, &'a RefToken<'a>>(&self.unsafe_token) }
	    }

	    /// Returns the stored token vector with payloads viewed as `&'a str`.
	    pub fn safe_token_vec<'a>(&'a self) -> &'a Vec<RefToken<'a>> {
	        // SAFETY: the element types share one layout (see `safe_token`) and
	        // every payload pointer satisfies the struct invariant.
	        // NOTE(review): the field layout of `Vec<T>` itself is not formally
	        // specified; this relies on two instantiations with equally sized and
	        // aligned elements being laid out identically. Returning a slice
	        // built with `slice::from_raw_parts` would avoid that assumption if
	        // the signature is ever allowed to change.
	        unsafe {
	            mem::transmute::<&'a Vec<UnsafeToken>, &'a Vec<RefToken<'a>>>(&self.unsafe_token_vec)
	        }
	    }

	    /// Bundles all safe views of this stream into one `StreamHandle`.
	    pub fn safe_handle<'a>(&'a self) -> StreamHandle<'a> {
	        StreamHandle {
	            safe_buffer: self.safe_buffer(),
	            safe_str: self.safe_str(),
	            safe_token: self.safe_token(),
	            safe_token_vec: self.safe_token_vec(),
	        }
	    }
	}

	/// Demo entry point: builds a `Stream` over a fixed string, refreshes its
	/// raw slices from the buffer and prints every safe view in `Debug` form.
	#[allow(dead_code)]
	fn main() {
	    let mut stream = Stream::new(String::from("Hello World!"));
	    stream.update_str();
	    stream.update_vec();

	    let first_word = stream.safe_str();
	    println!("safe_str() = {:?}", first_word);

	    let token = stream.safe_token();
	    println!("safe_token() = {:?}", token);

	    let tokens = stream.safe_token_vec();
	    println!("safe_token_vec() = {:?}", tokens);

	    let handle = stream.safe_handle();
	    println!("safe_handle() = {:?}", handle);
	}