Created
October 14, 2015 01:47
-
-
Save anonymous/21a00b0e181a918f8ca4 to your computer and use it in GitHub Desktop.
Shared via Rust Playground
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This program demonstrates sound unchecked indexing | |
// by having slices generate valid indices, and "signing" | |
// them with an invariant lifetime. These indices cannot be used on another | |
// slice, nor can they be stored until the array is no longer valid | |
// (consider adapting this to Vec, and then trying to use indices after a push). | |
// | |
// This represents a design "one step removed" from iterators, providing greater | |
// control to the consumer of the API. Instead of getting references to elements | |
// we get indices, from which we can get references or hypothetically perform | |
// any other "index-related" operation (slicing?). Normally, these operations | |
// would need to be checked at runtime to avoid indexing out of bounds, but | |
// because the array knows it personally minted the indices, it can trust them. | |
// This hypothetically enables greater composition. Using this technique | |
// one could also do "only once" checked indexing (let idx = arr.validate(idx)). | |
// | |
// The major drawback of this design is that it requires a closure to | |
// create an environment that the signatures are bound to, complicating | |
// any logic that flows between the two (e.g. moving values in/out and try!). | |
// In principle, the compiler could be "taught" this trick to eliminate the | |
// need for the closure, as far as I know. Although how one would communicate | |
// that they're trying to do this to the compiler is another question. | |
// It also relies on wrapping the structure of interest to provide a constrained | |
// API (again, consider applying this to Vec -- need to prevent `push` and `pop` | |
// being called). This is the same principle behind Entry and Iterator. | |
// | |
// It also produces terrible compile errors (random lifetime failures), | |
// because we're hacking novel semantics on top of the borrowchecker which | |
// it doesn't understand. | |
// | |
// This technique was first pioneered by gereeter to enable safely constructing | |
// search paths in BTreeMap. See Haskell's ST Monad for a related design. | |
// | |
// The example isn't maximally generic or fleshed out because I got bored trying | |
// to express the bounds necessary to handle &[T] and &mut [T] appropriately. | |
fn main() { | |
use indexing::indices; | |
let arr1: &[u32] = &[1, 2, 3, 4, 5]; | |
let arr2: &[u32] = &[10, 20, 30]; | |
// concurrent iteration (hardest thing to do with iterators) | |
indices(arr1, |arr1, it1| { | |
indices(arr2, move |arr2, it2| { | |
for (i, j) in it1.zip(it2) { | |
println!("{} {}", arr1.get(i), arr2.get(j)); | |
// println!("{} ", arr2.get(i)); // should be invalid to idx wrong source | |
// println!("{} ", arr1.get(j)); // should be invalid to idx wrong source | |
} | |
}); | |
}); | |
// can hold onto the indices for later, as long they stay in the closure | |
let _a = indices(arr1, |arr, mut it| { | |
let a = it.next().unwrap(); | |
let b = it.next_back().unwrap(); | |
println!("{} {}", arr.get(a), arr.get(b)); | |
// a // should be invalid to return an index | |
}); | |
// can get references out, just not indices | |
let (x, y) = indices(arr1, |arr, mut it| { | |
let a = it.next().unwrap(); | |
let b = it.next_back().unwrap(); | |
(arr.get(a), arr.get(b)) | |
}); | |
println!("{} {}", x, y); | |
// Excercise to the reader: sound multi-index mutable indexing!? | |
// (hint: it would be unsound with the current design) | |
} | |
mod indexing { | |
use std::marker::PhantomData; | |
use std::ops::Deref; | |
use std::iter::DoubleEndedIterator; | |
// Cell<T> is invariant in T; so Cell<&'id _> makes `id` invariant. | |
// This means that the inference engine is not allowed to shrink or | |
// grow 'id to solve the borrow system. | |
type Id<'id> = PhantomData<::std::cell::Cell<&'id mut ()>>; | |
pub struct Indexer<'id, Array> { | |
_id: Id<'id>, | |
arr: Array, | |
} | |
pub struct Indices<'id> { | |
_id: Id<'id>, | |
min: usize, | |
max: usize, | |
} | |
#[derive(Copy, Clone)] | |
pub struct Index<'id> { | |
_id: Id<'id>, | |
idx: usize, | |
} | |
impl<'id, 'a> Indexer<'id, &'a [u32]> { | |
pub fn get(&self, idx: Index<'id>) -> &'a u32 { | |
unsafe { | |
self.arr.get_unchecked(idx.idx) | |
} | |
} | |
} | |
impl<'id> Iterator for Indices<'id> { | |
type Item = Index<'id>; | |
fn next(&mut self) -> Option<Self::Item> { | |
if self.min != self.max { | |
self.min += 1; | |
Some(Index { _id: PhantomData, idx: self.min - 1 }) | |
} else { | |
None | |
} | |
} | |
} | |
impl<'id> DoubleEndedIterator for Indices<'id> { | |
fn next_back(&mut self) -> Option<Self::Item> { | |
if self.min != self.max { | |
self.max -= 1; | |
Some(Index { _id: PhantomData, idx: self.max }) | |
} else { | |
None | |
} | |
} | |
} | |
pub fn indices<Array, F, Out>(arr: Array, f: F) -> Out | |
where F: for<'id> FnOnce(Indexer<'id, Array>, Indices<'id>) -> Out, | |
Array: Deref<Target = [u32]>, | |
{ | |
// This is where the magic happens. We bind the indexer and indices | |
// to the same invariant lifetime (a constraint established by F's | |
// definition). As such, each call to `indices` produces a unique | |
// signature that only these two values can share. | |
// | |
// Within this function, the borrow solver can choose literally any lifetime, | |
// including `'static`, but we don't care what the borrow solver does in | |
// *this* function. We only need to trick the solver in the caller's | |
// scope. Since borrowck doesn't do interprocedural analysis, it | |
// sees every call to this function produces values with some opaque | |
// fresh lifetime and can't unify any of them. | |
// | |
// In principle a "super borrowchecker" that does interprocedural | |
// analysis would break this design, but we could go out of our way | |
// to somehow bind the lifetime to the inside of this function, making | |
// it sound again. Borrowck will never do such analysis, so we don't | |
// care. | |
let len = arr.len(); | |
let indexer = Indexer { _id: PhantomData, arr: arr }; | |
let indices = Indices { _id: PhantomData, min: 0, max: len }; | |
f(indexer, indices) | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment