Created
February 5, 2014 16:40
-
-
Save jasom/8827808 to your computer and use it in GitHub Desktop.
std::trie with improved insert performance (~3x on ordered inserts, ~1.3x on random inserts) and generic keys
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT | |
// file at the top-level directory of this distribution and at | |
// http://rust-lang.org/COPYRIGHT. | |
// | |
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
// option. This file may not be copied, modified, or distributed | |
// except according to those terms. | |
//! Ordered containers keyed by any `Chunkable` type (`uint` and `~str` in this file), implemented as radix tries (`TrieSet` and `TrieMap` types)
#[feature(globs)]; | |
#[feature(macro_rules)]; | |
extern mod extra; | |
use std::mem; | |
use std::uint; | |
use std::util::replace; | |
use std::unstable::intrinsics::init; | |
use std::vec; | |
// FIXME: #5244: need to manually update the TrieNode constructor whenever
// SIZE changes, because `TrieNode::new` spells out every child slot by hand.
// Number of key bits consumed at each level of the trie.
static SHIFT: uint = 4; | |
// Number of child slots in every node.
static SIZE: uint = 1 << SHIFT; | |
// Mask that keeps the low SHIFT bits, i.e. a valid index into a node's children.
static MASK: uint = SIZE - 1; | |
// FIXME: Need to get rid of NUM_CHUNKS and dynamicaly calculate this | |
//This should probably folded into a generic "stream of bits" trait | |
trait Chunkable : Eq{ | |
fn chunk(&self, idx: uint) -> uint; | |
fn mismatch<'a> (&self,other : &Self) -> Option<(uint,u8,u8)> ; | |
} | |
impl Chunkable for uint { | |
#[inline] | |
fn chunk(&self, idx: uint) -> uint | |
{ | |
let sh = uint::BITS - (SHIFT * (idx + 1)); | |
(self >> sh) & MASK | |
} | |
#[inline] | |
fn mismatch(&self,other : &uint) -> Option<(uint,u8,u8)>{ | |
if self == other {return None} | |
let mut shift = uint::BITS-8; | |
loop { | |
if (self >> shift) != (other >> shift) { | |
return Some(((uint::BITS-shift-8)/8, | |
(0xff&(self >> shift)) as u8, | |
(0xff&(other >> shift)) as u8)); | |
} | |
shift-=8 | |
} | |
} | |
} | |
impl Chunkable for ~str {
    // Each byte of the string supplies `8 / SHIFT` chunks, high bits first.
    // Positions past the end of the string read as chunk 0.
    #[inline]
    fn chunk(&self, idx: uint) -> uint {
        let per_byte = 8 / SHIFT;
        let bytes = self.as_bytes();
        let off = idx / per_byte;
        if off < bytes.len() {
            let sh = 8 - (SHIFT * (1 + (idx % per_byte)));
            (bytes[off] >> sh) as uint & MASK
        } else {
            0x00
        }
    }

    // Report the first byte position at which the strings differ. When one
    // string is a strict prefix of the other, the missing byte is reported
    // as 0.
    #[inline]
    fn mismatch(&self, other: &~str) -> Option<(uint,u8,u8)> {
        let lhs = self.as_bytes();
        let rhs = other.as_bytes();
        for (pos, (a, b)) in lhs.iter().zip(rhs.iter()).enumerate() {
            if a != b {
                return Some((pos, *a, *b))
            }
        }
        // The common prefix matched in full; only the lengths can differ now.
        let n_self = self.len();
        let n_other = other.len();
        if n_self > n_other {
            Some((n_other, self[n_other], 0))
        } else if n_other > n_self {
            Some((n_self, 0, other[n_self]))
        } else {
            None
        }
    }
}
// One slot of a `TrieNode`'s `children` array.
enum Child<K,V> { | |
// A boxed child node one level deeper in the trie.
Internal(~TrieNode<K,V>), | |
// A single stored key together with its value.
External(K, V), | |
// An empty slot.
Nothing | |
} | |
/// A map from `Chunkable` keys to values, stored as a radix trie whose
/// nodes are indexed by successive chunks of the key.
#[allow(missing_doc)] | |
pub struct TrieMap<K,V> { | |
// Top-level node; its children are indexed by chunk 0 of each key.
priv root: TrieNode<K,V>, | |
// Number of key-value pairs stored; updated by `swap`, `pop` and `clear`.
priv length: uint | |
} | |
impl<K,V> Container for TrieMap<K,V> { | |
/// Return the number of elements in the map | |
#[inline] | |
fn len(&self) -> uint { self.length } | |
} | |
impl<K,V> Mutable for TrieMap<K,V> { | |
/// Clear the map, removing all values. | |
#[inline] | |
fn clear(&mut self) { | |
self.root = TrieNode::new(); | |
self.length = 0; | |
} | |
} | |
impl<K:Chunkable,V> Map<K,V> for TrieMap<K,V> { | |
/// Return a reference to the value corresponding to the key | |
#[inline] | |
fn find<'a>(&'a self, key: &K) -> Option<&'a V> { | |
let mut node: &'a TrieNode<K,V> = &self.root; | |
let mut idx = 0; | |
loop { | |
match node.children[key.chunk( idx)] { | |
Internal(ref x) => node = &**x, | |
External(ref stored, ref value) => { | |
if stored == key { | |
return Some(value) | |
} else { | |
return None | |
} | |
} | |
Nothing => return None | |
} | |
idx += 1; | |
} | |
} | |
} | |
impl<K: Chunkable + Eq,V> MutableMap<K,V> for TrieMap<K,V> { | |
/// Return a mutable reference to the value corresponding to the key | |
#[inline] | |
fn find_mut<'a>(&'a mut self, key: &K) -> Option<&'a mut V> { | |
find_mut(&mut self.root.children[key.chunk( 0)], key, 1) | |
} | |
/// Insert a key-value pair from the map. If the key already had a value | |
/// present in the map, that value is returned. Otherwise None is returned. | |
fn swap(&mut self, key: K, value: V) -> Option<V> { | |
let ret = insert(&mut self.root.count, | |
&mut self.root.children[key.chunk( 0)], | |
key, value, 1); | |
if ret.is_none() { self.length += 1 } | |
ret | |
} | |
/// Removes a key from the map, returning the value at the key if the key | |
/// was previously in the map. | |
fn pop(&mut self, key: &K) -> Option<V> { | |
let ret = remove(&mut self.root.count, | |
&mut self.root.children[key.chunk( 0)], | |
key, 1); | |
if ret.is_some() { self.length -= 1 } | |
ret | |
} | |
} | |
impl<K,V> TrieMap<K,V> { | |
/// Create an empty TrieMap | |
#[inline] | |
pub fn new() -> TrieMap<K,V> { | |
TrieMap{root: TrieNode::new(), length: 0} | |
} | |
/// Visit all key-value pairs in reverse order | |
#[inline] | |
pub fn each_reverse<'a>(&'a self, f: |&K, &'a V| -> bool) -> bool { | |
self.root.each_reverse(f) | |
} | |
//FIXME : This is slow. | |
fn depth(&self) -> uint { | |
self.root.depth()+1 | |
} | |
/// Get an iterator over the key-value pairs in the map | |
pub fn iter<'a>(&'a self) -> Entries<'a, K,V> { | |
let mut iter = unsafe {Entries::new(self.depth())}; | |
iter.stack[0] = self.root.children.iter(); | |
iter.length = 1; | |
iter.remaining_min = self.length; | |
iter.remaining_max = self.length; | |
iter | |
} | |
/// Get an iterator over the key-value pairs in the map, with the | |
/// ability to mutate the values. | |
pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K,V> { | |
let mut iter = unsafe {MutEntries::new(self.depth())}; | |
iter.stack[0] = self.root.children.mut_iter(); | |
iter.length = 1; | |
iter.remaining_min = self.length; | |
iter.remaining_max = self.length; | |
iter | |
} | |
} | |
// FIXME #5846 we want to be able to choose between &x and &mut x | |
// (with many different `x`) below, so we need to optionally pass mut | |
// as a tt, but the only thing we can do with a `tt` is pass them to | |
// other macros, so this takes the `& <mutability> <operand>` token | |
// sequence and forces their evaluation as an expression. (see also
// `item!` below.) | |
#[feature(macro_rules)] | |
macro_rules! addr { ($e:expr) => { $e } } | |
// Body shared by `TrieMap::bound` and `TrieMap::mut_bound`. Starting at the
// root it follows `key`'s chunks downwards; for every node visited it pushes
// onto the iterator's stack an iterator over that node's children, beginning
// at or just after the slot on the key's path. The iterator is returned as
// soon as an `External` or `Nothing` slot is reached.
macro_rules! bound { | |
($iterator_name:ident, | |
// the current TrieMap
self = $this:expr, | |
// the key to look for | |
key = $key:expr, | |
// are we looking at the upper bound? | |
is_upper = $upper:expr, | |
// method names for slicing/iterating. | |
slice_from = $slice_from:ident, | |
iter = $iter:ident, | |
// see the comment on `addr!`, this is just an optional mut, but | |
// there's no 0-or-1 repeats yet. | |
mutability = $($mut_:tt)*) => { | |
{ | |
// # For `mut` | |
// We need an unsafe pointer here because we are borrowing | |
// mutable references to the internals of each of these | |
// mutable nodes, while still using the outer node. | |
// | |
// However, we're allowed to flout rustc like this because we
// never actually modify the "shape" of the nodes. The only | |
// place that mutation can actually occur is of the actual
// values of the TrieMap (as the return value of the | |
// iterator), i.e. we can never cause a deallocation of any | |
// TrieNodes so the raw pointer is always valid. | |
// | |
// # For non-`mut` | |
// We like sharing code so much that even a little unsafe won't | |
// stop us. | |
let this = $this; | |
let mut node = addr!(& $($mut_)* this.root as * $($mut_)* TrieNode<K,V>); | |
let key = $key; | |
let mut it = unsafe {$iterator_name::new(this.depth())}; | |
// everything else is zero'd, as we want. | |
it.remaining_max = this.length; | |
// this addr is necessary for the `Internal` pattern. | |
addr!(loop { | |
let children = unsafe {addr!(& $($mut_)* (*node).children)}; | |
// it.length is the current depth in the iterator and the | |
// current depth through the key we've traversed.
let child_id = key.chunk( it.length); | |
// slice_idx: first slot of this node the iterator should still visit.
// ret: whether the descent ends at this node.
let (slice_idx, ret) = match children[child_id] { | |
Internal(ref $($mut_)* n) => { | |
node = addr!(& $($mut_)* **n as * $($mut_)* TrieNode<K,V>); | |
(child_id + 1, false) | |
} | |
External(ref stored, _) => { | |
// skip the stored key when it sorts before `key`, or when it
// equals `key` and an upper bound was requested.
(match stored.mismatch(&key) { | |
Some((_,l,r)) if l<r => child_id + 1, | |
_ if ($upper && stored == &key) => child_id + 1, | |
_ => child_id, | |
}, true) | |
} | |
Nothing => { | |
(child_id + 1, true) | |
} | |
}; | |
// push to the stack. | |
it.stack[it.length] = children.$slice_from(slice_idx).$iter(); | |
it.length += 1; | |
if ret { return it } | |
}) | |
} | |
} | |
} | |
impl<K:Chunkable,V> TrieMap<K,V> { | |
// If `upper` is true then returns upper_bound else returns lower_bound. | |
#[inline] | |
fn bound<'a>(&'a self, key: K, upper: bool) -> Entries<'a, K,V> { | |
bound!(Entries, self = self, | |
key = key, is_upper = upper, | |
slice_from = slice_from, iter = iter, | |
mutability = ) | |
} | |
/// Get an iterator pointing to the first key-value pair whose key is not less than `key`. | |
/// If all keys in the map are less than `key` an empty iterator is returned. | |
pub fn lower_bound<'a>(&'a self, key: K) -> Entries<'a, K,V> { | |
self.bound(key, false) | |
} | |
/// Get an iterator pointing to the first key-value pair whose key is greater than `key`. | |
/// If all keys in the map are not greater than `key` an empty iterator is returned. | |
pub fn upper_bound<'a>(&'a self, key: K) -> Entries<'a, K,V> { | |
self.bound(key, true) | |
} | |
// If `upper` is true then returns upper_bound else returns lower_bound. | |
#[inline] | |
fn mut_bound<'a>(&'a mut self, key: K, upper: bool) -> MutEntries<'a, K,V> { | |
bound!(MutEntries, self = self, | |
key = key, is_upper = upper, | |
slice_from = mut_slice_from, iter = mut_iter, | |
mutability = mut) | |
} | |
/// Get an iterator pointing to the first key-value pair whose key is not less than `key`. | |
/// If all keys in the map are less than `key` an empty iterator is returned. | |
pub fn mut_lower_bound<'a>(&'a mut self, key: K) -> MutEntries<'a, K,V> { | |
self.mut_bound(key, false) | |
} | |
/// Get an iterator pointing to the first key-value pair whose key is greater than `key`. | |
/// If all keys in the map are not greater than `key` an empty iterator is returned. | |
pub fn mut_upper_bound<'a>(&'a mut self, key: K) -> MutEntries<'a, K,V> { | |
self.mut_bound(key, true) | |
} | |
} | |
impl<K: Chunkable,V> FromIterator<(K,V)> for TrieMap<K,V> { | |
fn from_iterator<Iter: Iterator<(K,V)>>(iter: &mut Iter) -> TrieMap<K,V> { | |
let mut map = TrieMap::new(); | |
map.extend(iter); | |
map | |
} | |
} | |
impl<K: Chunkable,V> Extendable<(K,V)> for TrieMap<K,V> { | |
fn extend<Iter: Iterator<(K,V)>>(&mut self, iter: &mut Iter) { | |
for (k, v) in *iter { | |
self.insert(k, v); | |
} | |
} | |
} | |
/// A set of `Chunkable` values, implemented as a `TrieMap` whose values
/// are all `()`.
#[allow(missing_doc)] | |
pub struct TrieSet<K> { | |
// Backing map; only its keys carry information.
priv map: TrieMap<K,()> | |
} | |
impl<K> Container for TrieSet<K> { | |
/// Return the number of elements in the set | |
#[inline] | |
fn len(&self) -> uint { self.map.len() } | |
} | |
impl<K> Mutable for TrieSet<K> { | |
/// Clear the set, removing all values. | |
#[inline] | |
fn clear(&mut self) { self.map.clear() } | |
} | |
impl<K : Chunkable> TrieSet<K> { | |
/// Create an empty TrieSet | |
#[inline] | |
pub fn new() -> TrieSet<K> { | |
TrieSet{map: TrieMap::new()} | |
} | |
/// Return true if the set contains a value | |
#[inline] | |
pub fn contains(&self, value: &K) -> bool { | |
self.map.contains_key(value) | |
} | |
/// Add a value to the set. Return true if the value was not already | |
/// present in the set. | |
#[inline] | |
pub fn insert(&mut self, value: K) -> bool { | |
self.map.insert(value, ()) | |
} | |
/// Remove a value from the set. Return true if the value was | |
/// present in the set. | |
#[inline] | |
pub fn remove(&mut self, value: &K) -> bool { | |
self.map.remove(value) | |
} | |
/// Visit all values in reverse order | |
#[inline] | |
pub fn each_reverse(&self, f: |&K| -> bool) -> bool { | |
self.map.each_reverse(|k, _| f(k)) | |
} | |
/// Get an iterator over the values in the set | |
#[inline] | |
pub fn iter<'a>(&'a self) -> SetItems<'a,K> { | |
SetItems{iter: self.map.iter()} | |
} | |
/// Get an iterator pointing to the first value that is not less than `val`. | |
/// If all values in the set are less than `val` an empty iterator is returned. | |
pub fn lower_bound<'a>(&'a self, val: K) -> SetItems<'a,K> { | |
SetItems{iter: self.map.lower_bound(val)} | |
} | |
/// Get an iterator pointing to the first value that key is greater than `val`. | |
/// If all values in the set are not greater than `val` an empty iterator is returned. | |
pub fn upper_bound<'a>(&'a self, val: K) -> SetItems<'a,K> { | |
SetItems{iter: self.map.upper_bound(val)} | |
} | |
} | |
impl<K: Chunkable> FromIterator<K> for TrieSet<K> { | |
fn from_iterator<Iter: Iterator<K>>(iter: &mut Iter) -> TrieSet<K> { | |
let mut set = TrieSet::new(); | |
set.extend(iter); | |
set | |
} | |
} | |
impl<K: Chunkable> Extendable<K> for TrieSet<K> { | |
fn extend<Iter: Iterator<K>>(&mut self, iter: &mut Iter) { | |
for elem in *iter { | |
self.insert(elem); | |
} | |
} | |
} | |
// One node of the trie: a fixed array of `SIZE` child slots.
struct TrieNode<K,V> { | |
// Number of slots in `children` that are not `Nothing`
// (this is what `check_integrity` asserts).
count: uint, | |
// Slots indexed by one chunk of the key.
children: [Child<K,V>, ..SIZE] | |
} | |
impl<K,V> TrieNode<K,V> { | |
#[inline] | |
fn new() -> TrieNode<K,V> { | |
// FIXME: #5244: [Nothing, ..SIZE] should be possible without implicit | |
// copyability | |
TrieNode{count: 0, | |
children: [Nothing, Nothing, Nothing, Nothing, | |
Nothing, Nothing, Nothing, Nothing, | |
Nothing, Nothing, Nothing, Nothing, | |
Nothing, Nothing, Nothing, Nothing]} | |
} | |
fn depth(&self) ->uint { | |
let mut ret =0; | |
for elt in self.children.iter() { | |
let d = | |
match *elt { | |
Internal(ref x) => 1+x.depth(), | |
External(_,_) => 1, | |
Nothing => 0 | |
}; | |
if d > ret { ret = d} | |
} | |
ret | |
} | |
} | |
impl<K,V> TrieNode<K,V> { | |
fn each_reverse<'a>(&'a self, f: |&K, &'a V| -> bool) -> bool { | |
for elt in self.children.rev_iter() { | |
match *elt { | |
Internal(ref x) => if !x.each_reverse(|i,t| f(i,t)) { return false }, | |
External(ref k, ref v) => if !f(k, v) { return false }, | |
Nothing => () | |
} | |
} | |
true | |
} | |
} | |
fn find_mut<'r, K: Chunkable,V>(child: &'r mut Child<K,V>, key: &K, idx: uint) -> Option<&'r mut V> { | |
match *child { | |
External(ref stored, ref mut value) if stored == key => Some(value), | |
External(..) => None, | |
Internal(ref mut x) => find_mut(&mut x.children[key.chunk( idx)], key, idx + 1), | |
Nothing => None | |
} | |
} | |
// Insert `key`/`value` into the subtree hanging off `child`.
//
// `count` is the occupancy counter of the node that owns `child`, and `idx`
// is the index of the next chunk of `key` to consume. Returns the value
// previously stored under `key`, or `None` if the key was not present.
fn insert<K: Chunkable,V>(count: &mut uint, mut child: &mut Child<K,V>, key: K, value: V, | |
mut idx: uint) -> Option<V> { | |
// we branch twice to avoid having to do the `replace` when we | |
// don't need to; this is much faster, especially for keys that | |
// have long shared prefixes. | |
let mut chld = child; | |
unsafe { | |
// raw pointer so the counter can be re-targeted at each node we descend into.
let mut count = std::ptr::to_mut_unsafe_ptr(count); | |
loop { | |
let tmp = chld; | |
match *tmp { | |
Nothing => { | |
// empty slot: store the pair here and bump the owning node's count.
*count += 1; | |
*tmp = External(key, value); | |
return None; | |
} | |
Internal(ref mut x) => { | |
// descend one level iteratively instead of recursing.
chld=&mut x.children[key.chunk(idx)]; | |
count = std::ptr::to_mut_unsafe_ptr(&mut x.count); | |
idx+=1 | |
} | |
External(ref stored_key, ref mut stored_value) if stored_key == &key => { | |
// swap in the new value and return the old. | |
return Some(replace(stored_value, value)); | |
} | |
// an `External` holding a different key: leave the loop and split it below.
_ => {child = tmp; break} | |
} | |
} | |
} | |
// conflict, an external node with differing keys: we have to | |
// split the node, so we need the old value by value; hence we | |
// have to move out of `child`. | |
match replace(child, Nothing) { | |
External(stored_key, stored_value) => { | |
let mut new = ~TrieNode::new(); | |
// re-insert the displaced pair first, then the new one, one level down.
insert(&mut new.count, | |
&mut new.children[stored_key.chunk( idx)], | |
stored_key, stored_value, idx + 1); | |
let ret = insert(&mut new.count, &mut new.children[key.chunk( idx)], | |
key, value, idx + 1); | |
// the slot stays occupied, so the owning node's count is unchanged.
*child = Internal(new); | |
return ret; | |
} | |
_ => unreachable!() | |
} | |
} | |
fn remove<K: Chunkable,V>(count: &mut uint, child: &mut Child<K,V>, key: &K, | |
idx: uint) -> Option<V> { | |
let (ret, this) = if match *child { | |
External(ref stored, _) if stored == key => true, | |
//match replace(child, Nothing) { | |
//External(_, value) => (Some(value), true), | |
_ => false | |
} { | |
//*child = Nothing; | |
match replace(child,Nothing) { | |
External(_, value) => (Some(value),true), | |
_ => fail!() | |
} | |
} | |
else { | |
match *child { | |
External(..) => (None, false), | |
Internal(ref mut x) => { | |
let ret = remove(&mut x.count, &mut x.children[key.chunk( idx)], | |
key, idx + 1); | |
(ret, x.count == 0) | |
} | |
Nothing => (None, false) | |
} | |
}; | |
if this { | |
*child = Nothing; | |
*count -= 1; | |
} | |
return ret; | |
} | |
/// Forward iterator over a map | |
pub struct Entries<'a, K,V> { | |
// Stack of child-slot iterators, one per trie node currently being walked.
priv stack: ~[vec::Items<'a, Child<K,V>>], | |
// Number of entries of `stack` currently in use.
priv length: uint, | |
// Lower and upper bounds on the number of pairs left, as reported by `size_hint`.
priv remaining_min: uint, | |
priv remaining_max: uint | |
} | |
/// Forward iterator over the key-value pairs of a map, with the | |
/// values being mutable. | |
pub struct MutEntries<'a, K,V> { | |
// Stack of mutable child-slot iterators, one per trie node currently being walked.
priv stack: ~[vec::MutItems<'a, Child<K,V>>], | |
// Number of entries of `stack` currently in use.
priv length: uint, | |
// Lower and upper bounds on the number of pairs left, as reported by `size_hint`.
priv remaining_min: uint, | |
priv remaining_max: uint | |
} | |
// FIXME #5846: see `addr!` above. | |
// Identity macro that forces a token sequence to be parsed as an item;
// `iterator_impl!` wraps its `impl Iterator` block in it.
macro_rules! item { ($i:item) => {$i}} | |
// Generates the constructor and the `Iterator` impl for an entry iterator.
// `$name` is the iterator struct, `$iter` the method used to iterate a
// node's children (`iter` or `mut_iter`), and `mutability` is either empty
// or `mut`.
macro_rules! iterator_impl { | |
($name:ident, | |
iter = $iter:ident, | |
mutability = $($mut_:tt)*) => { | |
impl<'a, K, V> $name<'a, K,V> { | |
// Create new zero'd iterator. We have a thin gilding of safety by | |
// using init rather than uninit, so that the worst that can happen | |
// from failing to initialise correctly after calling these is a | |
// segfault. | |
// `depth` is the number of stack slots to allocate.
unsafe fn new(depth : uint) -> $name<'a,K, V> { | |
$name { | |
remaining_min: 0, | |
remaining_max: 0, | |
length: 0, | |
// ick :( ... at least the compiler will tell us if we screwed up. | |
stack: vec::from_fn(depth,|_| init()) | |
} | |
} | |
} | |
item!(impl<'a, K,V> Iterator<(&'a K, &'a $($mut_)* V)> for $name<'a,K, V> { | |
// you might wonder why we're not even trying to act within the | |
// rules, and are just manipulating raw pointers like there's no | |
// such thing as invalid pointers and memory unsafety. The | |
// reason is performance, without doing this we can get the | |
// bench_iter_large microbenchmark down to about 30000 ns/iter | |
// (using .unsafe_ref to index self.stack directly, 38000 | |
// ns/iter with [] checked indexing), but this smashes that down | |
// to 13500 ns/iter. | |
// | |
// Fortunately, it's still safe... | |
// | |
// We have an invariant that every Internal node | |
// corresponds to one push to self.stack, and one pop, | |
// nested appropriately. self.stack is allocated by `new`
// with the `depth` passed by the caller (the callers in this
// file pass `TrieMap::depth()`), which must be large enough
// for the deepest nesting of Internal nodes in the trie.
fn next(&mut self) -> Option<(&'a K, &'a $($mut_)* V)> { | |
let start_ptr = self.stack.as_mut_ptr(); | |
unsafe { | |
// write_ptr is the next place to write to the stack. | |
// invariant: start_ptr <= write_ptr < end of the | |
// vector. | |
let mut write_ptr = start_ptr.offset(self.length as int); | |
while write_ptr != start_ptr { | |
// indexing back one is safe, since write_ptr > | |
// start_ptr now. | |
match (*write_ptr.offset(-1)).next() { | |
// exhausted this iterator (i.e. finished this | |
// Internal node), so pop from the stack. | |
// | |
// don't bother clearing the memory, because the | |
// next time we use it we'll've written to it | |
// first. | |
None => write_ptr = write_ptr.offset(-1), | |
Some(child) => { | |
addr!(match *child { | |
Internal(ref $($mut_)* node) => { | |
// going down a level, so push | |
// to the stack (this is the | |
// write referenced above) | |
*write_ptr = node.children.$iter(); | |
write_ptr = write_ptr.offset(1); | |
} | |
External(ref key, ref $($mut_)* value) => { | |
self.remaining_max -= 1; | |
if self.remaining_min > 0 { | |
self.remaining_min -= 1; | |
} | |
// store the new length of the | |
// stack, based on our current | |
// position. | |
self.length = (write_ptr as uint | |
- start_ptr as uint) / | |
mem::size_of_val(&*write_ptr); | |
return Some((key, value)); | |
} | |
Nothing => {} | |
}) | |
} | |
} | |
} | |
} | |
return None; | |
} | |
#[inline] | |
fn size_hint(&self) -> (uint, Option<uint>) { | |
(self.remaining_min, Some(self.remaining_max)) | |
} | |
}) | |
} | |
} | |
// Instantiate the shared iterator implementation for the immutable and the
// mutable entry iterators.
iterator_impl! { Entries, iter = iter, mutability = } | |
iterator_impl! { MutEntries, iter = mut_iter, mutability = mut } | |
/// Forward iterator over a set | |
pub struct SetItems<'a,K> { | |
// Iterator over the backing map; only the keys are yielded.
priv iter: Entries<'a, K,()> | |
} | |
impl<'a,K: Chunkable> Iterator<&'a K> for SetItems<'a,K> { | |
fn next(&mut self) -> Option<&'a K> { | |
self.iter.next().map(|(key, _)| key) | |
} | |
fn size_hint(&self) -> (uint, Option<uint>) { | |
self.iter.size_hint() | |
} | |
} | |
// Test helper: assert that `trie` is non-empty and that its `count` equals
// the number of occupied child slots, recursively for every sub-node.
#[cfg(test)]
pub fn check_integrity<K,V>(trie: &TrieNode<K,V>) {
    assert!(trie.count != 0);

    let mut occupied = 0;
    for slot in trie.children.iter() {
        match *slot {
            Internal(ref sub) => {
                check_integrity(&**sub);
                occupied += 1
            }
            External(_, _) => { occupied += 1 }
            Nothing => ()
        }
    }

    assert_eq!(occupied, trie.count);
}
// Unit tests for `TrieMap` with `uint` keys.
#[cfg(test)] | |
mod test_map { | |
use super::*; | |
//use std::prelude::*; | |
use std::iter::range_step; | |
use std::uint; | |
// `find_mut` returns a reference through which a stored value can be overwritten.
#[test] | |
fn test_find_mut() { | |
let mut m = TrieMap::new(); | |
assert!(m.insert(1u, 12)); | |
assert!(m.insert(2, 8)); | |
assert!(m.insert(5, 14)); | |
let new = 100; | |
match m.find_mut(&5) { | |
None => fail!(), Some(x) => *x = new | |
} | |
assert_eq!(m.find(&5), Some(&new)); | |
} | |
// `find_mut` returns `None` for a key that was never inserted.
#[test] | |
fn test_find_mut_missing() { | |
let mut m = TrieMap::new(); | |
assert!(m.find_mut(&0u).is_none()); | |
assert!(m.insert(1, 12)); | |
assert!(m.find_mut(&0).is_none()); | |
assert!(m.insert(2, 8)); | |
assert!(m.find_mut(&0).is_none()); | |
} | |
// Interleaved inserts and removes keep the node counts consistent.
#[test] | |
fn test_step() { | |
let mut trie = TrieMap::new(); | |
let n = 300u; | |
for x in range_step(1u, n, 2) { | |
assert!(trie.insert(x, x + 1)); | |
assert!(trie.contains_key(&x)); | |
check_integrity(&trie.root); | |
} | |
for x in range_step(0u, n, 2) { | |
assert!(!trie.contains_key(&x)); | |
assert!(trie.insert(x, x + 1)); | |
check_integrity(&trie.root); | |
} | |
for x in range(0u, n) { | |
assert!(trie.contains_key(&x)); | |
assert!(!trie.insert(x, x + 1)); | |
check_integrity(&trie.root); | |
} | |
for x in range_step(1u, n, 2) { | |
assert!(trie.remove(&x)); | |
assert!(!trie.contains_key(&x)); | |
check_integrity(&trie.root); | |
} | |
for x in range_step(0u, n, 2) { | |
assert!(trie.contains_key(&x)); | |
assert!(!trie.insert(x, x + 1)); | |
check_integrity(&trie.root); | |
} | |
} | |
// `each_reverse` visits keys in descending order.
#[test] | |
fn test_each_reverse() { | |
let mut m = TrieMap::new(); | |
assert!(m.insert(3u, 6)); | |
assert!(m.insert(0, 0)); | |
assert!(m.insert(4, 8)); | |
assert!(m.insert(2, 4)); | |
assert!(m.insert(1, 2)); | |
let mut n = 4; | |
m.each_reverse(|k, v| { | |
assert_eq!(*k, n); | |
assert_eq!(*v, n * 2); | |
n -= 1; | |
true | |
}); | |
} | |
// `each_reverse` stops as soon as the callback returns false.
#[test] | |
fn test_each_reverse_break() { | |
let mut m = TrieMap::new(); | |
for x in range(uint::MAX - 10000, uint::MAX).rev() { | |
m.insert(x, x / 2); | |
} | |
let mut n = uint::MAX - 1; | |
m.each_reverse(|k, v| { | |
if n == uint::MAX - 5000 { false } else { | |
assert!(n > uint::MAX - 5000); | |
assert_eq!(*k, n); | |
assert_eq!(*v, n / 2); | |
n -= 1; | |
true | |
} | |
}); | |
} | |
// `swap` returns the value previously stored under the key.
#[test] | |
fn test_swap() { | |
let mut m = TrieMap::new(); | |
assert_eq!(m.swap(1u, 2), None); | |
assert_eq!(m.swap(1, 3), Some(2)); | |
assert_eq!(m.swap(1, 4), Some(3)); | |
} | |
// `pop` returns the removed value once, then `None`.
#[test] | |
fn test_pop() { | |
let mut m = TrieMap::new(); | |
m.insert(1u, 2); | |
assert_eq!(m.pop(&1), Some(2)); | |
assert_eq!(m.pop(&1), None); | |
} | |
// Collecting an iterator of pairs builds a map containing each pair.
#[test] | |
fn test_from_iter() { | |
let xs = ~[(1u, 1i), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)]; | |
let map: TrieMap<uint,int> = xs.iter().map(|&x| x).collect(); | |
for &(k, v) in xs.iter() { | |
assert_eq!(map.find(&k), Some(&v)); | |
} | |
} | |
// `iter` yields every pair in ascending key order; an empty map yields nothing.
#[test] | |
fn test_iteration() { | |
let empty_map : TrieMap<uint,uint> = TrieMap::new(); | |
assert_eq!(empty_map.iter().next(), None); | |
let first = uint::MAX - 10000; | |
let last = uint::MAX; | |
let mut map = TrieMap::new(); | |
for x in range(first, last).rev() { | |
map.insert(x, x / 2); | |
} | |
let mut i = 0; | |
for (&k, &v) in map.iter() { | |
assert_eq!(k, first + i); | |
assert_eq!(v, k / 2); | |
i += 1; | |
} | |
assert_eq!(i, last - first); | |
} | |
// `mut_iter` visits pairs in ascending key order and allows values to be changed.
#[test] | |
fn test_mut_iter() { | |
let mut empty_map : TrieMap<uint,uint> = TrieMap::new(); | |
assert!(empty_map.mut_iter().next().is_none()); | |
let first = uint::MAX - 10000; | |
let last = uint::MAX; | |
let mut map = TrieMap::new(); | |
for x in range(first, last).rev() { | |
map.insert(x, x / 2); | |
} | |
let mut i = 0; | |
for (&k, v) in map.mut_iter() { | |
assert_eq!(k, first + i); | |
*v -= k / 2; | |
i += 1; | |
} | |
assert_eq!(i, last - first); | |
assert!(map.iter().all(|(_, &v)| v == 0)); | |
} | |
// `lower_bound` / `upper_bound` against a map holding every third key below 999.
#[test] | |
fn test_bound() { | |
let empty_map : TrieMap<uint,uint> = TrieMap::new(); | |
assert_eq!(empty_map.lower_bound(0).next(), None); | |
assert_eq!(empty_map.upper_bound(0).next(), None); | |
let last = 999u; | |
let step = 3u; | |
let value = 42u; | |
let mut map : TrieMap<uint,uint> = TrieMap::new(); | |
for x in range_step(0u, last, step) { | |
assert!(x % step == 0); | |
map.insert(x, value); | |
} | |
for i in range(0u, last - step) { | |
let mut lb = map.lower_bound(i); | |
let mut ub = map.upper_bound(i); | |
let next_key = i - i % step + step; | |
let next_pair = (&next_key, &value); | |
if i % step == 0 { | |
assert_eq!(lb.next(), Some((&i, &value))); | |
} else { | |
assert_eq!(lb.next(), Some(next_pair)); | |
} | |
assert_eq!(ub.next(), Some(next_pair)); | |
} | |
let mut lb = map.lower_bound(last - step); | |
let foo = last-step; | |
assert_eq!(lb.next(), Some((&foo, &value))); | |
let mut ub = map.upper_bound(last - step); | |
assert_eq!(ub.next(), None); | |
for i in range(last - step + 1, last) { | |
let mut lb = map.lower_bound(i); | |
assert_eq!(lb.next(), None); | |
let mut ub = map.upper_bound(i); | |
assert_eq!(ub.next(), None); | |
} | |
} | |
// `mut_lower_bound` / `mut_upper_bound` land on the expected key and allow mutation.
#[test] | |
fn test_mut_bound() { | |
let empty_map : TrieMap<uint,uint> = TrieMap::new(); | |
assert_eq!(empty_map.lower_bound(0).next(), None); | |
assert_eq!(empty_map.upper_bound(0).next(), None); | |
let mut m_lower = TrieMap::new(); | |
let mut m_upper = TrieMap::new(); | |
for i in range(0u, 100) { | |
m_lower.insert(2 * i, 4 * i); | |
m_upper.insert(2 * i, 4 * i); | |
} | |
for i in range(0u, 199) { | |
let mut lb_it = m_lower.mut_lower_bound(i); | |
let (&k, v) = lb_it.next().unwrap(); | |
let lb = i + i % 2; | |
assert_eq!(lb, k); | |
*v -= k; | |
} | |
for i in range(0u, 198) { | |
let mut ub_it = m_upper.mut_upper_bound(i); | |
let (&k, v) = ub_it.next().unwrap(); | |
let ub = i + 2 - i % 2; | |
assert_eq!(ub, k); | |
*v -= k; | |
} | |
assert!(m_lower.mut_lower_bound(199).next().is_none()); | |
assert!(m_upper.mut_upper_bound(198).next().is_none()); | |
assert!(m_lower.iter().all(|(_, &x)| x == 0)); | |
assert!(m_upper.iter().all(|(_, &x)| x == 0)); | |
} | |
} | |
// Micro-benchmarks for `TrieMap<uint, _>` using random keys.
#[cfg(test)] | |
mod bench_map { | |
use super::*; | |
//use std::prelude::*; | |
use std::rand::{weak_rng, Rng}; | |
use extra::test::BenchHarness; | |
// Full iteration over a map built from 20 random inserts.
#[bench] | |
fn bench_iter_small(bh: &mut BenchHarness) { | |
let mut m = TrieMap::<uint,uint>::new(); | |
let mut rng = weak_rng(); | |
for _ in range(0, 20) { | |
m.insert(rng.gen(), rng.gen()); | |
} | |
bh.iter(|| for _ in m.iter() {}) | |
} | |
// Full iteration over a map built from 1000 random inserts.
#[bench] | |
fn bench_iter_large(bh: &mut BenchHarness) { | |
let mut m = TrieMap::<uint,uint>::new(); | |
let mut rng = weak_rng(); | |
for _ in range(0, 1000) { | |
m.insert(rng.gen(), rng.gen()); | |
} | |
bh.iter(|| for _ in m.iter() {}) | |
} | |
// Ten `lower_bound` lookups with random keys per iteration.
#[bench] | |
fn bench_lower_bound(bh: &mut BenchHarness) { | |
let mut m = TrieMap::<uint,uint>::new(); | |
let mut rng = weak_rng(); | |
for _ in range(0, 1000) { | |
m.insert(rng.gen(), rng.gen()); | |
} | |
bh.iter(|| { | |
for _ in range(0, 10) { | |
m.lower_bound(rng.gen()); | |
} | |
}); | |
} | |
// Ten `upper_bound` lookups with random keys per iteration.
#[bench] | |
fn bench_upper_bound(bh: &mut BenchHarness) { | |
let mut m = TrieMap::<uint,uint>::new(); | |
let mut rng = weak_rng(); | |
for _ in range(0, 1000) { | |
m.insert(rng.gen(), rng.gen()); | |
} | |
bh.iter(|| { | |
for _ in range(0, 10) { | |
m.upper_bound(rng.gen()); | |
} | |
}); | |
} | |
// 1000 inserts per iteration of a 10-word value under fully random keys.
#[bench] | |
fn bench_insert_large(bh: &mut BenchHarness) { | |
let mut m = TrieMap::<uint,[uint, .. 10]>::new(); | |
let mut rng = weak_rng(); | |
bh.iter(|| { | |
for _ in range(0, 1000) { | |
m.insert(rng.gen(), [1, .. 10]); | |
} | |
}) | |
} | |
// As above, but keys are masked to their low 16 bits.
#[bench] | |
fn bench_insert_large_low_bits(bh: &mut BenchHarness) { | |
let mut m = TrieMap::<uint,[uint, .. 10]>::new(); | |
let mut rng = weak_rng(); | |
bh.iter(|| { | |
for _ in range(0, 1000) { | |
// only have the last few bits set. | |
m.insert(rng.gen::<uint>() & 0xff_ff, [1, .. 10]); | |
} | |
}) | |
} | |
// 1000 inserts per iteration of a unit value under fully random keys.
#[bench] | |
fn bench_insert_small(bh: &mut BenchHarness) { | |
let mut m = TrieMap::<uint,()>::new(); | |
let mut rng = weak_rng(); | |
bh.iter(|| { | |
for _ in range(0, 1000) { | |
m.insert(rng.gen(), ()); | |
} | |
}) | |
} | |
// As above, but keys are masked to their low 16 bits.
#[bench] | |
fn bench_insert_small_low_bits(bh: &mut BenchHarness) { | |
let mut m = TrieMap::<uint,()>::new(); | |
let mut rng = weak_rng(); | |
bh.iter(|| { | |
for _ in range(0, 1000) { | |
// only have the last few bits set. | |
m.insert(rng.gen::<uint>() & 0xff_ff, ()); | |
} | |
}) | |
} | |
} | |
#[cfg(test)] | |
mod test_set { | |
use super::*; | |
//use std::prelude::*; | |
use std::uint; | |
#[test] | |
fn test_sane_chunk() { | |
let x = 1u; | |
let y = 1 << (uint::BITS - 1); | |
let mut trie = TrieSet::new(); | |
assert!(trie.insert(x)); | |
assert!(trie.insert(y)); | |
assert_eq!(trie.len(), 2); | |
let expected = [x, y]; | |
for (i, x) in trie.iter().enumerate() { | |
assert_eq!(&expected[i], x); | |
} | |
} | |
#[test] | |
fn test_from_iter() { | |
let xs = ~[9u, 8, 7, 6, 5, 4, 3, 2, 1]; | |
let set: TrieSet<uint> = xs.iter().map(|&x| x).collect(); | |
for x in xs.iter() { | |
assert!(set.contains(x)); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment