Last active
August 29, 2015 14:09
-
-
Save haberman/3aae925f6fb1031e783b to your computer and use it in GitHub Desktop.
Implementation of an IntSet data structure in Rust.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::collections::TreeMap; | |
use std::collections::tree_map::Entries; // This took forever | |
use std::num::One; | |
use std::num::Saturating; | |
// Equivalent to Set<T>, but only works for Ints, and may be more efficient | |
// if the set contains large contiguous ranges. Each range is stored as a | |
// single entry instead of storing each member individually. | |
struct IntSet<T> { | |
// These correspond to ranges of high->low. The ranges are closed (inclusive) | |
// on both ends: [low, high]. Half-open ranges would make the algorithms | |
// simpler but then we wouldn't be able to specify a range that includes | |
// the max value. | |
// | |
// This representation allows for efficient enumeration and set membership | |
// test. | |
ints: TreeMap<T, T> | |
} | |
impl<T: Int + std::fmt::Show> IntSet<T> { | |
pub fn new() -> IntSet<T> { | |
IntSet { ints: TreeMap::new() } | |
} | |
// Returns an iterator to the first range (low, high) such that high >= n. | |
// If any range in the set contains n, it will be this one. | |
fn find<'a>(&'a self, n: T) -> Entries<'a, T, T> { self.ints.lower_bound(&n) } | |
// For testing only: validates that the set of ranges is the same as "ranges." | |
fn assert_ranges(&self, ranges: &[(T, T)]) { | |
let mut i = 0; | |
for (&high, &low) in self.ints.iter() { | |
assert_eq!(low, ranges[i].val0()); | |
assert_eq!(high, ranges[i].val1()); | |
i += 1; | |
} | |
assert_eq!(i, ranges.len()); | |
} | |
// Test whether the set contains this value. | |
pub fn contains(&self, n: T) -> bool { | |
match self.find(n).next() { | |
Some((_, low)) => { n >= *low } | |
None => { false } | |
} | |
} | |
// Add the single member "n". | |
pub fn add(&mut self, n: T) { | |
self.add_range(n, n); | |
} | |
// Add members from the inclusive range [low, high]. | |
pub fn add_range(&mut self, low: T, high: T) { | |
// If we are merging the new range with existing entries; track the list of | |
// existing entries we should remove later. We can't do this in the loop | |
// because there's no way to delete entries while iterating over the map. | |
let mut to_delete = Vec::new(); | |
// Find the new range to insert by merging (low, high) with any overlapping | |
// ranges. | |
let (insert_high, insert_low) = { | |
let mut iter = self.find(low.saturating_sub(One::one())); | |
match iter.next() { | |
None => { | |
// No overlapping ranges, add this range verbatim. | |
(high, low) | |
} | |
Some((&this_high, &this_low)) => { | |
// We overlap with at least one existing range. | |
// Compute the union of all intersecting ranges. | |
let new_low = std::cmp::min(low, this_low); | |
let mut new_high = std::cmp::max(high, this_high); | |
to_delete.push(this_high); | |
for (&iter_high, &iter_low) in iter { | |
if iter_low > high.saturating_add(One::one()) { break; } | |
to_delete.push(iter_high); | |
new_high = std::cmp::max(new_high, iter_high) | |
} | |
(new_high, new_low) | |
} | |
} | |
}; | |
for high in to_delete.iter() { | |
let removed = self.ints.remove(high); | |
assert!(removed.is_some()) | |
} | |
let existing = self.ints.insert(insert_high, insert_low); | |
assert!(existing.is_none()); | |
} | |
pub fn add_intset(&mut self, set: &IntSet<T>) { | |
for (&high, &low) in set.ints.iter() { | |
self.add_range(low, high); | |
} | |
} | |
} | |
mod test { | |
use super::IntSet; | |
#[test] | |
fn test_add() { | |
let mut set: IntSet<u8> = IntSet::new(); | |
assert_eq!(false, set.contains(5)); | |
set.add(5); | |
assert_eq!(true, set.contains(5)); | |
assert_eq!(false, set.contains(7)); | |
// Adding again causes no error but also has no effect. | |
set.add(5); | |
assert_eq!(true, set.contains(5)); | |
assert_eq!(false, set.contains(7)); | |
} | |
#[test] | |
fn test_range() { | |
let mut set: IntSet<u8> = IntSet::new(); | |
set.add_range(5, 7); | |
set.assert_ranges([(5, 7)]); | |
assert_eq!(false, set.contains(4)); | |
assert_eq!(true, set.contains(5)); | |
assert_eq!(true, set.contains(6)); | |
assert_eq!(true, set.contains(7)); | |
assert_eq!(false, set.contains(8)); | |
// Case 1: new range is identical to existing range. | |
set.add_range(5, 7); | |
set.assert_ranges([(5, 7)]); | |
// Case 2: new range is superset of existing range. | |
set.add_range(4, 8); | |
set.assert_ranges([(4, 8)]); | |
// Case 3: new range overlaps existing range's left edge (but not right). | |
set.add_range(3, 5); | |
set.assert_ranges([(3, 8)]); | |
// Case 4: new range overlaps existing range's right edge (but not left). | |
set.add_range(7, 9); | |
set.assert_ranges([(3, 9)]); | |
// Case 5: new range is adjacent to existing range on the left. | |
set.add_range(0, 2); | |
set.assert_ranges([(0, 9)]); | |
// Case 6: new range is adjacent to existing range on the right. | |
set.add(10); | |
set.assert_ranges([(0, 10)]); | |
// Case 7: new range does not overlap existing range. | |
set.add_range(20, 30); | |
set.add_range(50, 60); | |
set.add_range(80, 90); | |
set.assert_ranges([(0, 10), | |
(20, 30), | |
(50, 60), | |
(80, 90)]); | |
// Case 8: new range overlaps multiple ranges (but not all). | |
set.add_range(25, 55); | |
set.assert_ranges([(0, 10), | |
(20, 60), | |
(80, 90)]); | |
} | |
#[test] | |
fn test_addintset() { | |
let mut set1: IntSet<u8> = IntSet::new(); | |
set1.add(2); | |
set1.add(4); | |
set1.add(6); | |
let mut set2: IntSet<u8> = IntSet::new(); | |
set2.add(3); | |
set2.add(5); | |
set2.add_range(10, 20); | |
set2.add_intset(&set1); | |
set2.assert_ranges([(2, 6), | |
(10, 20)]); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment