Skip to content

Instantly share code, notes, and snippets.

@haberman
Last active August 29, 2015 14:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save haberman/3aae925f6fb1031e783b to your computer and use it in GitHub Desktop.
Save haberman/3aae925f6fb1031e783b to your computer and use it in GitHub Desktop.
Implementation of an IntSet data structure in Rust.
use std::collections::TreeMap;
use std::collections::tree_map::Entries; // This took forever
use std::num::One;
use std::num::Saturating;
// Equivalent to Set<T>, but only works for Ints, and may be more efficient
// if the set contains large contiguous ranges. Each range is stored as a
// single entry instead of storing each member individually.
struct IntSet<T> {
// These correspond to ranges of high->low. The ranges are closed (inclusive)
// on both ends: [low, high]. Half-open ranges would make the algorithms
// simpler but then we wouldn't be able to specify a range that includes
// the max value.
//
// This representation allows for efficient enumeration and set membership
// test.
ints: TreeMap<T, T>
}
impl<T: Int + std::fmt::Show> IntSet<T> {
pub fn new() -> IntSet<T> {
IntSet { ints: TreeMap::new() }
}
// Returns an iterator to the first range (low, high) such that high >= n.
// If any range in the set contains n, it will be this one.
fn find<'a>(&'a self, n: T) -> Entries<'a, T, T> { self.ints.lower_bound(&n) }
// For testing only: validates that the set of ranges is the same as "ranges."
fn assert_ranges(&self, ranges: &[(T, T)]) {
let mut i = 0;
for (&high, &low) in self.ints.iter() {
assert_eq!(low, ranges[i].val0());
assert_eq!(high, ranges[i].val1());
i += 1;
}
assert_eq!(i, ranges.len());
}
// Test whether the set contains this value.
pub fn contains(&self, n: T) -> bool {
match self.find(n).next() {
Some((_, low)) => { n >= *low }
None => { false }
}
}
// Add the single member "n".
pub fn add(&mut self, n: T) {
self.add_range(n, n);
}
// Add members from the inclusive range [low, high].
pub fn add_range(&mut self, low: T, high: T) {
// If we are merging the new range with existing entries; track the list of
// existing entries we should remove later. We can't do this in the loop
// because there's no way to delete entries while iterating over the map.
let mut to_delete = Vec::new();
// Find the new range to insert by merging (low, high) with any overlapping
// ranges.
let (insert_high, insert_low) = {
let mut iter = self.find(low.saturating_sub(One::one()));
match iter.next() {
None => {
// No overlapping ranges, add this range verbatim.
(high, low)
}
Some((&this_high, &this_low)) => {
// We overlap with at least one existing range.
// Compute the union of all intersecting ranges.
let new_low = std::cmp::min(low, this_low);
let mut new_high = std::cmp::max(high, this_high);
to_delete.push(this_high);
for (&iter_high, &iter_low) in iter {
if iter_low > high.saturating_add(One::one()) { break; }
to_delete.push(iter_high);
new_high = std::cmp::max(new_high, iter_high)
}
(new_high, new_low)
}
}
};
for high in to_delete.iter() {
let removed = self.ints.remove(high);
assert!(removed.is_some())
}
let existing = self.ints.insert(insert_high, insert_low);
assert!(existing.is_none());
}
pub fn add_intset(&mut self, set: &IntSet<T>) {
for (&high, &low) in set.ints.iter() {
self.add_range(low, high);
}
}
}
mod test {
use super::IntSet;
#[test]
fn test_add() {
let mut set: IntSet<u8> = IntSet::new();
assert_eq!(false, set.contains(5));
set.add(5);
assert_eq!(true, set.contains(5));
assert_eq!(false, set.contains(7));
// Adding again causes no error but also has no effect.
set.add(5);
assert_eq!(true, set.contains(5));
assert_eq!(false, set.contains(7));
}
#[test]
fn test_range() {
let mut set: IntSet<u8> = IntSet::new();
set.add_range(5, 7);
set.assert_ranges([(5, 7)]);
assert_eq!(false, set.contains(4));
assert_eq!(true, set.contains(5));
assert_eq!(true, set.contains(6));
assert_eq!(true, set.contains(7));
assert_eq!(false, set.contains(8));
// Case 1: new range is identical to existing range.
set.add_range(5, 7);
set.assert_ranges([(5, 7)]);
// Case 2: new range is superset of existing range.
set.add_range(4, 8);
set.assert_ranges([(4, 8)]);
// Case 3: new range overlaps existing range's left edge (but not right).
set.add_range(3, 5);
set.assert_ranges([(3, 8)]);
// Case 4: new range overlaps existing range's right edge (but not left).
set.add_range(7, 9);
set.assert_ranges([(3, 9)]);
// Case 5: new range is adjacent to existing range on the left.
set.add_range(0, 2);
set.assert_ranges([(0, 9)]);
// Case 6: new range is adjacent to existing range on the right.
set.add(10);
set.assert_ranges([(0, 10)]);
// Case 7: new range does not overlap existing range.
set.add_range(20, 30);
set.add_range(50, 60);
set.add_range(80, 90);
set.assert_ranges([(0, 10),
(20, 30),
(50, 60),
(80, 90)]);
// Case 8: new range overlaps multiple ranges (but not all).
set.add_range(25, 55);
set.assert_ranges([(0, 10),
(20, 60),
(80, 90)]);
}
#[test]
fn test_addintset() {
let mut set1: IntSet<u8> = IntSet::new();
set1.add(2);
set1.add(4);
set1.add(6);
let mut set2: IntSet<u8> = IntSet::new();
set2.add(3);
set2.add(5);
set2.add_range(10, 20);
set2.add_intset(&set1);
set2.assert_ranges([(2, 6),
(10, 20)]);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment