Skip to content

Instantly share code, notes, and snippets.

@olooney
Created March 22, 2013 22:03
Show Gist options
  • Save olooney/5225139 to your computer and use it in GitHub Desktop.
Save olooney/5225139 to your computer and use it in GitHub Desktop.
def collapse_intervals(intervals):
'''intervals is an iterable of 2-tuples representing the start and end of
closed intervals. returns a list of 2-tuples after collapsing any
overlapping intervals. For example:
collapse_intervals([(1,5), (9,10), (4,7)]) -> [(1,7), (9,10)]
The algorithm doesn't care if intervals are out of order, or if two points
of the same interval are out of order. It also doesn't require the points
to be of any particular type as long as they are strictly comparable: in
particular, strings, dates, and floating point numbers are all acceptable.
'''
points = []
for interval in intervals:
start, end = sorted(interval)
points.append( (start, 'A') )
points.append( (end , 'B') )
points.sort()
# This algorithm is easy enough to understand if you imagining sweeping
# from left to right and keeping track of the number of intervals we're
# inside. every time we encounter the left edge of an interval we enter a
# new interval and the overlap count goes up by one; everytime we hit the
# right edge of an interval the overlap count goes down by one. When the
# count is at zero we are outside of any interval, and as long as it stays
# above zero we are inside at least one. To construct collapsed intervals,
# we mark the left edge at the point where the overlaps increases from 0 to
# 1, and the right edge at the point when it decreases to zero again.
# Note that since 'A' sorts before 'B', we will always process left edges
# before right edges. That means that if one interval ends and another
# begins at the same point, the overlap count goes 0,1,2,1,0. In
# particular, the overlap count does not drop to zero at the middle point
# in this case, and we construct a single collapsed interval encompassing
# both intervals. In other words, we're interpreting intervals as "closed."
collapsed_intervals = []
overlap = 0 # the number of intervals we're currently in
for point, edge in points:
if edge == 'A':
overlap += 1
if overlap == 1:
# start a new collapsed interval at the first left edge
start = point
elif edge == 'B':
overlap -= 1
if overlap == 0:
collapsed_intervals.append( (start, point) )
return collapsed_intervals
if __name__ == '__main__':
import unittest
class TestIntervals(unittest.TestCase):
def test_collapse_intervals(self):
def test(original, collapsed, name=''):
self.assertListEqual( collapse_intervals(original), collapsed, name)
print name + ':', repr(original), '->', repr(collapsed), 'OK'
test([], [], 'empty')
test([(1,1)], [(1,1)], 'single point')
test([(1,2)], [(1,2)], 'single')
test([(2,1)], [(1,2)], 'reversed')
test([(1,5), (9,10), (4,7)], [(1,7), (9,10)], 'one collapse')
test([(1,2), (2,3)], [(1, 3)], 'closed' )
test([(1,2), (1,2), (1,2)], [(1, 2)], 'redundant' )
test([(2,3), (1,2)], [(1, 3)] )
test([(2,4), (1,3)], [(1, 4)], 'out of order' )
test([(2,4), (1,3), (0,2), (1,2)], [(0, 4)], 'down to one' )
test([(2,4), (1,3), (0,2), (-5,10)], [(-5, 10)], 'overarching' )
test([(2,4), (1,3), (0,2), (8,9), (9,10)], [(0, 4), (8, 10)], 'separate' )
unittest.main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment