Skip to content

Instantly share code, notes, and snippets.

@bcse
Last active October 7, 2015 02:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bcse/3092164 to your computer and use it in GitHub Desktop.
Save bcse/3092164 to your computer and use it in GitHub Desktop.
Python SortedList

SortedList is a list-like class that always keeps items sorted. heaplist.SortedList is faster than bisectlist.SortedList but with lesser functions.

Here are some test results:

bisectlist.SortedList

Test with 10000 random numbers...
Raw list takes 1.01116704941 seconds
Raw list, sort once takes 0.00338888168335 seconds
SortedList(1) takes 0.0301549434662 seconds:
	33.5x faster than baseline1,
	8.9x slower than baseline2
SortedList(2) takes 0.00342583656311 seconds:
	295.2x faster than baseline1,
	1.0x slower than baseline2
Traverse SortedList takes 0.000600099563599 seconds

Test with 10000 custom objects...
Raw list takes 30.8985888958 seconds
Raw list, sort once takes 0.0573041439056 seconds
SortedList(1) takes 0.0868980884552 seconds:
	355.6x faster than baseline1,
	1.5x slower than baseline2
SortedList(2) takes 0.0560028553009 seconds:
	551.7x faster than baseline1,
	1.0x slower than baseline2
Traverse SortedList takes 0.000681161880493 seconds

heaplist.SortedList

Test with 10000 random numbers...
Raw list takes 0.995594978333 seconds
Raw list, sort once takes 0.00341105461121 seconds
SortedList(1) takes 0.0149669647217 seconds:
	66.5x faster than baseline1,
	4.4x slower than baseline2
SortedList(2) takes 0.00660085678101 seconds:
	150.8x faster than baseline1,
	1.9x slower than baseline2
Traverse SortedList takes 0.0506889820099 seconds

Test with 10000 custom objects...
Raw list takes 30.6724150181 seconds
Raw list, sort once takes 0.0557820796967 seconds
SortedList(1) takes 0.0252261161804 seconds:
	1215.9x faster than baseline1,
	0.5x slower than baseline2
SortedList(2) takes 0.0148570537567 seconds:
	2064.5x faster than baseline1,
	0.3x slower than baseline2
Traverse SortedList takes 0.112375974655 seconds

Similar Projects

from bisect import bisect_left, bisect_right
from container import Container
class SortedList(Container):
def __init__(self, data=None):
self.data = sorted(data) if data else []
def __setitem__(self, key, value):
raise NotImplementedError
def __contains__(self, value):
return bisect_right(self.data, value) > 0
def add(self, value):
"""Add an item to this list."""
index = bisect_right(self.data, value)
self.data.insert(index, value)
def pop(self, index=None):
"""Remove and return item at index (default last).
Raise IndexError if this list is empty."""
if index is None:
index = len(self.data) - 1
return self.data.pop(index)
def remove(self, value):
"""Remove first occurrence of value.
Raise ValueError if not found."""
index = self.index(value)
del self[index]
def clear(self):
"""Remove all items from this list."""
self.data = []
def copy(self):
"""Return a shallow copy of this list."""
return self.__class__(self.data[:])
def count(self, value):
"""Return number of occurrences of value."""
i = bisect_left(self.data, value)
j = bisect_right(self.data, value)
return j - i
def index(self, value, start=0, stop=None):
"""Return first index of value.
Raise ValueError if not found."""
if stop is None:
stop = len(self.data)
index = bisect_left(self.data, value, start, stop)
if index != len(self.data) and self.data[index] == value:
return index
raise ValueError
if __name__ == '__main__':
from random import randint
from time import time
print '\nTest with 10000 random numbers...'
data = [randint(1, 1000000) for i in xrange(10000)]
t0 = time()
a = []
for i in data:
a.append(i)
a.sort()
baseline1 = time() - t0
print 'Raw list takes %s seconds' % baseline1
t0 = time()
a = data[:]
a.sort()
baseline2 = time() - t0
print 'Raw list, sort once takes %s seconds' % baseline2
t0 = time()
a = SortedList()
for i in data:
a.add(i)
dt = time() - t0
print 'SortedList(1) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1))
t0 = time()
a = SortedList(data)
dt = time() - t0
print 'SortedList(2) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1))
t0 = time()
for i in a:
pass
dt = time() - t0
print 'Traverse SortedList takes %s seconds' % (dt)
#-----------------------------------------------------------------------------
class CustomObj(object):
def __init__(self, value):
self.value = value
def __cmp__(self, other):
if self.value < other.value:
return -1
elif self.value == other.value:
return 0
elif self.value > other.value:
return 1
def __repr__(self):
return '<%s: %s>' % (self.__class__.__name__, self.value)
print '\nTest with 10000 custom objects...'
_data = [CustomObj(i) for i in data]
t0 = time()
a = []
for i in _data:
a.append(i)
a.sort()
baseline1 = time() - t0
print 'Raw list takes %s seconds' % baseline1
t0 = time()
a = _data[:]
a.sort()
baseline2 = time() - t0
print 'Raw list, sort once takes %s seconds' % baseline2
t0 = time()
a = SortedList()
for i in _data:
a.add(i)
dt = time() - t0
print 'SortedList(1) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1))
t0 = time()
a = SortedList(_data)
dt = time() - t0
print 'SortedList(2) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1))
t0 = time()
for i in a:
pass
dt = time() - t0
print 'Traverse SortedList takes %s seconds' % (dt)
class Container(object):
"""Base class of SortedList"""
def __init__(self, data=None):
super(Container, self).__init__()
self.data = data[:] if data else []
def __repr__(self):
return '%s(%s)' % (self.__class__.__name__, repr(self.data))
def __str__(self):
return str(self.data)
def __unicode__(self):
return unicode(self.data)
def __cast(self, other):
if isinstance(other, Container): return other.data
else: return other
def __lt__(self, other):
return self.data < self.__cast(other)
def __le__(self, other):
return self.data <= self.__cast(other)
def __eq__(self, other):
return self.data == self.__cast(other)
def __ne__(self, other):
return self.data != self.__cast(other)
def __gt__(self, other):
return self.data > self.__cast(other)
def __ge__(self, other):
return self.data >= self.__cast(other)
def __cmp__(self, other):
return cmp(self.data, self.__cast(other))
def __hash__(self):
return hash(self.data)
def __len__(self):
return len(self.data)
def __getitem__(self, key):
return self.data[key]
def __setitem__(self, key, value):
self.data[key] = value
def __delitem__(self, key):
del self.data[key]
def __iter__(self):
return self.data.__iter__()
def __reversed__(self):
return reversed(self.data)
def __contains__(self, item):
return item in self.data
if __name__ == '__main__':
o = Container(range(10))
print `o`
from heapq import heapify, heappush, heappop, nsmallest
from container import Container
class SortedList(Container):
def __init__(self, data=None):
self.data = data[:] if data else []
heapify(self.data)
def __getitem__(self, key):
raise NotImplementedError
def __setitem__(self, key, value):
raise NotImplementedError
def __delitem__(self, key):
raise NotImplementedError
def __iter__(self):
data = self.data[:]
while True:
try:
yield heappop(data)
except IndexError:
break
def __reversed__(self):
raise NotImplementedError
def __getslice__(self, i, j):
i = max(i, 0)
j = max(j, 0)
return self.__class__(nsmallest(j, self.data)[i:])
def add(self, value):
"""Add an item to this list."""
heappush(self.data, value)
def pop(self, index=None):
"""Remove and return item at index (default last).
Raise IndexError if this list is empty."""
raise NotImplementedError
def remove(self, value):
"""Remove first occurrence of value.
Raise ValueError if not found."""
index = self.index(value)
del self[index]
def clear(self):
"""Remove all items from this list."""
self.data = []
def copy(self):
"""Return a shallow copy of this list."""
return self.__class__(self.data[:])
def count(self, value):
"""Return number of occurrences of value."""
return self.data.count(value)
def index(self, value, start=0, stop=None):
"""Return first index of value.
Raise ValueError if not found."""
raise NotImplementedError
if __name__ == '__main__':
from random import randint
from time import time
print '\nTest with 10000 random numbers...'
data = [randint(1, 1000000) for i in xrange(10000)]
t0 = time()
a = []
for i in data:
a.append(i)
a.sort()
baseline1 = time() - t0
print 'Raw list takes %s seconds' % baseline1
t0 = time()
a = data[:]
a.sort()
baseline2 = time() - t0
print 'Raw list, sort once takes %s seconds' % baseline2
t0 = time()
a = SortedList()
for i in data:
a.add(i)
dt = time() - t0
print 'SortedList(1) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1))
t0 = time()
a = SortedList(data)
dt = time() - t0
print 'SortedList(2) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1))
t0 = time()
for i in a:
pass
dt = time() - t0
print 'Traverse SortedList takes %s seconds' % (dt)
#-----------------------------------------------------------------------------
class CustomObj(object):
def __init__(self, value):
self.value = value
def __cmp__(self, other):
if self.value < other.value:
return -1
elif self.value == other.value:
return 0
elif self.value > other.value:
return 1
def __repr__(self):
return '<%s: %s>' % (self.__class__.__name__, self.value)
print '\nTest with 10000 custom objects...'
_data = [CustomObj(i) for i in data]
t0 = time()
a = []
for i in _data:
a.append(i)
a.sort()
baseline1 = time() - t0
print 'Raw list takes %s seconds' % baseline1
t0 = time()
a = _data[:]
a.sort()
baseline2 = time() - t0
print 'Raw list, sort once takes %s seconds' % baseline2
t0 = time()
a = SortedList()
for i in _data:
a.add(i)
dt = time() - t0
print 'SortedList(1) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1))
t0 = time()
a = SortedList(_data)
dt = time() - t0
print 'SortedList(2) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1))
t0 = time()
for i in a:
pass
dt = time() - t0
print 'Traverse SortedList takes %s seconds' % (dt)
@grantjenks
Copy link

For performance comparisons, check out the Python sortedcontainers module at http://www.grantjenks.com/docs/sortedcontainers/ It's pure-Python and as fast as blist, rbtree, and treap. There's a performance comparison page at http://www.grantjenks.com/docs/sortedcontainers/performance.html and the package is easily installable from PyPI.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment