|
from heapq import heapify, heappush, heappop, nsmallest |
|
from container import Container |
|
|
|
class SortedList(Container): |
|
def __init__(self, data=None): |
|
self.data = data[:] if data else [] |
|
heapify(self.data) |
|
|
|
def __getitem__(self, key): |
|
raise NotImplementedError |
|
def __setitem__(self, key, value): |
|
raise NotImplementedError |
|
def __delitem__(self, key): |
|
raise NotImplementedError |
|
|
|
def __iter__(self): |
|
data = self.data[:] |
|
while True: |
|
try: |
|
yield heappop(data) |
|
except IndexError: |
|
break |
|
def __reversed__(self): |
|
raise NotImplementedError |
|
|
|
def __getslice__(self, i, j): |
|
i = max(i, 0) |
|
j = max(j, 0) |
|
return self.__class__(nsmallest(j, self.data)[i:]) |
|
|
|
def add(self, value): |
|
"""Add an item to this list.""" |
|
heappush(self.data, value) |
|
|
|
def pop(self, index=None): |
|
"""Remove and return item at index (default last). |
|
Raise IndexError if this list is empty.""" |
|
raise NotImplementedError |
|
|
|
def remove(self, value): |
|
"""Remove first occurrence of value. |
|
Raise ValueError if not found.""" |
|
index = self.index(value) |
|
del self[index] |
|
|
|
def clear(self): |
|
"""Remove all items from this list.""" |
|
self.data = [] |
|
|
|
def copy(self): |
|
"""Return a shallow copy of this list.""" |
|
return self.__class__(self.data[:]) |
|
|
|
def count(self, value): |
|
"""Return number of occurrences of value.""" |
|
return self.data.count(value) |
|
|
|
def index(self, value, start=0, stop=None): |
|
"""Return first index of value. |
|
Raise ValueError if not found.""" |
|
raise NotImplementedError |
|
|
|
if __name__ == '__main__': |
|
from random import randint |
|
from time import time |
|
|
|
print '\nTest with 10000 random numbers...' |
|
data = [randint(1, 1000000) for i in xrange(10000)] |
|
|
|
t0 = time() |
|
a = [] |
|
for i in data: |
|
a.append(i) |
|
a.sort() |
|
baseline1 = time() - t0 |
|
print 'Raw list takes %s seconds' % baseline1 |
|
|
|
t0 = time() |
|
a = data[:] |
|
a.sort() |
|
baseline2 = time() - t0 |
|
print 'Raw list, sort once takes %s seconds' % baseline2 |
|
|
|
t0 = time() |
|
a = SortedList() |
|
for i in data: |
|
a.add(i) |
|
dt = time() - t0 |
|
print 'SortedList(1) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1)) |
|
|
|
t0 = time() |
|
a = SortedList(data) |
|
dt = time() - t0 |
|
print 'SortedList(2) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1)) |
|
|
|
t0 = time() |
|
for i in a: |
|
pass |
|
dt = time() - t0 |
|
print 'Traverse SortedList takes %s seconds' % (dt) |
|
|
|
#----------------------------------------------------------------------------- |
|
|
|
class CustomObj(object): |
|
def __init__(self, value): |
|
self.value = value |
|
def __cmp__(self, other): |
|
if self.value < other.value: |
|
return -1 |
|
elif self.value == other.value: |
|
return 0 |
|
elif self.value > other.value: |
|
return 1 |
|
def __repr__(self): |
|
return '<%s: %s>' % (self.__class__.__name__, self.value) |
|
|
|
print '\nTest with 10000 custom objects...' |
|
_data = [CustomObj(i) for i in data] |
|
|
|
t0 = time() |
|
a = [] |
|
for i in _data: |
|
a.append(i) |
|
a.sort() |
|
baseline1 = time() - t0 |
|
print 'Raw list takes %s seconds' % baseline1 |
|
|
|
t0 = time() |
|
a = _data[:] |
|
a.sort() |
|
baseline2 = time() - t0 |
|
print 'Raw list, sort once takes %s seconds' % baseline2 |
|
|
|
t0 = time() |
|
a = SortedList() |
|
for i in _data: |
|
a.add(i) |
|
dt = time() - t0 |
|
print 'SortedList(1) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1)) |
|
|
|
t0 = time() |
|
a = SortedList(_data) |
|
dt = time() - t0 |
|
print 'SortedList(2) takes %s seconds:\n\t%sx faster than baseline1,\n\t%sx slower than baseline2' % (dt, round(baseline1 / dt, 1), round(dt / baseline2, 1)) |
|
|
|
t0 = time() |
|
for i in a: |
|
pass |
|
dt = time() - t0 |
|
print 'Traverse SortedList takes %s seconds' % (dt) |
For performance comparisons, check out the Python sortedcontainers module at http://www.grantjenks.com/docs/sortedcontainers/ It's pure-Python and as fast as blist, rbtree, and treap. There's a performance comparison page at http://www.grantjenks.com/docs/sortedcontainers/performance.html and the package is easily installable from PyPI.