Last active
May 25, 2023 04:51
-
-
Save mathieucaroff/0cf094325fb5294fb54c6a577f05a2c1 to your computer and use it in GitHub Desktop.
Create a (read only) slice without creating a copy of the given sequence.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# stackoverflow.com/q/3485475/can-i-create-a-view-on-a-python-list | |
# This solution is based on python 3 range ability to be sliced and indexed | |
# in constant time. | |
# | |
# It supports slicing, equality comparsion, string casting, and reproducers, | |
# but doesn't support assigment (and I don't plan to add support for it). | |
# | |
# Creating a SliceableSequenceView of a SliceableSequenceView won't slow down | |
# access times as this case is detected. | |
try: | |
from collections.abc import Sequence | |
except ImportError: | |
from collections import Sequence # pylint: disable=no-name-in-module | |
class SliceableSequenceView(Sequence): | |
""" | |
A read-only sequence which allows slicing without copying the viewed list. | |
Supports negative indexes. | |
Usage: | |
li = list(range(100)) | |
s = SliceableSequenceView(li) | |
u = SliceableSequenceView(li, slice(1,7,2)) | |
v = s[1:7:2] | |
w = s[-99:-93:2] | |
li[1] += 10 | |
assert li[1:7:2] == list(u) == list(v) == list(w) | |
""" | |
__slots__ = "seq range".split() | |
def __init__(self, seq, sliced=None): | |
""" | |
Accept any sequence (such as lists, strings or ranges). | |
""" | |
if sliced is None: | |
sliced = slice(len(seq)) | |
ls = looksSliceable = True | |
ls = ls and hasattr(seq, "seq") and isinstance(seq.seq, Sequence) | |
ls = ls and hasattr(seq, "range") and isinstance(seq.range, range) | |
looksSliceable = ls | |
if looksSliceable: | |
self.seq = seq.seq | |
self.range = seq.range[sliced] | |
else: | |
self.seq = seq | |
self.range = range(len(seq))[sliced] | |
def __len__(self): | |
return len(self.range) | |
def __getitem__(self, i): | |
if isinstance(i, slice): | |
return SliceableSequenceView(self.seq, i) | |
return self.seq[self.range[i]] | |
def __str__(self): | |
r = self.range | |
s = slice(r.start, r.stop, r.step) | |
return str(self.seq[s]) | |
def __repr__(self): | |
r = self.range | |
s = slice(r.start, r.stop, r.step) | |
return "SliceableSequenceView({!r})".format(self.seq[s]) | |
def equal(self, otherSequence): | |
if self is otherSequence: | |
return True | |
if len(self) != len(otherSequence): | |
return False | |
for v, w in zip(self, otherSequence): | |
if v != w: | |
return False | |
return True |
@rmallow Thank you for the benchmark data; using SliceableSequenceView makes slicing run in constant time, but has the drawback that it slows down access time by a factor 12, since the __getItem__
function has a lot more work to do.
I'm a little puzzled by your answer: Why would you advice against using my code if you know that indexing will always be slow when using an indirect view over a sequence? All the answers to the Stackoverflow question have a similar drawback. Thank you for your time.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Found this code on Stack Overflow and thought it might fix my situation. Ran some very basic benchmarking and would have to advise against this. I can't comment on stack overflow or even vote, so I'll leave a comment here as a cautionary.
import timeit
import random
li = [random.randint(0, 99999) for _ in range(2000)]
s = SliceableSequenceView(li)
def test_one():
a = s[500:1000]
for x in range(len(a)):
a[x]
def test_two():
b = li[500:1000]
for x in range(len(b)):
b[x]
def main():
print(timeit.timeit("test_one()", globals=globals(), number=10 ** 4))
print(timeit.timeit("test_two()", globals=globals(), number=10 ** 4))
if name == "main":
main()
results:
8.466294852
0.7036374809999995
The slow part is accessing the array. While if you ran just the test with slicing it would appear to be very fast.
A further explanation from: https://stackoverflow.com/questions/66600139/constant-time-list-slicing
"Note that the indirection through indexes, in Python code, will be considerably slower than using native list slicing (despite the internal memory copy done by list slicing). The only benefit would be in extreme cases where total memory usage matters and the size of slices is enormous. The new list returned by slicing a list will only consume 8 bytes per item, no matter what the list items are. That is because both mutable and immutable datatypes are stored as pointers in a list (for immutables all the pointers reference the same memory for a given value)."