Last active
August 29, 2015 13:59
-
-
Save bpeterso2000/10941487 to your computer and use it in GitHub Desktop.
Efficiently extracts column indices from a list.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Extracts specified columns from a list by taking a list of column indices and converting them into the minimum number of Python
slice objects ahead of time, before iterating through the list. The slices are then used to extract ranges of columns.
""" | |
from itertools import chain | |
def _run_to_slice(first, last, step):
    """Return the slice that selects the run first, first + step, ..., last.

    ``step`` is ``None`` for a run holding a single index.
    """
    if step is None:
        # A lone index is a run of length one.
        step = 1
    if step > 0:
        stop = last + 1
        # ``last == -1``: a stop of 0 would produce an empty slice, so leave
        # the slice open-ended to reach the final element.
        if stop == 0:
            stop = None
    else:
        stop = last - 1
        # ``last == 0``: a stop of -1 would mean "last element", so leave the
        # slice open-ended to reach the first element.
        if stop == -1:
            stop = None
    return slice(first, stop, step)


def prepare_slices(indices):
    """
    Converts an iterable of Python indices into an optimized list of slice objects.

    Consecutive indices that are evenly spaced are greedily merged into one
    slice; an index that cannot extend the current run starts a new one.
    The input is never modified. An empty input yields an empty list, and an
    isolated index ``i`` yields a slice selecting only that position.
    Duplicate indices and descending or negative runs are supported; a run
    never mixes negative and non-negative indices because such indices are
    not evenly spaced positions once resolved against a sequence.

    :param indices: iterable of integer indices, in the desired output order
    :returns: list of slice objects that select those indices in order

    >>> x = [2, 4, 13, 16, 19, 23, 24, 25]
    >>> prepare_slices(x)
    [slice(2, 5, 2), slice(13, 20, 3), slice(23, 26, 1)]
    >>> x
    [2, 4, 13, 16, 19, 23, 24, 25]
    >>> prepare_slices([2, 4, 13])
    [slice(2, 5, 2), slice(13, 14, 1)]
    >>> prepare_slices([])
    []
    """
    slices = []
    in_run = False
    first = last = step = None
    for index in indices:
        if not in_run:
            first = last = index
            step = None
            in_run = True
            continue
        same_sign = (index < 0) == (first < 0)
        if step is None:
            # Second element of a run fixes the step; a duplicate (step 0)
            # cannot be expressed as a slice step, so it starts a new run.
            extends = same_sign and index != last
        else:
            extends = same_sign and index - last == step
        if extends:
            if step is None:
                step = index - last
            last = index
        else:
            slices.append(_run_to_slice(first, last, step))
            first = last = index
            step = None
    if in_run:
        slices.append(_run_to_slice(first, last, step))
    return slices
def slice_columns(seq, slices):
    """
    Collects, in order, the items of a sequence selected by each slice object.

    :param seq: sequence supporting slice indexing
    :param slices: iterable of slice objects applied one after another
    :returns: a single flat list holding every selected item

    >>> letters_a_to_z = [chr(97 + i) for i in range(26)]
    >>> slices_ = [slice(2, 5, 2), slice(13, 20, 3), slice(23, 26, 1)]
    >>> slice_columns(letters_a_to_z, slices_)
    ['c', 'e', 'n', 'q', 't', 'x', 'y', 'z']
    """
    # Flatten the per-slice pieces into one list.
    return [item for piece in slices for item in seq[piece]]
def slice_rows(rows, indices):
    """
    Extracts columns from an iterable of rows.

    The slices are prepared once, then applied to every row. This is a
    generator: nothing is computed until iteration begins.

    :param rows: iterable of sequences (one per row) supporting slicing
    :param indices: iterable of column indices to extract from each row
    :returns: generator yielding one list of extracted items per row
    """
    # Hand prepare_slices a private copy so the caller's index list is never
    # modified (and can be reused), and so any iterable of indices works.
    slices = prepare_slices(list(indices))
    for row in rows:
        yield slice_columns(row, slices)
# Run the docstring examples as doctests when executed as a script.
if __name__ == '__main__':
    from doctest import testmod
    testmod()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment