public
Created

Fancy indexing test for numpy

  • Download Gist
test_indexing.py
Python

import numpy as np
from numpy.compat import asbytes
from numpy.testing import *
import sys, warnings
 
from itertools import product
 
# These tests mimic the C-Code indexing for selection (still relying
# on .take for 1-d arrays along an axis, which is related to indexing).
# Please update the FIXME's if they are fixed (or do not apply).
 
# This mimics some of the odder parts of numpy indexing. These parts
# are marked by a "# FIXME" comment. Testing __setitem__ is possible
# in so far as one can set the values then check with __getitem__
# if they are correct. Note that this may not test some corner cases.
# For example a[[0,1,2],] = [[1,2,3]] works when a[:,] = [[1,2,3]]
# gives an error. This may be a FIXME in itself.
 
# NOTE: * This still lacks tests for complex item setting.
#       * Errors and behavior were originally written to match numpy
#         version 1.7 (for example, which error gets raised if
#         there are two).
 
# When True, _check_multi_index requires numpy to raise exactly the exception
# type raised by the mimic; when False any Exception subclass is accepted.
TEST_ERROR_TYPE = True

# Pool of single-axis index objects; test_multidim combines them into
# 5-tuples with itertools.product.
INDICES = [
    Ellipsis,
    None,
    1,
    -2,
    [],
    # Boolean indices, up to 3-d for some special cases of eating up
    # dimensions; an all-False index needs to be tested as well.
    np.array(False),
    np.array([True, False, False]),
    np.array([[True, False], [False, True]]),
    np.array([[[False, False], [False, False]]]),
    # Some slices:
    slice(None, None),
    slice(-5, 5, 2),
    slice(1, 1, 100),
    slice(4, -1, -2),
    slice(None, None, -3),
    # Some fancy indexes:
    np.empty((0, 1, 1), dtype=np.intp),  # empty broadcastable
    np.array([0, 1, -2]),
    [0],
    np.array([[2], [0], [1]]),
    np.array([[0, -1], [0, 1]]),
    np.array([2, -1]),
]
 
class TestIndexing(TestCase):
    """Check numpy multi-dimensional indexing against a pure-Python mimic.

    ``_get_multi_index`` re-implements ``arr[index_tuple]`` with take,
    reshape, transpose and ravel_multi_index; ``_check_multi_index``
    compares its result (or the exception it raises) with what numpy does
    for ``__getitem__`` and ``__setitem__``.
    """

    def setUp(self):
        """Create the two arrays that the tests index into."""
        # A reshaped arange, so every element equals its own flat index
        # (_check_multi_index relies on that for its __setitem__ check).
        self.a = np.arange(np.prod([3, 1, 5, 6, 4])).reshape(3, 1, 5, 6, 4)
        # Same shape except for one zero-length axis.
        self.b = np.empty((3, 0, 5, 6, 4))

    def _get_multi_index(self, arr, indices):
        """Mimic multi dimensional indexing.

        Parameters
        ----------
        arr : ndarray
            Array to be indexed.
        indices : sequence
            The individual index objects (None, Ellipsis, slices, integers,
            sequences, integer or boolean arrays).

        Returns
        -------
        out : ndarray
            The indexed array.
        no_copy : bool
            If True, ``np.may_share_memory(arr, arr[indices])`` should be
            True.

        Raises
        ------
        IndexError, ValueError
            If this function raises an error, it should be equivalent to
            the error normal indexing raises.

        Notes
        -----
        This supports only multi indices, so a single index ``index`` is
        treated like ``a[index,]``, which is usually the same however.

        This relies on these functions working correctly (and maybe also on
        the correct exception type being raised by them):

        * take
        * ravel_multi_index
        * reshape (for combining axes)
        * transpose
        * nonzero
        * asarray

        Especially take and ravel_multi_index are related to normal
        indexing, but take is only used for 1D arrays along one axis, while
        ravel_multi_index must work fine including broadcasting.

        This is slow since a separate .take is performed for every axis.
        """
        in_indices = list(indices)
        indices = []
        # if False, this is a fancy or boolean index
        no_copy = True
        # If the index includes at least one None/np.newaxis
        # (only referenced by the disabled FIXME code below).
        has_none = False
        # number of fancy/scalar indexes that are not consecutive
        num_fancy = 0
        # number of dimensions indexed by a "fancy" index
        # (only referenced by the disabled FIXME code below).
        fancy_dim = 0
        # For error calling at right time
        empty_boolean_array = False
        # The fancy index has a size of 0, but there are not too many indices
        zero_size_fancy = False
        raise_indexerror_if_broadcastable = False

        # We need to handle Ellipsis and make arrays from indices, also
        # check if this is fancy indexing (set no_copy).
        ndim = 0
        ellipsis_pos = None  # define here mostly to replace all but first.
        for i, indx in enumerate(in_indices):
            if indx is None:
                has_none = True
                continue
            if isinstance(indx, np.ndarray) and indx.dtype == bool:
                no_copy = False
                if indx.ndim == 0:
                    empty_boolean_array = True
                    continue  # it is not a zero_size_fancy...
                if indx.sum() == 0:
                    zero_size_fancy = True
                # boolean indices can have higher dimensions
                ndim += indx.ndim
                fancy_dim += indx.ndim
                continue
            if indx is Ellipsis:
                if ellipsis_pos is None:
                    ellipsis_pos = i
                    continue  # do not increment ndim counter
                # Every Ellipsis after the first acts as a full slice.
                in_indices[i] = slice(None, None)
                ndim += 1
                continue
            if isinstance(indx, slice):
                ndim += 1
                continue
            if not isinstance(indx, np.ndarray):
                # This could be open for changes in numpy.
                # numpy should maybe raise an error if casting to intp
                # is not safe. It rejects np.array([1., 2.]) but not
                # [1., 2.] as index (same for ie. np.take).
                # (Note the importance of empty lists if changing this here)
                indx = np.array(indx, dtype=np.intp)
                in_indices[i] = indx
            if indx.ndim != 0:
                no_copy = False
            if indx.size == 0:
                zero_size_fancy = True
            ndim += 1
            fancy_dim += 1

        # if not no_copy and has_none:
        #     # FIXME: Numpy should probably support the None/np.newaxis
        #     # syntax in combination with fancy/boolean indexing. A
        #     # TypeError seems not ideal in any case.
        #     raise TypeError

        # # FIXME: This is a bug, numpy 1.7. allows indexing to succeed by
        # # ignoring all non-fancy indexes if the fancy ones have the correct
        # # number of dimensions.
        # # Right now numpy does not allow mixing None with fancy indexes
        # # if that is implemented it might affect this?
        # if not no_copy and fancy_dim == arr.ndim:
        #     # brutally remove all non-fancy axes!
        #     in_indices = [indx for indx in in_indices
        #                   if not (isinstance(indx, slice)
        #                           or indx is Ellipsis or indx is None)]
        #     # And even singleton boolean ones.
        #     in_indices = [indx for indx in in_indices
        #                   if indx.ndim != 0 or indx.dtype != bool]
        #     ellipsis_pos = None  # there is none anymore
        #     empty_boolean_array = False
        # el  # was elif
        if arr.ndim - ndim < 0:
            # we can't take more dimensions than we have, not even for 0-d
            # arrays, since a[()] makes sense, but not a[(),]. We will raise
            # an error later on, unless a broadcasting error occurs first.
            raise_indexerror_if_broadcastable = True
            zero_size_fancy = False
            # resize the array to make sure things "work"
            arr = arr.reshape((arr.shape + (1,) * (ndim - arr.ndim)))

        # We may not have a 0-d boolean index
        if empty_boolean_array:
            raise_indexerror_if_broadcastable = True
            # and remove it, but this fails even if broadcasting would
            # otherwise work of course.
            zero_size_fancy = False
            in_indices = [indx for indx in in_indices
                          if not isinstance(indx, np.ndarray)
                          or indx.ndim != 0 or indx.dtype != bool]
            # Update ellipsis_pos if necessary (removing entries may have
            # shifted the position of the remaining Ellipsis).
            if ellipsis_pos is not None:
                for ellipsis_pos, candidate in enumerate(in_indices):
                    if candidate is Ellipsis:
                        break

        elif ndim == 0 and None not in in_indices:
            # Well we have no indexes or one Ellipsis. This is legal.
            return arr.view(), no_copy

        if ellipsis_pos is not None:
            # Expand the Ellipsis into as many full slices as needed.
            in_indices[ellipsis_pos:ellipsis_pos + 1] = (
                [slice(None, None)] * (arr.ndim - ndim))

        # Build ``indices``: a list of [kind, index_array, ...] entries with
        # kind 's' (slice), 'n' (newaxis) or 'f' (fancy); consecutive fancy
        # indexes are gathered into a single 'f' entry.
        for ax, indx in enumerate(in_indices):
            if isinstance(indx, slice):
                # convert to an index array anyway:
                indx = np.arange(*indx.indices(arr.shape[ax]))
                indices.append(['s', indx])
                continue
            elif indx is None:
                # this is like taking a slice with one element from a new
                # axis:
                indices.append(['n', np.array([0], dtype=np.intp)])
                arr = arr.reshape((arr.shape[:ax] + (1,) + arr.shape[ax:]))
                continue
            if isinstance(indx, np.ndarray) and indx.dtype == bool:
                # This may be open for improvement in numpy.
                # numpy should probably cast boolean lists to boolean indices
                # instead of intp!

                # Numpy supports a boolean index with non-matching shape as
                # long as the True values are not out of bounds, also this
                # means that no broadcasting is attempted. Numpy maybe
                # should not allow this (at least not arrays that are
                # larger than the original one).
                try:
                    flat_indx = np.ravel_multi_index(
                        np.nonzero(indx), arr.shape[ax:ax + indx.ndim],
                        mode='raise')
                except Exception:
                    # There can't be broadcasting errors so must be index
                    # error (there is one exception, and that is if it gets
                    # reduced to 0-size by broadcasting later on)
                    raise_indexerror_if_broadcastable = True
                    flat_indx = np.arange(indx.sum())
                # concatenate axes into a single one:
                arr = arr.reshape((arr.shape[:ax]
                                   + (np.prod(arr.shape[ax:ax + indx.ndim]),)
                                   + arr.shape[ax + indx.ndim:]))
                indx = flat_indx
            if (len(indices) > 0 and indices[-1][0] == 'f'
                    and ax != ellipsis_pos):
                # NOTE: There could still have been a 0-sized Ellipsis
                # between them. Checked that with ellipsis_pos.
                indices[-1].append(indx)
            else:
                # We have a fancy index that is not after an existing one.
                # NOTE: A 0-d array triggers this as well, while
                # one may expect it to not trigger it, since a scalar
                # would not be considered fancy indexing.
                num_fancy += 1
                indices.append(['f', indx])

        if num_fancy > 1 and not no_copy:
            # We have to flush the fancy indexes left
            new_indices = indices[:]
            # Must be a real list: entries are removed below.
            axes = list(range(arr.ndim))
            fancy_axes = []
            new_indices.insert(0, ['f'])
            ni = 0
            ai = 0
            for indx in indices:
                ni += 1
                if indx[0] == 'f':
                    new_indices[0].extend(indx[1:])
                    del new_indices[ni]
                    ni -= 1
                    for ax in range(ai, ai + len(indx[1:])):
                        fancy_axes.append(ax)
                        axes.remove(ax)
                ai += len(indx) - 1  # axis we are at
            indices = new_indices
            # and now we need to transpose arr:
            arr = arr.transpose(*(fancy_axes + axes))

        # We only have one 'f' index now and arr is transposed accordingly.
        # Now handle newaxes by reshaping...
        ax = 0
        for indx in indices:
            if indx[0] == 'f':
                # First of all, reshape arr to combine fancy axes into one:
                orig_shape = arr.shape
                orig_slice = orig_shape[ax:ax + len(indx[1:])]
                arr = arr.reshape((arr.shape[:ax]
                                   + (np.prod(orig_slice).astype(int),)
                                   + arr.shape[ax + len(indx[1:]):]))
                # unfortunately the indices might be out of bounds. So check
                # that first, and use mode='wrap' then.
                for _indx, _size in zip(indx[1:], orig_slice):
                    if _indx.size == 0:
                        continue
                    if np.any(_indx >= _size) or np.any(_indx < -_size):
                        raise_indexerror_if_broadcastable = True
                # Check if broadcasting works
                if len(indx[1:]) > 1:
                    np.broadcast(*indx[1:])  # raises ValueError...
                if len(indx[1:]) != len(orig_slice):
                    raise IndexError
                if len(indx[1:]) == len(orig_slice):
                    if np.prod(orig_slice) == 0:
                        # Work around for a segfault or IndexError with
                        # 'wrap' in some cases
                        try:
                            mi = np.ravel_multi_index(indx[1:], orig_slice,
                                                      mode='raise')
                        except Exception:
                            # This happens with 0-sized orig_slice
                            # (sometimes?) here it is a ValueError, but
                            # indexing gives a:
                            raise IndexError('invalid index into 0-sized')
                    else:
                        mi = np.ravel_multi_index(indx[1:], orig_slice,
                                                  mode='wrap')
                else:
                    raise ValueError
                # if (mi.size == 0 and not no_copy and arr.shape[ax] == 0
                #         and len(indices) != 1):
                #     # FIXME: This is a bug in numpy.
                #     # Numpy does not allow taking an empty array from an
                #     # empty axis when using fancy indexing (here indicated
                #     # by not no_copy), but we have other indices that are
                #     # not fancy (or ignored).
                #     # This is because for the fancy indexes it selects
                #     # the 0s.
                #     raise IndexError
                arr = arr.take(mi.ravel(), axis=ax)
                arr = arr.reshape((arr.shape[:ax]
                                   + mi.shape
                                   + arr.shape[ax + 1:]))
                ax += mi.ndim
                continue

            try:
                # If we are here, we have a 1D array for take:
                arr = arr.take(indx[1], axis=ax)
            except Exception:
                # Defer the error: a broadcasting error takes precedence.
                raise_indexerror_if_broadcastable = True
            ax += 1
        # There was no broadcasting error so raise any IndexError now.
        # However if the fancy indexing is empty, we do not need to worry
        # about that.
        if raise_indexerror_if_broadcastable and not zero_size_fancy:
            raise IndexError  # there were no fancy indexes...

        return arr, no_copy

    def _check_multi_index(self, arr, index):
        """Check multi index getting and simple setting.

        The input array must be a reshaped arange for the ``__setitem__``
        check of non-view results to work; that check relies on ``.flat``.

        Parameters
        ----------
        arr : ndarray
            Array to index.
        index : tuple
            Multi index handed both to ``_get_multi_index`` and to numpy.
        """
        # Test item getting
        try:
            mimic_get, no_copy = self._get_multi_index(arr, index)
        except Exception as e:
            # The mimic failed, so numpy must fail as well, with the same
            # exception type if TEST_ERROR_TYPE is set.
            if TEST_ERROR_TYPE:
                error = type(e)
            else:
                error = Exception
            assert_raises(error, arr.__getitem__, index)
            assert_raises(error, arr.__setitem__, index, 0)
            return

        arr = arr.copy()
        indexed_arr = arr[index]
        assert_array_equal(indexed_arr, mimic_get)
        # Check if we got a view, unless it is a 0-sized array (then it is
        # not a view, and that does not matter)
        if indexed_arr.size != 0:
            assert_(np.may_share_memory(indexed_arr, arr) == no_copy)
        sys.stdout.flush()
        # Test non-broadcast setitem:
        b = arr.copy()
        b[index] = mimic_get + 1000
        if b.size == 0:
            return  # nothing to compare here...
        if no_copy:
            # change indexed_arr in-place to manipulate original:
            indexed_arr += 1000
            assert_array_equal(arr, b)
            return
        # Use the fact that the array is originally an arange:
        arr.flat[indexed_arr.ravel()] += 1000
        assert_array_equal(arr, b)

    def test_simple(self):
        """Index with an empty tuple and a lone Ellipsis; assign via Ellipsis."""
        assert_array_equal(self.b[()], self.b)
        assert_array_equal(self.b[...], self.b)
        a = np.array(5)
        a[...] = 1
        assert_equal(a, 1)

    def test_boolean(self):
        """Index with 0-d booleans and with all-False boolean arrays."""
        a = np.array(5)
        assert_equal(a[np.array(True)], 5)
        a[np.array(True)] = 1
        assert_equal(a, 1)
        # NOTE: This is different from normal broadcasting, as
        # arr[boolean_array] works like in a multi index. Which means
        # it is aligned to the left. This is probably correct for
        # consistency with arr[boolean_array,] also no broadcasting
        # is done at all
        self._check_multi_index(
            self.a, (np.zeros_like(self.a, dtype=bool),))
        self._check_multi_index(
            self.a, (np.zeros_like(self.a, dtype=bool)[..., 0],))
        self._check_multi_index(
            self.a, (np.zeros_like(self.a, dtype=bool)[None, ...],))

    def test_multidim(self):
        """Check every 5-tuple built from INDICES against both arrays."""
        # Since None is part of INDICES, this also covers combinations in
        # which the Ellipsis has to fill in for it.
        tocheck = [INDICES] * 5  # + [[slice(None,None)]]
        for index in product(*tocheck):
            self._check_multi_index(self.a, index)
            self._check_multi_index(self.b, index)

        # Also check (simple cases of) too many indices:
        assert_raises(IndexError, self.a.__getitem__, (0, 0, 0, 0, 0, 0))
        assert_raises(IndexError, self.a.__setitem__, (0, 0, 0, 0, 0, 0), 0)
        assert_raises(IndexError, self.a.__getitem__, (0, 0, 0, [1], 0, 0))
        assert_raises(IndexError, self.a.__setitem__, (0, 0, 0, [1], 0, 0), 0)
 
 
if __name__ == "__main__":
    # Run the tests of this module when the file is executed as a script.
    run_module_suite()

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.