markdouthwaite/vectorized_sample_index.py

## vectorized_sample_index.py
def sample_index(arr: ndarray) -> ndarray:
    """
    Given an *ordered* array of non-negative integers, randomly select one index for
    each unique element.

    Parameters
    ----------
    arr : ndarray
        An ordered array of integers.

    Returns
    -------
    ndarray
        An array of indices of shape (n_unique, ), where each element corresponds to a
        randomly sampled index for each unique integer appearing in the input array.

    Examples
    --------
    >>> x = np.array([0, 0, 1, 3, 3, 3, 4, 4, 4, 4, 5, 5, 7, 7, 7])
    >>> y = np.asarray([0, 1, 0, 0, 1, 2, 0, 1, 2, 3, 0, 1, 0, 1, 2])
    >>> s = sample_index(x)
    >>> s
    [ 1  2  3  7 11 13]
    >>> x[s]
    [0 1 3 4 5 7]
    >>> y[s]
    [0 0 1 3 0 2]

    Notes
    -----
    * Taken from https://stackoverflow.com/questions/57885114/numpy-sample-group-of-indices-with-different-values
      with some modifications.

    """

    # Count number of occurrences of each unique value in the array.
    counts = np.bincount(arr)
    # remove any elements with zero count (addresses non-consecutive ints case)
    counts = counts[counts.nonzero()[0]]

    # generate an array of indices mapped to the counts (i.e. if the zeroth element
    # appears twice, and the first element once, we'd have [0, 0, 1 ... ]
    count_idx = np.repeat(np.arange(counts.shape[0]), counts)

    # add 'noise' to the indices, and then get the new order for the indices. Note that
    # as the increment between values in 'counts' is by definition >= 1, this will only
    # act to reorder elements associated with the same unique value.
    shuffled_idx = (count_idx + np.random.random(arr.shape)).argsort()

    # sample the first occurrence of each unique element
    sampled = np.r_[0, counts.cumsum()[:-1]]

    # return the first occurrence of an element in the shuffled indices.
    return shuffled_idx[sampled]
	def sample_index(arr: ndarray) -> ndarray:
	"""
	Given an ordered array of non-negative integers, randomly select one index for
	each unique element.

	Parameters
	----------
	arr : ndarray
	An ordered array of integers.

	Returns
	-------
	ndarray
	An array of indices of shape (n_unique, ), where each element corresponds to a
	randomly sampled index for each unique integer appearing in the input array.

	Examples
	--------
	>>> x = np.array([0, 0, 1, 3, 3, 3, 4, 4, 4, 4, 5, 5, 7, 7, 7])
	>>> y = np.asarray([0, 1, 0, 0, 1, 2, 0, 1, 2, 3, 0, 1, 0, 1, 2])
	>>> s = sample_index(x)
	>>> s
	[ 1 2 3 7 11 13]
	>>> x[s]
	[0 1 3 4 5 7]
	>>> y[s]
	[0 0 1 3 0 2]

	Notes
	-----
	* Taken from https://stackoverflow.com/questions/57885114/numpy-sample-group-of-indices-with-different-values
	with some modifications.

	"""

	# Count number of occurrences of each unique value in the array.
	counts = np.bincount(arr)
	# remove any elements with zero count (addresses non-consecutive ints case)
	counts = counts[counts.nonzero()[0]]

	# generate an array of indices mapped to the counts (i.e. if the zeroth element
	# appears twice, and the first element once, we'd have [0, 0, 1 ... ]
	count_idx = np.repeat(np.arange(counts.shape[0]), counts)

	# add 'noise' to the indices, and then get the new order for the indices. Note that
	# as the increment between values in 'counts' is by definition >= 1, this will only
	# act to reorder elements associated with the same unique value.
	shuffled_idx = (count_idx + np.random.random(arr.shape)).argsort()

	# sample the first occurrence of each unique element
	sampled = np.r_[0, counts.cumsum()[:-1]]

	# return the first occurrence of an element in the shuffled indices.
	return shuffled_idx[sampled]