# KaoruNishikawa/padded_array.py

## padded_array.py
from typing import Any

import numpy as np


def padded_array(list_: list, padval: Any = np.nan) -> np.ndarray:
    """Make np.array from ragged nested lists.

    Convert list of any dimension into an np.array with no ragged parts.
    Every sub-sequence is padded at its end with ``padval`` until it is as
    long as the longest sequence found at the same nesting depth. A scalar
    that sits at a depth where other elements are sequences is wrapped into
    a one-element list and padded the same way.

    Parameters
    ----------
    list_: list
        (Nested, ragged) list. Elements that support ``len()`` (lists,
        tuples, ...) count as a nested level, except ``str``, which is always
        treated as a scalar. Empty sequences are allowed at any depth. The
        top-level sequence and the nested sequences are copied into new
        lists before padding.
    padval: Any
        Value to put into the ragged parts.

    Returns
    -------
    np.ndarray
        Numpy array, whose ragged elements are padded with ``padval``.

    Examples
    --------
    >>> padded_array([(1, ), [2, 3, 4, 5, 6]], np.inf)
    array([[ 1., inf, inf, inf, inf],
           [ 2.,  3.,  4.,  5.,  6.]])
    >>> padded_array([[], [1, 2]])
    array([[nan, nan],
           [ 1.,  2.]])

    Notes
    -----
    The dtype may be changed, so use ndarray ``astype`` method if needed.

    """
    # Pass 1: for every nesting depth, record the lengths of the sequences
    # found there. Index 0 holds the length of the outermost sequence.
    lengths_per_depth = [[len(list_)]]

    def collect_lengths(seq, depth: int = 1) -> None:
        nested = []
        lengths = []
        for item in seq:
            try:
                if isinstance(item, str):
                    # A str contains str, so descending into it never ends.
                    raise TypeError
                lengths.append(len(item))
            except TypeError:
                lengths.append(0)  # scalar: contributes no extra dimension
            else:
                nested.append(item)
        # ``default=0`` keeps an empty sequence from raising ValueError.
        longest = max(lengths, default=0)
        if depth < len(lengths_per_depth):
            lengths_per_depth[depth].append(longest)
        else:
            lengths_per_depth.append([longest])
        if longest > 0:
            for item in nested:
                collect_lengths(item, depth + 1)

    collect_lengths(list_)
    # The deepest recorded level contains only zeros (scalars / empty
    # sequences), so it is not a real dimension and is dropped.
    shape = [max(lengths) for lengths in lengths_per_depth][:-1]
    ndim = len(shape)

    # Pass 2: turn every nested iterable into a fresh list so that it can be
    # extended in place.
    def to_lists(seq: list) -> None:
        for idx, item in enumerate(seq):
            if isinstance(item, str):
                continue  # keep strings as scalars
            try:
                seq[idx] = list(item)
            except TypeError:
                continue  # not iterable: leave the scalar as it is
            to_lists(seq[idx])

    list_ = list(list_)
    to_lists(list_)

    # Pass 3: extend every sequence at every depth to the target length.
    def pad(seq: list, depth: int = 0) -> None:
        if depth >= ndim:
            return
        target = shape[depth]
        for idx, item in enumerate(seq):
            try:
                if isinstance(item, str):
                    # ``str += list`` would fail; treat it like a scalar.
                    raise TypeError
                missing = target - len(item)
            except TypeError:
                seq[idx] = [item]
                missing = target - 1
            seq[idx] += [padval] * missing
            pad(seq[idx], depth + 1)

    # Wrapping in a list lets depth 0 be handled like any other depth.
    pad([list_])
    return np.array(list_)
	from typing import Any

	import numpy as np


	def padded_array(list_: list, padval: Any = np.nan) -> np.ndarray:
	    """Make np.array from ragged nested lists.

	    Convert list of any dimension into an np.array with no ragged parts.
	    Every sub-sequence is padded at its end with ``padval`` until it is as
	    long as the longest sequence found at the same nesting depth. A scalar
	    that sits at a depth where other elements are sequences is wrapped into
	    a one-element list and padded the same way.

	    Parameters
	    ----------
	    list_: list
	        (Nested, ragged) list. Elements that support ``len()`` (lists,
	        tuples, ...) count as a nested level, except ``str``, which is always
	        treated as a scalar. Empty sequences are allowed at any depth. The
	        top-level sequence and the nested sequences are copied into new
	        lists before padding.
	    padval: Any
	        Value to put into the ragged parts.

	    Returns
	    -------
	    np.ndarray
	        Numpy array, whose ragged elements are padded with ``padval``.

	    Examples
	    --------
	    >>> padded_array([(1, ), [2, 3, 4, 5, 6]], np.inf)
	    array([[ 1., inf, inf, inf, inf],
	           [ 2.,  3.,  4.,  5.,  6.]])
	    >>> padded_array([[], [1, 2]])
	    array([[nan, nan],
	           [ 1.,  2.]])

	    Notes
	    -----
	    The dtype may be changed, so use ndarray ``astype`` method if needed.

	    """
	    # Pass 1: for every nesting depth, record the lengths of the sequences
	    # found there. Index 0 holds the length of the outermost sequence.
	    lengths_per_depth = [[len(list_)]]

	    def collect_lengths(seq, depth: int = 1) -> None:
	        nested = []
	        lengths = []
	        for item in seq:
	            try:
	                if isinstance(item, str):
	                    # A str contains str, so descending into it never ends.
	                    raise TypeError
	                lengths.append(len(item))
	            except TypeError:
	                lengths.append(0)  # scalar: contributes no extra dimension
	            else:
	                nested.append(item)
	        # ``default=0`` keeps an empty sequence from raising ValueError.
	        longest = max(lengths, default=0)
	        if depth < len(lengths_per_depth):
	            lengths_per_depth[depth].append(longest)
	        else:
	            lengths_per_depth.append([longest])
	        if longest > 0:
	            for item in nested:
	                collect_lengths(item, depth + 1)

	    collect_lengths(list_)
	    # The deepest recorded level contains only zeros (scalars / empty
	    # sequences), so it is not a real dimension and is dropped.
	    shape = [max(lengths) for lengths in lengths_per_depth][:-1]
	    ndim = len(shape)

	    # Pass 2: turn every nested iterable into a fresh list so that it can be
	    # extended in place.
	    def to_lists(seq: list) -> None:
	        for idx, item in enumerate(seq):
	            if isinstance(item, str):
	                continue  # keep strings as scalars
	            try:
	                seq[idx] = list(item)
	            except TypeError:
	                continue  # not iterable: leave the scalar as it is
	            to_lists(seq[idx])

	    list_ = list(list_)
	    to_lists(list_)

	    # Pass 3: extend every sequence at every depth to the target length.
	    def pad(seq: list, depth: int = 0) -> None:
	        if depth >= ndim:
	            return
	        target = shape[depth]
	        for idx, item in enumerate(seq):
	            try:
	                if isinstance(item, str):
	                    # ``str += list`` would fail; treat it like a scalar.
	                    raise TypeError
	                missing = target - len(item)
	            except TypeError:
	                seq[idx] = [item]
	                missing = target - 1
	            seq[idx] += [padval] * missing
	            pad(seq[idx], depth + 1)

	    # Wrapping in a list lets depth 0 be handled like any other depth.
	    pad([list_])
	    return np.array(list_)