Last active
June 24, 2021 15:21
-
-
Save KaoruNishikawa/0e26ac2539457a5b8babe53284c7a4fa to your computer and use it in GitHub Desktop.
Create np.ndarray from ragged nested lists.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Any | |
import numpy as np | |
def padded_array(list_: list, padval: Any = np.nan) -> np.ndarray:
    """Make np.array from ragged nested lists.

    Convert a list of any nesting depth into an np.array with no ragged
    parts. Every axis is sized to the longest sequence found at that depth
    and shorter sequences are filled up with ``padval``. Scalars (and
    strings, which are treated as scalars) that sit beside sequences are
    wrapped into one-element sequences before padding.

    Parameters
    ----------
    list_: list
        (Nested, ragged) list. Nested tuples and other sized iterables are
        accepted as well. The input is not modified; nested sequences are
        copied into fresh lists first.
    padval: Any
        Value to put into the ragged parts.

    Returns
    -------
    np.ndarray
        Numpy array, whose ragged elements are padded with ``padval``.

    Raises
    ------
    TypeError
        If ``list_`` itself has no length or is not iterable.

    Examples
    --------
    >>> padded_array([(1, ), [2, 3, 4, 5, 6]], np.inf)
    array([[ 1., inf, inf, inf, inf],
           [ 2.,  3.,  4.,  5.,  6.]])
    >>> padded_array([[], [1, 2]], 0)
    array([[0, 0],
           [1, 2]])

    Notes
    -----
    The dtype may be changed, so use ndarray ``astype`` method if needed.

    """

    def sized_len(obj: Any) -> int:
        """Return ``len(obj)``; raise TypeError for scalars and strings."""
        if isinstance(obj, str):
            # Every character of a str is again a str, so descending into
            # strings would never terminate; handle them as scalars.
            raise TypeError
        return len(obj)

    # --- get size -------------------------------------------------------
    # lengths_per_dim[d] collects the lengths observed at nesting depth d.
    lengths_per_dim = [[len(list_)]]

    def collect_lengths(seq: Any, dim: int = 1) -> None:
        nested = []
        lengths = []
        for item in seq:
            try:
                lengths.append(sized_len(item))
            except TypeError:
                lengths.append(0)  # scalar: contributes no extent
            else:
                nested.append(item)
        # ``default=0`` keeps empty sequences from raising ValueError.
        longest = max(lengths, default=0)
        if dim < len(lengths_per_dim):
            lengths_per_dim[dim].append(longest)
        else:
            lengths_per_dim.append([longest])
        if longest > 0:
            for item in nested:
                collect_lengths(item, dim + 1)

    collect_lengths(list_)
    # The deepest recorded level holds only zeros (recursion stops there),
    # so it is not an axis of the result and is dropped.
    shape = [max(lengths) for lengths in lengths_per_dim][:-1]
    ndim = len(shape)

    # --- convert to list --------------------------------------------------
    def to_nested_lists(seq: list) -> None:
        for i, item in enumerate(seq):
            if isinstance(item, str):
                continue
            try:
                seq[i] = list(item)
            except TypeError:
                continue  # not iterable: leave the scalar untouched
            to_nested_lists(seq[i])

    # Shallow copy here plus per-level copies above leave the input intact.
    list_ = list(list_)
    to_nested_lists(list_)

    # --- pad with padval --------------------------------------------------
    def pad(seq: list, dim: int = 0) -> None:
        if dim >= ndim:
            return
        target = shape[dim]
        for i, item in enumerate(seq):
            try:
                missing = target - sized_len(item)
            except TypeError:
                # Scalar next to sequences: promote to a 1-element list.
                seq[i] = [item]
                missing = target - 1
            seq[i] += [padval] * missing
            pad(seq[i], dim + 1)

    # Wrapping in a list lets ``pad`` treat the outermost axis uniformly.
    pad([list_])
    return np.array(list_)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment