Skip to content

Instantly share code, notes, and snippets.

@KaoruNishikawa
Last active June 24, 2021 15:21
Show Gist options
  • Save KaoruNishikawa/0e26ac2539457a5b8babe53284c7a4fa to your computer and use it in GitHub Desktop.
Save KaoruNishikawa/0e26ac2539457a5b8babe53284c7a4fa to your computer and use it in GitHub Desktop.
Create np.ndarray from ragged nested lists.
from typing import Any
import numpy as np
def padded_array(list_: list, padval: Any = np.nan) -> np.ndarray:
    """Make np.array from ragged nested lists.

    Convert list of any dimension into an np.array with no ragged parts.

    Parameters
    ----------
    list_: list
        (Nested, ragged) list. Nested tuples and other sized iterables are
        accepted as well; strings are always treated as scalar elements.
        The input itself is not modified.
    padval: Any
        Value to put into the ragged parts.

    Returns
    -------
    np.ndarray
        Numpy array, which ragged elements are padded with ``padval``.
        The length of each dimension is the largest length found at that
        nesting depth; scalars found above the deepest level are wrapped
        into one-element lists before being padded.

    Examples
    --------
    >>> padded_array([(1, ), [2, 3, 4, 5, 6]], np.inf)
    array([[ 1., inf, inf, inf, inf],
           [ 2.,  3.,  4.,  5.,  6.]])
    >>> padded_array([[], [1, 2]])
    array([[nan, nan],
           [ 1.,  2.]])

    Notes
    -----
    The dtype may be changed, so use ndarray ``astype`` method if needed.
    """
    # --- Step 1: measure the lengths found at every nesting depth ---------
    lengths_by_depth = [[len(list_)]]

    def collect_lengths(items, depth: int = 1) -> None:
        nested = []
        lengths = []
        for item in items:
            try:
                if isinstance(item, str):
                    # A 1-char string contains itself; recursing into it
                    # would never terminate, so strings count as scalars.
                    raise TypeError
                lengths.append(len(item))
            except TypeError:
                lengths.append(0)
            else:
                nested.append(item)
        # ``default=0`` keeps empty (sub)lists from raising ValueError.
        longest = max(lengths, default=0)
        if depth < len(lengths_by_depth):
            lengths_by_depth[depth].append(longest)
        else:
            lengths_by_depth.append([longest])
        if longest > 0:
            for item in nested:
                collect_lengths(item, depth + 1)

    collect_lengths(list_)
    # The deepest recorded level only holds zeros (lengths of scalars),
    # so it is not a real dimension and is dropped.
    shape = [max(level) for level in lengths_by_depth][:-1]
    ndim = len(shape)

    # --- Step 2: copy the input into plain (mutable) nested lists ---------
    def to_lists(items: list) -> None:
        for i, item in enumerate(items):
            if isinstance(item, str):
                continue  # strings stay scalar, see above
            try:
                converted = list(item)
            except TypeError:
                continue  # not iterable: a scalar leaf
            items[i] = converted
            to_lists(converted)

    rows = list(list_)
    to_lists(rows)

    # --- Step 3: pad every level up to the measured shape -----------------
    def pad(items: list, depth: int) -> None:
        # ``depth`` is the dimension index of the *children* of ``items``.
        if depth >= ndim:
            return
        target = shape[depth]
        for i, item in enumerate(items):
            try:
                if isinstance(item, str):
                    raise TypeError  # to avoid concatenation error
                padlen = target - len(item)
            except TypeError:
                # Scalar above the deepest level: promote it to a list.
                items[i] = [item]
                padlen = target - 1
            items[i] += [padval] * padlen
            pad(items[i], depth + 1)

    # The outermost list already has length ``shape[0]``, so padding starts
    # with its children.
    pad(rows, 1)
    return np.array(rows)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment