Skip to content

Instantly share code, notes, and snippets.

@jaysoncena
Created November 9, 2019 06:46
Show Gist options
  • Save jaysoncena/592ad5b12b745178f25a317e0a84971d to your computer and use it in GitHub Desktop.
Save jaysoncena/592ad5b12b745178f25a317e0a84971d to your computer and use it in GitHub Desktop.
Resize numpy array to a fixed size and pad with np.nan
def load_npy_files(path, range_val=(), row_col=(0, 0)):
    """Load numbered ``.npy`` files and NaN-pad each one to a fixed 2-D size.

    Files are read from ``{path}/{n}.npy`` for every ``n`` in
    ``range(*range_val)``. Each array must be two-dimensional. Arrays smaller
    than ``row_col`` are padded at the bottom/right with ``np.nan``.

    Args:
        path: Directory containing the ``<n>.npy`` files.
        range_val: Arguments forwarded to ``range`` (``(stop,)``,
            ``(start, stop)`` or ``(start, stop, step)``).
        row_col: Target ``(rows, cols)``. A non-positive entry disables
            padding and size validation for that axis.

    Returns:
        List of the loaded (and padded) arrays, in file order. Loading stops
        early, returning what was collected so far, at the first array whose
        shape does not match ``row_col`` after padding (for example, because
        it is larger than the target).

    Raises:
        TypeError: If ``range_val`` is empty (``range()`` needs an argument).
        ValueError: If a loaded array is not two-dimensional.
    """
    target_rows, target_cols = row_col
    file_numbers = range(*range_val)
    if not file_numbers:
        return []
    # Derive the last index from the range itself so that one-element
    # ``range_val`` tuples and custom steps are handled correctly.
    last_fnum = file_numbers[-1]

    max_rows = 0
    max_cols = 0
    total_rows = 0
    dataset_list = []
    for fnum in file_numbers:
        npload = np.load(f"{path}/{fnum}.npy")
        rows, cols = npload.shape
        total_rows += rows
        max_rows = max(max_rows, rows)
        max_cols = max(max_cols, cols)

        # Clamp at zero: np.pad rejects negative widths, which would occur
        # when only one axis needs padding and the other is oversized.
        pad_row = max(target_rows - rows, 0)
        pad_col = max(target_cols - cols, 0)
        if pad_row or pad_col:
            # NaN is not representable in integer/bool arrays.
            if not np.issubdtype(npload.dtype, np.inexact):
                npload = npload.astype(float)
            npload = np.pad(
                npload,
                ((0, pad_row), (0, pad_col)),
                'constant',
                constant_values=np.nan,
            )

        # Sanity check against the requested size (not a hard-coded one);
        # axes with a non-positive target are not validated.
        expected_shape = (
            target_rows if target_rows > 0 else npload.shape[0],
            target_cols if target_cols > 0 else npload.shape[1],
        )
        if npload.shape != expected_shape:
            logging.error("Unexpected size of array: %s", npload.shape)
            break
        dataset_list.append(npload)

        # Progress report every 1000 files and once more on the final file.
        if fnum % 1000 == 0 or fnum == last_fnum:
            avg_rows = round(total_rows / len(dataset_list), 2)
            logging.info(
                "%s: Dataset size: %s, row.mean=%s",
                fnum, (max_rows, max_cols), avg_rows,
            )
    return dataset_list
# Load training files 0..30335, padding each array to 336 rows x 40 columns.
load_npy_files(
    "/kaggle/input/train/train",
    range_val=(0, 30336),
    row_col=(336, 40),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment