import numpy as np
# make a minibatch of time, batch, features
# time length 7
# batch size 2
# feature dimension 4:
# 1:4, 11:14, 21:24, 31:34, etc for first minibatch element
# 6:9, 16:19, etc for second minibatch element
n_features = 4
n_timesteps = 7
base_mb1_features = np.arange(n_features) + 1
time_mb1_features = 10 * np.arange(n_timesteps)[:, None] + base_mb1_features[None]
base_mb2_features = np.arange(n_features) + 5 + 1
time_mb2_features = 10 * np.arange(n_timesteps)[:, None] + base_mb2_features[None]
data = np.concatenate((time_mb1_features[:, None], time_mb2_features[:, None]), axis=1)
time_len = data.shape[0]
minibatch_size = data.shape[1]
features = data.shape[2]
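# quick illustrative check of the layout built above
print("data shape (time, minibatch, features):", data.shape)  # (7, 2, 4)
print(data[0])  # -> [[1, 2, 3, 4], [6, 7, 8, 9]]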
# for every feature vector at each (time, minibatch) position, we will make an autoregressive mask and equivalent targets for each step
# the new assumption is that the "feature" dimension is the one for autoregression
# this should be more natural compared to the previous example
# 1, 2, 3, 4 -> in: 0, 0, 0, 0, ; target: 1
# 1, 2, 3, 4 -> in: 1, 0, 0, 0, ; target: 2
# 1, 2, 3, 4 -> in: 1, 2, 0, 0, ; target: 3
# 1, 2, 3, 4 -> in: 1, 2, 3, 0, ; target: 4
# accomplished using np.triu with k=1
# np.triu(np.ones((3, 3)), k=1) ->
# [[0, 1, 1],
#  [0, 0, 1],
#  [0, 0, 0]]
# column j of the resulting mask marks which features are visible at autoregressive step j
mask_array = np.triu(np.ones((features, features)), k=1)
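# purely illustrative: print each column of the mask, i.e. the per-step visibility pattern described above
for j in range(n_features):
    print("step", j, "visibility mask:", mask_array[:, j])
# step 0 -> [0, 0, 0, 0], step 1 -> [1, 0, 0, 0], step 2 -> [1, 1, 0, 0], step 3 -> [1, 1, 1, 0]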
# now we have a (4, 4) mask that we want to broadcast-multiply against the (7, 2, 4) data
# the result is (4, 4, 7, 2), which is basically features, "feature time" (autoregressive step), time, minibatch
masked_and_copied = mask_array[:, :, None, None] * data.transpose(2, 0, 1)[:, None]
# there are still extra 0s at the end (the last feature never appears in any input) but we leave them alone for now
# now we transpose it to
# "feature_time", time, minibatch, features
masked_and_copied = masked_and_copied.transpose(1, 2, 3, 0)
# 0th "timestep", 0th element, looping over the "autoregressive" axis we see
# masked_and_copied[0, 0, 0] = [0, 0, 0, 0]
# masked_and_copied[0, 0, 1] = [1, 0, 0, 0]
# masked_and_copied[0, 0, 2] = [1, 2, 0, 0]
# masked_and_copied[0, 0, 3] = [1, 2, 3, 0]
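# lightweight self-check of the values claimed above
assert np.allclose(masked_and_copied[0, 0, 0], [0, 0, 0, 0])
assert np.allclose(masked_and_copied[1, 0, 0], [1, 0, 0, 0])
assert np.allclose(masked_and_copied[2, 0, 0], [1, 2, 0, 0])
assert np.allclose(masked_and_copied[3, 0, 0], [1, 2, 3, 0])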
ar_data = masked_and_copied.copy()
ar_data_shape = ar_data.shape
# make the targets
ar_targets = data.transpose(2, 0, 1)[..., None]
ar_targets_shape = ar_targets.shape
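# sanity check: at autoregressive step j, the target is the j-th feature of the original data
assert ar_targets.shape == (n_features, n_timesteps, minibatch_size, 1)
assert ar_targets[1, 0, 0, 0] == data[0, 0, 1]  # step 1, t=0, mb=0 -> feature value 2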
# we rearrange (and un-rearrange) with this function to get the "normal" training scheme of minibatch, features
def ar_minibatch_conversion(arr, original_shape=None, inverse=False):
    # expects
    # "feature_time", time, minibatch, features
    # open question: skip the totally blank input at step 0?
    if not inverse:
        # ar, t, mb, f -> t, mb * ar, f
        # in particular, we want the masked groups for each minibatch element contiguous, so that we can do a reshape/structured sum to average them
        shp = arr.shape
        arr = arr.transpose(1, 2, 0, 3)
        arr = arr.reshape(shp[1], shp[2] * shp[0], shp[3])
        # now we have t, mb * ar, f
        # *each* timestep can be fed through a network, effectively running all the timesteps in parallel (assuming no hidden state is passed between them)
        return arr
    else:
        # invert the forward procedure; original_shape is REQUIRED here
        shp = original_shape
        arr = arr.reshape(shp[1], shp[2], shp[0], shp[3])
        arr = arr.transpose(2, 0, 1, 3)
        return arr
flat_data = ar_minibatch_conversion(ar_data)
orig_data = ar_minibatch_conversion(flat_data, ar_data_shape, inverse=True)
flat_targets = ar_minibatch_conversion(ar_targets)
orig_targets = ar_minibatch_conversion(flat_targets, ar_targets_shape, inverse=True)
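# round-trip sanity check: the inverse conversion should exactly recover the original arrays
assert np.allclose(orig_data, ar_data)
assert np.allclose(orig_targets, ar_targets)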
# normally you would do something like step_preds = f(flat_data)
# per_step_loss = (step_preds - flat_targets) ** 2
# loss = per_step_loss.sum() or loss = per_step_loss.mean()
# loss.backward()
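# a minimal sketch of the training step described above, assuming f is just a random
# linear map from features to a single prediction (a stand-in for a real network)
rng = np.random.RandomState(0)
W = rng.randn(features, 1)
step_preds = flat_data @ W  # (t, mb * ar, 1), all timesteps and steps in parallel
per_step_loss = (step_preds - flat_targets) ** 2
loss = per_step_loss.mean()
print("example mean squared error:", loss)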
for i in range(n_features):
    print("flat")
    # show that it is chunkwise blocked into the minibatch: indices 0:4 along axis 1 are the 4 autoregressive steps of the first minibatch element
    print(flat_data[:, i])
    print(flat_targets[:, i])
# this should match the previous printout
# this setup is available if you wanted to do more structured losses than just an average per step
for i in range(n_features):
    print("orig")
    print(orig_data[i, :, 0])
    print(orig_targets[i, :, 0])
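# a sketch of a more structured loss over the recovered (ar, t, mb, f) layout, reusing
# step_preds from the linear-map sketch above; the weights here are arbitrary, purely
# for illustration (e.g. weighting later autoregressive steps more heavily)
orig_preds = ar_minibatch_conversion(step_preds, ar_targets_shape, inverse=True)
ar_weights = np.linspace(0.5, 1.0, n_features)[:, None, None, None]
structured_loss = (ar_weights * (orig_preds - orig_targets) ** 2).mean()
print("example ar-weighted loss:", structured_loss)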