Skip to content

Instantly share code, notes, and snippets.

@eonu
Last active June 25, 2022 14:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eonu/e8a41332e796dd60985a2bcc1d22d40c to your computer and use it in GitHub Desktop.
Save eonu/e8a41332e796dd60985a2bcc1d22d40c to your computer and use it in GitHub Desktop.
Torch implementation of DeepGRU

DeepGRU

A modular PyTorch implementation of the DeepGRU (Deep Gesture Recognition Utility) recurrent neural network architecture designed by Maghoumi & LaViola Jr.[1], originally for gesture recognition but applicable to general sequences.

Dependencies

This implementation of DeepGRU requires a working installation of torch.

Usage

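Collate `(sequence, label)` pairs into a padded batch with `collate_fn` (for example by passing it as the `collate_fn` argument of a `torch.utils.data.DataLoader`), then call a `DeepGRU(n_features, n_classes)` instance on the resulting `(padded_sequences, lengths)` to obtain log-softmax class scores, which are suited to training with `torch.nn.NLLLoss`.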

References

[1] Mehran Maghoumi & Joseph J. LaViola Jr. "DeepGRU: Deep Gesture Recognition Utility" Advances in Visual Computing, 14th International Symposium on Visual Computing, ISVC 2019, Proceedings, Part I, 16-31.
import torch
from torch import nn
from torch.nn import functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
class DeepGRU(nn.Module):
    """A modular PyTorch implementation of the DeepGRU (Deep Gesture Recognition Utility) recurrent neural network architecture
    designed by Maghoumi & LaViola Jr., originally for gesture recognition, but applicable to general sequences.

    Parameters
    ----------
    n_features: `int`
        The number of features that each observation within a sequence has.
    n_classes: `int`
        The number of different sequence classes.
    dims: `dict`, optional
        A dictionary consisting of dimension configuration for the GRUs and fully-connected layers.
        Values for the keys `'gru1'`, `'gru2'`, `'gru3'` and `'fc'` must be set.
        Defaults to `{'gru1': 512, 'gru2': 256, 'gru3': 128, 'fc': 256}`.
    device: `str`, optional
        The device to send the model parameters to for computation.
        If no device is specified, a check is made for any available CUDA device, otherwise the CPU is used.

    Raises
    ------
    KeyError
        If `dims` does not contain all of the required keys.
    """

    def __init__(self, n_features, n_classes, dims=None, device=None):
        super().__init__()

        # Build the default configuration per call instead of sharing one mutable dict between instances
        if dims is None:
            dims = {'gru1': 512, 'gru2': 256, 'gru3': 128, 'fc': 256}

        # Fail early with a readable message rather than deep inside a sub-module constructor
        missing = [key for key in ('gru1', 'gru2', 'gru3', 'fc') if key not in dims]
        if missing:
            raise KeyError(f"`dims` is missing required keys: {missing}")

        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Only the GRU sizes (plus the input size) are forwarded to the encoder
        gru_dims = {k: v for k, v in dims.items() if k.startswith('gru')}

        # Specify sub-modules
        self.model = nn.ModuleDict({
            'enc': EncoderNetwork(dims={'in': n_features, **gru_dims}, device=device),
            'attn': AttentionModule(dims={'in': dims['gru3']}, device=device),
            # The attention module concatenates two context vectors, hence the doubled input size
            'clf': Classifier(dims={'in': dims['gru3'] * 2, 'fc': dims['fc'], 'out': n_classes}, device=device),
        })

        # Send model to device
        self.to(device)

    def forward(self, x, x_lengths):
        """Passes the batched input sequences through the encoder network, attention module and classifier to generate log-softmax scores.

        Since log-softmax scores are returned, it is advised to use the negative log-likelihood loss `torch.nn.NLLLoss`.

        Parameters
        ----------
        x: `torch.Tensor` (`float`)
            A (`batch_size` x `max_len` x `n_features`) zero-padded batch of input observation sequences.
            The tensor is packed internally by the encoder, so it must NOT already be a `PackedSequence`.
        x_lengths: `torch.Tensor` (`int`)
            A tensor of the sequence lengths of the batch in descending order.

        Returns
        -------
        log_softmax: `torch.Tensor` (`float`)
            (`batch_size` x `n_classes`) tensor of `n_classes` log-softmax scores for each observation sequence in the batch.
        """
        # h: per-step hidden states of the last GRU, h_last: its final hidden state
        h, h_last = self.model['enc'](x, x_lengths)
        o_attn = self.model['attn'](h, h_last)
        return self.model['clf'](o_attn)
class EncoderNetwork(nn.Module):
    """Stack of three GRUs that encodes a padded batch of sequences into per-step hidden states.

    Parameters
    ----------
    dims: `dict`
        Layer sizes. The keys `'in'` (input feature size), `'gru1'`, `'gru2'` and `'gru3'` (hidden sizes) are read.
    device: `str` or `torch.device`
        The device that the GRU parameters are initially sent to.
    """

    def __init__(self, dims, device):
        super().__init__()
        self.dims = dims
        self.device = device

        # Specify sub-modules (two 2-layer GRUs followed by a single-layer GRU)
        self.model = nn.ModuleDict({
            'gru1': nn.GRU(self.dims['in'], self.dims['gru1'], num_layers=2, batch_first=True),
            'gru2': nn.GRU(self.dims['gru1'], self.dims['gru2'], num_layers=2, batch_first=True),
            'gru3': nn.GRU(self.dims['gru2'], self.dims['gru3'], num_layers=1, batch_first=True),
        })

        # Send model to device (moves every registered sub-module in one go)
        self.to(device)

    def forward(self, x, x_lengths):
        """Encodes a padded batch.

        Parameters
        ----------
        x: `torch.Tensor` (`float`)
            (`batch_size` x `max_len` x `dims['in']`) padded batch of sequences.
        x_lengths: `torch.Tensor` (`int`)
            The `batch_size` true sequence lengths. They do not need to be sorted.

        Returns
        -------
        h: `torch.Tensor` (`float`)
            (`batch_size` x `max(x_lengths)` x `dims['gru3']`) hidden states of the last GRU, zero-padded past each sequence's length.
        h_last: `torch.Tensor` (`float`)
            (1 x `batch_size` x `dims['gru3']`) final hidden state of the last GRU for each sequence.
        """
        # Follow the parameters rather than the device recorded at construction time,
        # so the module keeps working after a later `model.to(...)` call
        x = x.to(next(self.parameters()).device)

        # pack_padded_sequence requires the lengths to live on the CPU
        lengths = x_lengths.cpu()

        # Pack the padded Tensor into a PackedSequence.
        # enforce_sorted=False lets PyTorch sort internally and restore the caller's batch order afterwards
        x_packed = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)

        # Pass the PackedSequence through the GRUs
        h_packed, _ = self.model['gru1'](x_packed)
        h_packed, _ = self.model['gru2'](h_packed)
        h_packed, h_last = self.model['gru3'](h_packed)

        # Unpack the hidden state PackedSequence into a padded Tensor.
        # total_length must be a plain int, not a 0-dim tensor
        h_padded, _ = pad_packed_sequence(
            h_packed, batch_first=True, padding_value=0.0, total_length=int(lengths.max())
        )

        # Shape: B x T_max x D_out, 1 x B x D_out
        return h_padded, h_last
class AttentionModule(nn.Module):
    """Attention over the encoder's hidden states, combined with an auxiliary GRU context.

    Parameters
    ----------
    dims: `dict`
        Layer sizes. Only the key `'in'` (the encoder's output size, `D_out`) is read.
    device: `str` or `torch.device`
        The device that the module parameters are sent to.
    """

    def __init__(self, dims, device):
        super().__init__()
        self.device = device
        self.dims = dims

        # Specify sub-modules
        self.model = nn.ModuleDict({
            # Attentional context vector weights
            'attn_ctx': nn.Linear(self.dims['in'], self.dims['in'], bias=False),
            # Auxiliary context
            'aux_ctx': nn.GRU(input_size=self.dims['in'], hidden_size=self.dims['in']),
        })

        # Send model to device
        self.to(device)

    def forward(self, h, h_last):
        """Computes the concatenated attentional and auxiliary context vectors.

        The inputs are left untouched (no in-place transposition).

        Parameters
        ----------
        h: `torch.Tensor` (`float`)
            (B x T_max x D_out) padded hidden states from the encoder.
        h_last: `torch.Tensor` (`float`)
            (1 x B x D_out) final hidden state from the encoder.

        Returns
        -------
        o_attn: `torch.Tensor` (`float`)
            (B x D_out*2) concatenation of the attentional context and the auxiliary context.
        """
        # Project the final hidden state to form one query per sequence
        query = self.model['attn_ctx'](h_last.transpose(0, 1))
        # Shape: B x 1 x D_out

        # Score every time step against the query
        scores = query @ h.transpose(1, 2)
        # Shape: B x 1 x T_max

        # Normalise over the TIME axis so each sequence gets its own attention distribution,
        # independent of the other sequences in the batch
        weights = F.softmax(scores, dim=-1)

        # Calculate attentional context as the attention-weighted sum of hidden states
        c = (weights @ h).transpose(0, 1)
        # Shape: 1 x B x D_out

        # Calculate auxiliary context: a single GRU step over c, initialised with the encoder's final state
        c_aux, _ = self.model['aux_ctx'](c, h_last.contiguous())
        # Shape: 1 x B x D_out

        # Combine attentional and auxiliary context
        return torch.cat((c.squeeze(0), c_aux.squeeze(0)), dim=1)
        # Shape: B x D_out*2
class Classifier(nn.Module):
    """Two-stage fully-connected head that maps a context vector to log-softmax class scores.

    Each stage is batch normalisation, then dropout, then a linear layer; a ReLU sits between the two stages.

    Parameters
    ----------
    dims: `dict`
        Layer sizes. The keys `'in'` (input size), `'fc'` (hidden size) and `'out'` (number of classes) are read.
    device: `str` or `torch.device`
        The device that the module parameters are sent to.
    """

    def __init__(self, dims, device):
        super().__init__()
        self.device = device
        self.dims = dims

        n_in, n_hidden, n_out = self.dims['in'], self.dims['fc'], self.dims['out']

        # Build both stages from the same recipe, keeping the 'fc1' / 'fc2' names and order
        stages = {}
        for name, fan_in, fan_out in (('fc1', n_in, n_hidden), ('fc2', n_hidden, n_out)):
            stage = nn.Sequential(
                nn.BatchNorm1d(fan_in),
                nn.Dropout(),
                nn.Linear(fan_in, fan_out),
            )
            stages[name] = stage.to(device)
        self.model = nn.ModuleDict(stages)

        # Send model to device
        self.to(device)

    def forward(self, o_attn):
        """Returns the (B x `dims['out']`) log-softmax scores for a (B x `dims['in']`) input."""
        hidden = F.relu(self.model['fc1'](o_attn))
        logits = self.model['fc2'](hidden)
        return F.log_softmax(logits, dim=1)
import torch
def collate_fn(batch):
    """Collects together univariate or multivariate sequences into a single batch, arranged in descending order of length.

    Also returns the corresponding lengths and labels as `torch.LongTensor` objects.
    The `batch` list passed in is not modified.

    Parameters
    ----------
    batch: `list` ( `tuple` (`torch.Tensor`, `int`))
        Collection of `batch_size` sequence-label pairs, where each sequence `x` is of shape (`len_x` x `n_features`) or (`len_x`,) if one-dimensional, and the label is an integer.

    Returns
    -------
    padded_sequences: `torch.Tensor` (`float`)
        A tensor of size (`batch_size` x `max_len` x `n_features`) containing all of the sequences in descending length order, padded to the length of the longest sequence in each batch.
    lengths: `torch.Tensor` (`int`)
        A tensor of the `batch_size` sequence lengths in descending order.
    labels: `torch.Tensor` (`int`)
        A tensor of the `batch_size` sequence labels in descending length order.
    """
    # Sort a copy of the (sequence, label) pairs in descending order of duration.
    # `sorted` is stable, so ties keep their original relative order, and the caller's list stays untouched
    ordered = sorted(batch, key=lambda pair: len(pair[0]), reverse=True)
    # Shape: list(tuple(tensor(TxD), int)) or list(tuple(tensor(T), int))

    # Create list of sequences, and tensors for lengths and labels
    sequences = [sequence for sequence, _ in ordered]
    lengths = torch.tensor([len(sequence) for sequence in sequences], dtype=torch.long)
    labels = torch.tensor([int(label) for _, label in ordered], dtype=torch.long)

    # Combine sequences into a padded matrix
    padded_sequences = torch.nn.utils.rnn.pad_sequence(sequences, batch_first=True)
    # Shape: (B x T_max x D) or (B x T_max)

    # If a vector input was given for the sequences, expand (B x T_max) to (B x T_max x 1)
    if padded_sequences.ndim == 2:
        padded_sequences = padded_sequences.unsqueeze(-1)

    # Shapes: (B x T_max x D), (B,), (B,)
    return padded_sequences, lengths, labels
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment