import torch
from torch import nn
from torch.nn import functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class DeepGRU(nn.Module):
    """A modular PyTorch implementation of the DeepGRU (Deep Gesture Recognition Utility)
    recurrent neural network architecture designed by Maghoumi & LaViola Jr., originally for
    gesture recognition, but applicable to general sequences.

    Parameters
    ----------
    n_features: `int`
        The number of features that each observation within a sequence has.

    n_classes: `int`
        The number of different sequence classes.

    dims: `dict`
        A dictionary of dimension configurations for the GRUs and fully-connected layers.
        Values for the keys `'gru1'`, `'gru2'`, `'gru3'` and `'fc'` must be set.

    device: `str`, optional
        The device to send the model parameters to for computation.
        If no device is specified, a CUDA device is used if available, otherwise the CPU.
    """

    def __init__(self, n_features, n_classes, dims={'gru1': 512, 'gru2': 256, 'gru3': 128, 'fc': 256}, device=None):
        super().__init__()

        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Specify sub-modules
        self.model = nn.ModuleDict({
            'enc': EncoderNetwork(dims={'in': n_features, **{k: v for k, v in dims.items() if k.startswith('gru')}}, device=device),
            'attn': AttentionModule(dims={'in': dims['gru3']}, device=device),
            'clf': Classifier(dims={'in': dims['gru3'] * 2, 'fc': dims['fc'], 'out': n_classes}, device=device),
        })

        # Send model to device
        self.to(device)

    def forward(self, x, x_lengths):
        """Passes the batched input sequences through the encoder network, attention module and classifier to generate log-softmax scores.

        Since log-softmax scores are returned, it is advised to use the negative log-likelihood loss `torch.nn.NLLLoss`.
        A minimal usage sketch is provided at the bottom of this module.

        Parameters
        ----------
        x: `torch.Tensor` (`float`)
            A (`batch_size` x `max_length` x `n_features`) zero-padded tensor of a batch of input observation sequences.

        x_lengths: `torch.Tensor` (`int`)
            A tensor of the sequence lengths of the batch in descending order.

        Returns
        -------
        log_softmax: `torch.Tensor` (`float`)
            (`batch_size` x `n_classes`) tensor of `n_classes` log-softmax scores for each observation sequence in the batch.
        """
        h, h_last = self.model['enc'](x, x_lengths)
        o_attn = self.model['attn'](h, h_last)
        return self.model['clf'](o_attn)


class EncoderNetwork(nn.Module):
    def __init__(self, dims, device):
        super().__init__()
        self.dims = dims
        self.device = device

        # Specify sub-modules
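        # The encoder stacks two two-layer GRUs followed by a single-layer GRU (the
        # DeepGRU encoder network); layer widths come from the supplied `dims` dict.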
        self.model = nn.ModuleDict({
            'gru1': nn.GRU(self.dims['in'], self.dims['gru1'], num_layers=2, batch_first=True).to(device),
            'gru2': nn.GRU(self.dims['gru1'], self.dims['gru2'], num_layers=2, batch_first=True).to(device),
            'gru3': nn.GRU(self.dims['gru2'], self.dims['gru3'], num_layers=1, batch_first=True).to(device)
        })

        # Send model to device
        self.to(device)

    def forward(self, x, x_lengths):
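        # x:         zero-padded batch of observation sequences, shape B x T_max x n_features
        # x_lengths: true sequence lengths in descending order, as required by
        #            pack_padded_sequence with the default enforce_sorted=True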
        x = x.to(self.device)

        # Pack the padded Tensor into a PackedSequence
        x_packed = pack_padded_sequence(x, x_lengths.cpu(), batch_first=True)

        # Pass the PackedSequence through the GRUs
        h_packed, _ = self.model['gru1'](x_packed)
        h_packed, _ = self.model['gru2'](h_packed)
        h_packed, h_last = self.model['gru3'](h_packed)

        # Unpack the hidden state PackedSequence into a padded Tensor
        h_padded = pad_packed_sequence(h_packed, batch_first=True, padding_value=0.0, total_length=int(max(x_lengths)))
        return h_padded[0], h_last
        # Shape: B x T_max x D_out, 1 x B x D_out


class AttentionModule(nn.Module):
    def __init__(self, dims, device):
        super().__init__()
        self.device = device
        self.dims = dims

        # Specify sub-modules
        self.model = nn.ModuleDict({
            # Attentional context vector weights
            'attn_ctx': nn.Linear(self.dims['in'], self.dims['in'], bias=False).to(device),
            # Auxiliary context
            'aux_ctx': nn.GRU(input_size=self.dims['in'], hidden_size=self.dims['in']).to(device)
        })

        # Send model to device
        self.to(device)

    def forward(self, h, h_last):
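        # Global attention over the encoder outputs: the final hidden state h_last acts
        # as the query, attention weights are computed over the T_max timesteps of h,
        # and the attended context is refined by a single GRU step to give an auxiliary
        # context. Both contexts are concatenated and passed to the classifier.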
        h_last.transpose_(1, 0)
        # Shape: B x 1 x D_out

        # Calculate attentional context
        h.transpose_(1, 2)
        # Attention weights are normalised over the time dimension (dim=2), not the batch
        c = F.softmax(self.model['attn_ctx'](h_last) @ h, dim=2)
        c = (c @ h.transpose(2, 1)).transpose(1, 0)
        # Shape: 1 x B x D_out

        # Calculate auxiliary context
        c_aux, _ = self.model['aux_ctx'](c, h_last.transpose(1, 0))
        # Shape: 1 x B x D_out

        # Combine attentional and auxiliary context
        return torch.cat((c.squeeze(0), c_aux.squeeze(0)), dim=1)
        # Shape: B x D_out*2


class Classifier(nn.Module):
    def __init__(self, dims, device):
        super().__init__()
        self.device = device
        self.dims = dims

        # Specify sub-modules
        self.model = nn.ModuleDict({
            'fc1': nn.Sequential(
                nn.BatchNorm1d(self.dims['in']),
                nn.Dropout(),
                nn.Linear(self.dims['in'], self.dims['fc'])
            ).to(device),
            'fc2': nn.Sequential(
                nn.BatchNorm1d(self.dims['fc']),
                nn.Dropout(),
                nn.Linear(self.dims['fc'], self.dims['out'])
            ).to(device)
        })

        # Send model to device
        self.to(device)

    def forward(self, o_attn):
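        # o_attn: concatenated attentional and auxiliary context, shape B x (2 * D_out)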
        f1 = self.model['fc1'](o_attn)
        f2 = self.model['fc2'](F.relu(f1))
        return F.log_softmax(f2, dim=1)
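

if __name__ == '__main__':
    # Minimal usage sketch (illustrative only: the feature/class counts, sequence
    # lengths and targets below are arbitrary). It shows the input format expected by
    # DeepGRU.forward -- a zero-padded batch plus descending lengths -- and pairs the
    # returned log-softmax scores with torch.nn.NLLLoss, as advised in the docstring.
    n_features, n_classes = 10, 5
    model = DeepGRU(n_features, n_classes, device='cpu')

    # Three sequences of different lengths, zero-padded to the longest (T_max = 7)
    x_lengths = torch.tensor([7, 5, 3])                 # must be in descending order
    x = torch.zeros(len(x_lengths), int(x_lengths.max()), n_features)
    for i, length in enumerate(x_lengths):
        x[i, :length] = torch.randn(int(length), n_features)

    log_probs = model(x, x_lengths)                     # Shape: B x n_classes
    targets = torch.tensor([0, 2, 4])                   # dummy class labels
    loss = nn.NLLLoss()(log_probs, targets)
    print(log_probs.shape, loss.item())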