AllenNLP learning rate schedulers
{
    "trainer": {
        "cuda_device": 0,
        "learning_rate_scheduler": {
            "type": "triangular",
            // total number of epochs, should match the trainer param `num_epochs` below
            "num_epochs": 80,
            // increase LR linearly for 20 epochs
            "warm_up": 20,
            // then decrease LR linearly for 30 epochs
            "cool_down": 30,
            // LR will start at `lr / ratio = 0.05 / 32`
            "ratio": 32
        },
        "num_epochs": 80,
        "optimizer": {
            "type": "sgd",
            "lr": 0.05
        },
        // log the learning rate to tensorboard so we can see how it changes
        "should_log_learning_rate": true,
        "should_log_parameter_statistics": false
    }
}
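With these settings the LR starts at 0.05 / 32 ≈ 0.0016, climbs to 0.05 over the first 20 epochs, falls back to ≈ 0.0016 over the next 30, and then trickles off toward 0 over the final 30. Here is a quick standalone sanity check (my own sketch, not part of the gist) that reproduces the three phases by hand using the same formulas as the scheduler below:

# Sanity check: trace the triangular schedule for the config values above.
lr, ratio, warm_up, cool_down, num_epochs = 0.05, 32, 20, 30, 80
base, peak = lr / ratio, lr

for step in (0, 20, 50, 79):
    if step <= warm_up:
        # Warm-up: linear from base up to peak.
        value = base + (peak - base) * step / warm_up
    elif step <= warm_up + cool_down:
        # Cool-down: linear from peak back down to base.
        value = peak - (peak - base) * (step - warm_up) / cool_down
    else:
        # Trickle-off: linear from base down toward 0.
        value = base - base * (step - warm_up - cool_down) / (num_epochs - warm_up - cool_down)
    print(f"step {step:2d}: lr = {value:.6f}")
# Expected: step 0 and step 50 give ≈ 0.001563 (lr / ratio),
# step 20 gives 0.050000 (peak), and step 79 gives ≈ 0.000052.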
import torch

from allennlp.common.checks import ConfigurationError
from allennlp.training.learning_rate_schedulers import LearningRateScheduler


@LearningRateScheduler.register("triangular")
class Triangular(torch.optim.lr_scheduler._LRScheduler):  # pylint: disable=protected-access
    """
    Slanted triangular learning rate scheduler.

    The LR will start at ``lr / ratio`` and increase linearly for ``warm_up`` epochs
    until reaching ``lr``, at which point it will decrease linearly for ``cool_down``
    epochs until reaching ``lr / ratio`` again. Then the LR will continue
    linearly decreasing down to 0 for the remaining number of epochs.
    """
    def __init__(self,
                 optimizer: torch.optim.Optimizer,
                 num_epochs: int,
                 warm_up: int,
                 cool_down: int,
                 ratio: int = 10,
                 last_epoch: int = -1) -> None:
        if num_epochs < warm_up + cool_down:
            raise ConfigurationError(f"'num_epochs' must be at least the sum of 'warm_up' and 'cool_down'. "
                                     f"Got 'num_epochs' = {num_epochs} < 'warm_up' ({warm_up}) + "
                                     f"'cool_down' ({cool_down}) = {warm_up + cool_down}.")
        self.num_epochs = num_epochs
        self.warm_up = warm_up
        self.cool_down = cool_down
        self.ratio = ratio
        self._initialized: bool = False
        super().__init__(optimizer, last_epoch)
    def get_lr(self):
        # HACK: We need to check if this is the first time ``self.get_lr()`` was called,
        # since ``torch.optim.lr_scheduler._LRScheduler`` will call ``self.get_lr()``
        # when first initialized.
        if not self._initialized and self.last_epoch == 0:
            self._initialized = True
            step = 0
        else:
            step = min(self.last_epoch, self.num_epochs - 2) + 1

        if step <= self.warm_up:
            # Warm-up phase: increase LR linearly.
            lrs = [lr / self.ratio + (lr - lr / self.ratio) * (step / self.warm_up)
                   for lr in self.base_lrs]
        elif step <= self.warm_up + self.cool_down:
            # Cool-down phase: decrease LR linearly.
            lrs = [lr - (lr - lr / self.ratio) * (step - self.warm_up) / self.cool_down
                   for lr in self.base_lrs]
        else:
            # "Trickle-off" phase: continue decreasing linearly down to 0.
            lrs = [lr / self.ratio - (lr / self.ratio) * (step - self.warm_up - self.cool_down)
                   / (self.num_epochs - self.warm_up - self.cool_down)
                   for lr in self.base_lrs]
        return lrs
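Because the class is registered under the name "triangular", the `"type": "triangular"` entry in the config above resolves to it through AllenNLP's registry. Outside of a trainer you can also drive it by hand; here is a minimal sketch, assuming the file above is saved as `triangular.py` (the filename is my assumption) and that allennlp is installed to satisfy its imports:

import torch

from triangular import Triangular  # the scheduler above; module name is an assumption

# Dummy model, with the same optimizer and scheduler settings as the config above.
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
scheduler = Triangular(optimizer, num_epochs=80, warm_up=20, cool_down=30, ratio=32)

for epoch in range(80):
    # ... run one epoch of training here ...
    scheduler.step()  # advance the schedule at the end of each epoch
    print(f"epoch {epoch}: lr = {optimizer.param_groups[0]['lr']:.6f}")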