import math

learning_rate = 4e-4
warmup_steps = 2000
log_step_interval = 1
eval_iters = 100
save_step_interval = 1000
eval_step_interval = 1000
weight_decay = 1e-1
beta1 = 0.9
beta2 = 0.95
grad_clip = 1.0
decay_lr = True
min_lr = 4e-5


def get_lr(it: int, lr_decay_iters: int) -> float:
    # 1) linear warmup for warmup_steps steps
    if it < warmup_steps:
        return learning_rate * it / warmup_steps
    # 2) if it > lr_decay_iters, return min learning rate
    if it > lr_decay_iters:
        return min_lr
    # 3) in between, use cosine decay down to min learning rate
    decay_ratio = (it - warmup_steps) / (lr_decay_iters - warmup_steps)
    assert 0 <= decay_ratio <= 1
    coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))  # coeff ranges 0..1
    return min_lr + coeff * (learning_rate - min_lr)
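

# --- Usage sketch (not part of the original gist) ---
# A minimal training-loop fragment showing how get_lr can be wired into a
# PyTorch optimizer with the hyperparameters above. The model, dummy loss,
# and max_iters below are assumed placeholders for illustration only.
import torch

model = torch.nn.Linear(16, 16)  # stand-in model, assumed for the sketch
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
    betas=(beta1, beta2),
)
max_iters = 10_000  # assumed total number of training steps

for it in range(max_iters):
    # set the learning rate for this step (warmup, then cosine decay)
    lr = get_lr(it, lr_decay_iters=max_iters) if decay_lr else learning_rate
    for param_group in optimizer.param_groups:
        param_group["lr"] = lr

    loss = model(torch.randn(8, 16)).pow(2).mean()  # dummy loss for the sketch
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)  # clip gradients
    optimizer.step()
    optimizer.zero_grad(set_to_none=True)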