import math

learning_rate = 4e-4
warmup_steps = 2000
log_step_interval = 1
eval_iters = 100
save_step_interval = 1000
eval_step_interval = 1000
weight_decay = 1e-1
beta1 = 0.9
beta2 = 0.95
grad_clip = 1.0
decay_lr = True
min_lr = 4e-5

def get_lr(it: int, lr_decay_iters: int) -> float:
    # 1) linear warmup for the first warmup_steps steps
    if it < warmup_steps:
        return learning_rate * it / warmup_steps
    # 2) if it > lr_decay_iters, return the minimum learning rate
    if it > lr_decay_iters:
        return min_lr
    # 3) in between, use cosine decay down to the minimum learning rate
    decay_ratio = (it - warmup_steps) / (lr_decay_iters - warmup_steps)
    assert 0 <= decay_ratio <= 1
    coeff = 0.5 * (1.0 + math.cos(math.pi * decay_ratio))  # coeff ranges 1..0
    return min_lr + coeff * (learning_rate - min_lr)
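

# A minimal sanity check, not part of the original gist: sample the schedule
# at a few steps to see the warmup -> cosine decay -> floor shape.
# `total_iters` is a hypothetical stand-in for the actual training length.
if __name__ == "__main__":
    total_iters = 10_000
    for step in (0, 500, warmup_steps, 4_000, total_iters, total_iters + 1):
        print(f"step {step:>6}: lr = {get_lr(step, total_iters):.2e}")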