mazpie's picture
Initial commit
2d9a728
raw
history blame
2.58 kB
""" Scheduler Factory
Hacked together by / Copyright 2020 Ross Wightman
"""
from torch.optim import Optimizer
import math
from torch.optim.lr_scheduler import LambdaLR
def create_scheduler(args, optimizer):
    """Build the learning-rate scheduler selected by ``args.sched``.

    Args:
        args: Configuration object. It is read through attribute access
            (``sched``, ``num_warmup_steps``, ``num_training_steps``,
            ``min_lr_multi``) and through a dict-style ``get`` for the
            optional ``last_epoch`` entry, which falls back to -1.
        optimizer: The optimizer handed to the scheduler constructor.

    Return:
        Whatever `get_cosine_schedule_with_warmup` returns for the given
        settings.

    Raises:
        NotImplementedError: If ``args.sched`` is anything other than
            ``'cosine'``; the offending value is the exception argument.
    """
    # Only the cosine schedule is supported; reject everything else up front.
    if args.sched != 'cosine':
        raise NotImplementedError(args.sched)

    cosine_kwargs = dict(
        num_warmup_steps=args.num_warmup_steps,
        num_training_steps=args.num_training_steps,
        num_cycles=0.5,  # single half-cosine decay
        min_lr_multi=args.min_lr_multi,
        last_epoch=args.get('last_epoch', -1),
    )
    return get_cosine_schedule_with_warmup(optimizer, **cosine_kwargs)
def get_cosine_schedule_with_warmup(
    optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int,
    num_cycles: float = 0.5, min_lr_multi: float = 0., last_epoch: int = -1
) -> LambdaLR:
    """
    Modified from https://github.com/huggingface/transformers/blob/v4.15.0/src/transformers/optimization.py

    Create a schedule whose learning-rate multiplier rises linearly from 0 to 1 over the warmup period and then
    follows a cosine curve over the remaining training steps. The multiplier is never allowed to drop below
    `min_lr_multi`, so the effective learning rate stays at or above base_lr * min_lr_multi (including at step 0).

    Steps beyond `num_training_steps` keep the multiplier reached at `num_training_steps` instead of continuing
    along the cosine, which would otherwise make the learning rate climb again.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the default is to just decrease from the max value
            following a half-cosine).
        min_lr_multi (`float`, *optional*, defaults to 0):
            The minimum learning rate multiplier. Thus the minimum learning rate is base_lr * min_lr_multi.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """
    # Both denominators are fixed for the lifetime of the schedule; max(1, ...)
    # guards against division by zero when a phase has no steps.
    warmup_span = float(max(1, num_warmup_steps))
    decay_span = float(max(1, num_training_steps - num_warmup_steps))

    def lr_lambda(current_step):
        if current_step < num_warmup_steps:
            # Linear warmup, floored at the minimum multiplier.
            return max(min_lr_multi, float(current_step) / warmup_span)
        # Clamp so that running past num_training_steps holds the final value
        # rather than travelling further along the cosine.
        progress = min(1.0, float(current_step - num_warmup_steps) / decay_span)
        cosine = 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
        return max(min_lr_multi, cosine)

    return LambdaLR(optimizer, lr_lambda, last_epoch)