# Spaces:
# Sleeping
# Sleeping
""" Scheduler Factory
Hacked together by / Copyright 2020 Ross Wightman
"""
from torch.optim import Optimizer | |
import math | |
from torch.optim.lr_scheduler import LambdaLR | |
def create_scheduler(args, optimizer):
    """
    Build the learning-rate scheduler selected by `args.sched`.

    Args:
        args:
            Configuration object. It is read through attribute access (`sched`, `num_warmup_steps`,
            `num_training_steps`, `min_lr_multi`) and through a dict-style `get` call for the optional
            `last_epoch` entry, which falls back to -1.
        optimizer:
            The optimizer whose learning rate will be scheduled.

    Return:
        The scheduler produced by `get_cosine_schedule_with_warmup`.

    Raises:
        NotImplementedError: if `args.sched` is anything other than `'cosine'`.
    """
    # Only the cosine schedule is supported; reject everything else up front.
    if args.sched != 'cosine':
        raise NotImplementedError(args.sched)

    cosine_kwargs = dict(
        num_warmup_steps=args.num_warmup_steps,
        num_training_steps=args.num_training_steps,
        num_cycles=0.5,  # half a cosine wave: a single monotone decay
        min_lr_multi=args.min_lr_multi,
        last_epoch=args.get('last_epoch', -1),
    )
    return get_cosine_schedule_with_warmup(optimizer, **cosine_kwargs)
def get_cosine_schedule_with_warmup(
        optimizer: Optimizer,
        num_warmup_steps: int,
        num_training_steps: int,
        num_cycles: float = 0.5,
        min_lr_multi: float = 0.,
        last_epoch: int = -1
):
    """
    Modified from https://github.com/huggingface/transformers/blob/v4.15.0/src/transformers/optimization.py

    Create a schedule whose learning-rate multiplier rises linearly during a warmup phase and then follows a
    cosine curve for the remaining steps. In both phases the multiplier is floored at `min_lr_multi`, so the
    effective learning rate never drops below base_lr * min_lr_multi.

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of cosine waves after warmup (the default is a half-cosine, i.e. a single decrease from
            the maximum value).
        min_lr_multi (`float`, *optional*, defaults to 0):
            The minimum learning rate multiplier. Thus the minimum learning rate is base_lr * min_lr_multi.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
    """

    def _lr_multiplier(step):
        # Warmup phase: linear ramp step / num_warmup_steps, floored at min_lr_multi.
        if step < num_warmup_steps:
            warmup_span = float(max(1, num_warmup_steps))
            ramp = float(step) / warmup_span
            return max(min_lr_multi, ramp)

        # Cosine phase: progress is the fraction of post-warmup steps completed.
        # max(1, ...) guards against a zero-length decay span.
        decay_span = float(max(1, num_training_steps - num_warmup_steps))
        progress = float(step - num_warmup_steps) / decay_span
        angle = math.pi * float(num_cycles) * 2.0 * progress
        cosine_value = 0.5 * (1.0 + math.cos(angle))
        return max(min_lr_multi, cosine_value)

    return LambdaLR(optimizer, _lr_multiplier, last_epoch)