""" Scheduler Factory
Hacked together by / Copyright 2020 Ross Wightman
"""
import math

from torch.optim import Optimizer
from torch.optim.lr_scheduler import LambdaLR


def _get_option(args, name, default):
    """Read an optional setting from `args`, returning `default` when absent.

    Mapping-style configs (anything exposing a callable `.get`) are queried
    through `.get`, exactly as before. Plain attribute containers such as
    `argparse.Namespace` have no `.get`, so they are read with `getattr`.
    """
    getter = getattr(args, "get", None)
    if callable(getter):
        return getter(name, default)
    return getattr(args, name, default)


def create_scheduler(args, optimizer):
    """Build the learning rate scheduler selected by `args.sched`.

    Args:
        args:
            Configuration object. `sched`, `num_warmup_steps`,
            `num_training_steps` and `min_lr_multi` are read as attributes.
            `last_epoch` is optional and defaults to -1; it is looked up via
            `args.get` when available, otherwise via attribute access.
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer whose learning rate is scheduled.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` for `args.sched == 'cosine'`.

    Raises:
        NotImplementedError: If `args.sched` is anything other than `'cosine'`.
    """
    if args.sched != 'cosine':
        raise NotImplementedError(args.sched)

    return get_cosine_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.num_warmup_steps,
        num_training_steps=args.num_training_steps,
        num_cycles=0.5,
        min_lr_multi=args.min_lr_multi,
        last_epoch=_get_option(args, 'last_epoch', -1),
    )


def get_cosine_schedule_with_warmup(
        optimizer: Optimizer, num_warmup_steps: int, num_training_steps: int,
        num_cycles: float = 0.5, min_lr_multi: float = 0., last_epoch: int = -1
):
    """
    Modified from https://github.com/huggingface/transformers/blob/v4.15.0/src/transformers/optimization.py

    Create a schedule whose learning rate multiplier rises linearly during a
    warmup period and then follows a cosine curve. In both phases the
    multiplier is clamped from below by `min_lr_multi`, so the learning rate
    never drops under `base_lr * min_lr_multi` (this includes the first warmup
    steps, which would otherwise start at 0).

    Args:
        optimizer ([`~torch.optim.Optimizer`]):
            The optimizer for which to schedule the learning rate.
        num_warmup_steps (`int`):
            The number of steps for the warmup phase.
        num_training_steps (`int`):
            The total number of training steps.
        num_cycles (`float`, *optional*, defaults to 0.5):
            The number of waves in the cosine schedule (the default is to just
            decrease from the max value following a half-cosine).
        min_lr_multi (`float`, *optional*, defaults to 0):
            The minimum learning rate multiplier. Thus the minimum learning
            rate is base_lr * min_lr_multi.
        last_epoch (`int`, *optional*, defaults to -1):
            The index of the last epoch when resuming training. Passed through
            to `LambdaLR` unchanged.

    Return:
        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.

    Note:
        Progress is not capped at 1, so for steps beyond `num_training_steps`
        the cosine term keeps evolving rather than staying at its final value.
    """
    # Both denominators are constant for the lifetime of the schedule; the
    # max(1, ...) guards avoid a division by zero when a phase has no steps.
    warmup_span = float(max(1, num_warmup_steps))
    decay_span = float(max(1, num_training_steps - num_warmup_steps))

    def lr_lambda(current_step):
        if current_step < num_warmup_steps:
            # Linear ramp, floored at min_lr_multi.
            return max(min_lr_multi, float(current_step) / warmup_span)
        progress = float(current_step - num_warmup_steps) / decay_span
        cosine = 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))
        return max(min_lr_multi, cosine)

    return LambdaLR(optimizer, lr_lambda, last_epoch)