Bitsandbytes documentation

AdEMAMix

Hugging Face's logo
Join the Hugging Face community

and get access to the augmented documentation experience

to get started

AdEMAMix

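AdEMAMix is a variant of the Adam optimizer that mixes two exponential moving averages of the gradient: a fast one (decay `betas[0]`) and a slow one (decay `betas[2]`), with the slow average weighted by `alpha`.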

bitsandbytes also supports paged optimizers, which take advantage of CUDA's unified memory to transfer memory from the GPU to the CPU when GPU memory is exhausted.

AdEMAMix

class bitsandbytes.optim.AdEMAMix

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096, is_paged: bool = False )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096, is_paged: bool = False )

AdEMAMix8bit

class bitsandbytes.optim.AdEMAMix8bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )

AdEMAMix32bit

class bitsandbytes.optim.AdEMAMix32bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )

PagedAdEMAMix

class bitsandbytes.optim.PagedAdEMAMix

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096 )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096 )

PagedAdEMAMix8bit

class bitsandbytes.optim.PagedAdEMAMix8bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )

PagedAdEMAMix32bit

class bitsandbytes.optim.PagedAdEMAMix32bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )

< > Update on GitHub