Bitsandbytes 文件
AdEMAMix
加入 Hugging Face 社群,並獲得增強的文件體驗
開始使用
AdEMAMix
AdEMAMix 是 Adam 最佳化器的一個變體。
bitsandbytes 還支援分頁最佳化器:當 GPU 記憶體耗盡時,這類最佳化器會利用 CUDA 的統一記憶體,將記憶體內容從 GPU 轉移到 CPU。
AdEMAMix
class bitsandbytes.optim.AdEMAMix
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096, is_paged: bool = False )
__init__
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096, is_paged: bool = False )
AdEMAMix8bit
class bitsandbytes.optim.AdEMAMix8bit
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )
__init__
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )
AdEMAMix32bit
class bitsandbytes.optim.AdEMAMix32bit
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )
__init__
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )
PagedAdEMAMix
class bitsandbytes.optim.PagedAdEMAMix
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096 )
__init__
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096 )
PagedAdEMAMix8bit
class bitsandbytes.optim.PagedAdEMAMix8bit
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )
__init__
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )
PagedAdEMAMix32bit
class bitsandbytes.optim.PagedAdEMAMix32bit
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )
__init__
< 原始碼 >( params: Iterable, lr: float = 0.001, betas: tuple = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )