{
  "doc": {
    "base": "base optimizer configuration for unet and text encoder",
    "text_encoder_overrides": "text encoder config overrides",
    "text_encoder_lr_scale": "if LR not set on text encoder, sets the LR to a multiple of the Base LR. for example, if base `lr` is 2e-6 and `text_encoder_lr_scale` is 0.5, the text encoder's LR will be set to `1e-6`.",
    "-----------------": "-----------------",
    "optimizer": "adamw, adamw8bit, lion",
    "optimizer_desc": "'adamw' in standard 32bit, 'adamw8bit' is bitsandbytes, 'lion' is lucidrains",
    "lr": "learning rate, if null will use CLI or main JSON config value",
    "lr_scheduler": "'constant' or 'cosine'",
    "lr_warmup_steps": "number of steps to warmup LR to target LR, if null will use CLI or default a value based on max epochs",
    "lr_decay_steps": "number of steps to decay LR to zero for cosine, if null will use CLI or default a value based on max epochs",
    "betas": "exponential decay rates for the moment estimates",
    "epsilon": "value added to denominator for numerical stability, unused for lion",
    "weight_decay": "weight decay (L2 penalty)",
    "------------------": "-----------------",
    "unfreeze_last_n_layers": "if not null, freeze all parameters in the text encoder except for the last n layers and the final layer norm"
  },
  "base": {
    "optimizer": "adamw8bit",
    "lr": 1e-6,
    "lr_scheduler": "constant",
    "lr_decay_steps": null,
    "lr_warmup_steps": null,
    "betas": [0.9, 0.999],
    "epsilon": 1e-8,
    "weight_decay": 0.010
  },
  "text_encoder_overrides": {
    "optimizer": null,
    "lr": 5e-7,
    "lr_scheduler": "cosine",
    "lr_decay_steps": null,
    "lr_warmup_steps": null,
    "betas": null,
    "epsilon": null,
    "weight_decay": null
  },
  "text_encoder_freezing": {
    "unfreeze_last_n_layers": null
  },
  "apply_grad_scaler_step_tweaks": true
}