2023-02-25 13:05:22 -07:00
|
|
|
{
|
|
|
|
"doc": {
|
|
|
|
"optimizer": "adamw, adamw8bit, lion",
|
|
|
|
"optimizer_desc": "'adamw' in standard 32bit, 'adamw8bit' is bitsandbytes, 'lion' is lucidrains",
|
|
|
|
"lr": "learning rate, if null wil use CLI or main JSON config value",
|
|
|
|
"betas": "exponential decay rates for the moment estimates",
|
|
|
|
"epsilon": "value added to denominator for numerical stability, unused for lion",
|
|
|
|
"weight_decay": "weight decay (L2 penalty)"
|
|
|
|
},
|
|
|
|
"optimizer": "adamw8bit",
|
2023-02-25 14:23:33 -07:00
|
|
|
"lr:": 1e-6,
|
2023-02-25 13:05:22 -07:00
|
|
|
"betas": [0.9, 0.999],
|
|
|
|
"epsilon": 1e-8,
|
|
|
|
"weight_decay": 0.01
|
|
|
|
}
|