damian0815
commited on
Commit
•
ef0c9ab
1
Parent(s):
2098ce6
Upload optimizerSD21.json with huggingface_hub
Browse files- optimizerSD21.json +44 -0
optimizerSD21.json
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"doc": {
|
3 |
+
"base": "base optimizer configuration for unet and text encoder",
|
4 |
+
"text_encoder_overrides": "text encoder config overrides",
|
5 |
+
"text_encoder_lr_scale": "if LR is not set on the text encoder, sets the text encoder's LR to a multiple of the base LR. For example, if base `lr` is 2e-6 and `text_encoder_lr_scale` is 0.5, the text encoder's LR will be set to `1e-6`.",
|
6 |
+
"-----------------": "-----------------",
|
7 |
+
"optimizer": "adamw, adamw8bit, lion",
|
8 |
+
"optimizer_desc": "'adamw' is standard 32bit, 'adamw8bit' is bitsandbytes, 'lion' is lucidrains",
|
9 |
+
"lr": "learning rate, if null will use CLI or main JSON config value",
|
10 |
+
"lr_scheduler": "'constant' or 'cosine'",
|
11 |
+
"lr_warmup_steps": "number of steps to warm up the LR to the target LR; if null, will use the CLI value or default to a value based on max epochs",
|
12 |
+
"lr_decay_steps": "number of steps to decay the LR to zero for cosine; if null, will use the CLI value or default to a value based on max epochs",
|
13 |
+
"betas": "exponential decay rates for the moment estimates",
|
14 |
+
"epsilon": "value added to denominator for numerical stability, unused for lion",
|
15 |
+
"weight_decay": "weight decay (L2 penalty)",
|
16 |
+
"------------------": "-----------------",
|
17 |
+
"freeze_embeddings": "whether to freeze the text embeddings",
|
18 |
+
"freeze_front_n_layers": "if not null, freeze the front N layers of the text encoder (you can pass e.g. -2 to leave only the last 2 layers unfrozen)",
|
19 |
+
"freeze_final_layer_norm": "whether to freeze the text encoder's final layer norm"
|
20 |
+
},
|
21 |
+
"base": {
|
22 |
+
"optimizer": "adamw8bit",
|
23 |
+
"lr": 2e-6,
|
24 |
+
"lr_scheduler": "cosine",
|
25 |
+
"lr_decay_steps": null,
|
26 |
+
"lr_warmup_steps": null,
|
27 |
+
"betas": [0.9, 0.999],
|
28 |
+
"epsilon": 1e-8,
|
29 |
+
"weight_decay": 0.010
|
30 |
+
},
|
31 |
+
"text_encoder_overrides": {
|
32 |
+
"optimizer": null,
|
33 |
+
"lr": 3e-7,
|
34 |
+
"lr_scheduler": null,
|
35 |
+
"lr_decay_steps": null,
|
36 |
+
"lr_warmup_steps": null,
|
37 |
+
"betas": null,
|
38 |
+
"epsilon": null,
|
39 |
+
"weight_decay": null
|
40 |
+
},
|
41 |
+
"text_encoder_freezing": {
|
42 |
+
"unfreeze_last_n_layers": 2
|
43 |
+
}
|
44 |
+
}
|