tweak config to work
examples/openllama-3b/config.yml (CHANGED)
@@ -26,17 +26,18 @@ wandb_watch:
 wandb_run_id:
 wandb_log_model:
 output_dir: ./openllama-out
-
-micro_batch_size:
+gradient_accumulation_steps: 1
+micro_batch_size: 1
 num_epochs: 3
 optimizer: adamw_bnb_8bit
 torchdistx_path:
 lr_scheduler: cosine
-learning_rate: 0.
+learning_rate: 0.00001
 train_on_inputs: false
 group_by_length: false
+float16: true
 bf16: false
-fp16:
+fp16: false
 tf32: false
 gradient_checkpointing: true
 early_stopping_patience:
@@ -52,7 +53,7 @@ eval_steps: 50
 save_steps:
 debug:
 deepspeed:
-weight_decay: 0.
+weight_decay: 0.1
 fsdp:
 fsdp_config:
 special_tokens:
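For quick reference, the training-related keys after this change amount to the sketch below. The inline comments are explanatory glosses, not part of the committed config.yml, and the reading of float16 as a plain half-precision load rather than AMP mixed precision is an assumption. With micro_batch_size: 1 and gradient_accumulation_steps: 1, the effective batch size is micro_batch_size × gradient_accumulation_steps × number of GPUs, i.e. one example per GPU per optimizer step.

# Updated hyperparameters from this commit, with illustrative comments
# (the comments do not appear in the committed file)
gradient_accumulation_steps: 1  # optimizer steps after every micro-batch
micro_batch_size: 1             # examples per GPU per forward/backward pass
learning_rate: 0.00001          # 1e-5
float16: true                   # assumption: load/train weights in float16, no AMP
bf16: false                     # bfloat16 off
fp16: false                     # AMP fp16 off, since float16 is set instead
weight_decay: 0.1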