Update README.md
Browse files
README.md
CHANGED
@@ -22,6 +22,7 @@ base_model: Na0s/Llama-3.1-8b-Pruned-4-Layers
|
|
22 |
batch_size=2,
|
23 |
steps=10000, gradient_accumulation_steps = 4,
|
24 |
warmup_steps = 5,
|
|
|
25 |
learning_rate = 2e-4,
|
26 |
fp16 = not is_bfloat16_supported(),
|
27 |
bf16 = is_bfloat16_supported(),
|
@@ -29,7 +30,7 @@ base_model: Na0s/Llama-3.1-8b-Pruned-4-Layers
|
|
29 |
optim = "adamw_8bit",
|
30 |
weight_decay = 0.01,
|
31 |
lr_scheduler_type = "linear",
|
32 |
-
seed = 3407
|
33 |
|
34 |
### Training Data
|
35 |
|
|
|
22 |
batch_size=2,
|
23 |
steps=10000, gradient_accumulation_steps = 4,
|
24 |
warmup_steps = 5,
|
25 |
+
max_steps = 10000,
|
26 |
learning_rate = 2e-4,
|
27 |
fp16 = not is_bfloat16_supported(),
|
28 |
bf16 = is_bfloat16_supported(),
|
|
|
30 |
optim = "adamw_8bit",
|
31 |
weight_decay = 0.01,
|
32 |
lr_scheduler_type = "linear",
|
33 |
+
seed = 3407
|
34 |
|
35 |
### Training Data
|
36 |
|