End of training
README.md CHANGED

@@ -6,7 +6,7 @@ tags:
 - axolotl
 - generated_from_trainer
 model-index:
-- name:
+- name: gemma-2-27b
   results: []
 ---
 
@@ -34,8 +34,8 @@ datasets:
 dataset_prepared_path: gemmadataset
 val_set_size: 0
 output_dir: models/gemma27b
-
-
+lora_model_dir: models/gemma27b/checkpoint-50
+auto_resume_from_checkpoints: true
 sequence_len: 4096
 sample_packing: true
 pad_to_sequence_len: true
@@ -59,7 +59,7 @@ wandb_log_model:
 
 gradient_accumulation_steps: 4
 micro_batch_size: 1
-num_epochs:
+num_epochs: 10
 optimizer: adamw_torch
 lr_scheduler: cosine
 learning_rate: 0.0002
@@ -69,20 +69,20 @@ group_by_length: false
 bf16: true
 fp16:
 tf32: false
-hub_model_id: afrias5/
+hub_model_id: afrias5/gemma-2-27b
 gradient_checkpointing: true
 early_stopping_patience:
 resume_from_checkpoint:
 local_rank:
 logging_steps: 1
 xformers_attention:
-flash_attention:
+flash_attention: false
 s2_attention:
 logging_steps: 1
 warmup_steps: 10
 # eval_steps: 300
 saves_per_epoch: 1
-save_total_limit:
+save_total_limit: 1
 debug:
 deepspeed:
 weight_decay: 0.0
@@ -96,8 +96,8 @@ special_tokens:
 
 </details><br>
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/afrias5/GemmaFeed/runs/
-#
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/afrias5/GemmaFeed/runs/z6949qp5)
+# gemma-2-27b
 
 This model is a fine-tuned version of [google/gemma-2-27b](https://huggingface.co/google/gemma-2-27b) on the None dataset.
 
@@ -130,7 +130,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs:
+- num_epochs: 10
 
 ### Training results
 
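For readers skimming the diff, here is a minimal sketch of how the options this commit fills in sit together in the axolotl config. The values are taken from the diff above; the inline comments describe the usual axolotl meaning of each option and are an interpretation, not part of the original card.

```yaml
# Sketch only: options changed in this commit, with assumed (typical axolotl) semantics.
output_dir: models/gemma27b
lora_model_dir: models/gemma27b/checkpoint-50   # start from the LoRA adapter weights saved at this checkpoint
auto_resume_from_checkpoints: true              # resume from the most recent checkpoint in output_dir if one exists
num_epochs: 10                                  # total number of training epochs for the run
hub_model_id: afrias5/gemma-2-27b               # Hub repo the trained adapter is pushed to
flash_attention: false                          # flash attention disabled for this run
save_total_limit: 1                             # keep only the latest saved checkpoint
saves_per_epoch: 1                              # save once per epoch
```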