sedrickkeh committed on
Commit
6c8e8a2
1 Parent(s): 8b5d960

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -1
  2. all_results.json +5 -5
  3. eval_results.json +3 -3
  4. train_results.json +2 -2
  5. trainer_state.json +11 -11
README.md CHANGED
@@ -4,6 +4,7 @@ license: gemma
4
  base_model: google/gemma-2-9b
5
  tags:
6
  - llama-factory
 
7
  - generated_from_trainer
8
  model-index:
9
  - name: hp_ablations_gemma_bsz1024
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # hp_ablations_gemma_bsz1024
17
 
18
- This model is a fine-tuned version of [google/gemma-2-9b](https://huggingface.co/google/gemma-2-9b) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 0.5917
21
 
 
4
  base_model: google/gemma-2-9b
5
  tags:
6
  - llama-factory
7
+ - full
8
  - generated_from_trainer
9
  model-index:
10
  - name: hp_ablations_gemma_bsz1024
 
16
 
17
  # hp_ablations_gemma_bsz1024
18
 
19
+ This model is a fine-tuned version of [google/gemma-2-9b](https://huggingface.co/google/gemma-2-9b) on the mlfoundations-dev/oh-dcft-v3.1-gpt-4o-mini dataset.
20
  It achieves the following results on the evaluation set:
21
  - Loss: 0.5917
22
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 2.9946403385049365,
3
  "eval_loss": 0.5917297005653381,
4
- "eval_runtime": 356.383,
5
- "eval_samples_per_second": 33.506,
6
- "eval_steps_per_second": 0.525,
7
  "total_flos": 5052524767739904.0,
8
  "train_loss": 0.5641303292586612,
9
- "train_runtime": 61418.5213,
10
- "train_samples_per_second": 11.081,
11
  "train_steps_per_second": 0.011
12
  }
 
1
  {
2
  "epoch": 2.9946403385049365,
3
  "eval_loss": 0.5917297005653381,
4
+ "eval_runtime": 355.6789,
5
+ "eval_samples_per_second": 33.572,
6
+ "eval_steps_per_second": 0.526,
7
  "total_flos": 5052524767739904.0,
8
  "train_loss": 0.5641303292586612,
9
+ "train_runtime": 61317.9429,
10
+ "train_samples_per_second": 11.1,
11
  "train_steps_per_second": 0.011
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 2.9946403385049365,
3
  "eval_loss": 0.5917297005653381,
4
- "eval_runtime": 356.383,
5
- "eval_samples_per_second": 33.506,
6
- "eval_steps_per_second": 0.525
7
  }
 
1
  {
2
  "epoch": 2.9946403385049365,
3
  "eval_loss": 0.5917297005653381,
4
+ "eval_runtime": 355.6789,
5
+ "eval_samples_per_second": 33.572,
6
+ "eval_steps_per_second": 0.526
7
  }
train_results.json CHANGED
@@ -2,7 +2,7 @@
2
  "epoch": 2.9946403385049365,
3
  "total_flos": 5052524767739904.0,
4
  "train_loss": 0.5641303292586612,
5
- "train_runtime": 61418.5213,
6
- "train_samples_per_second": 11.081,
7
  "train_steps_per_second": 0.011
8
  }
 
2
  "epoch": 2.9946403385049365,
3
  "total_flos": 5052524767739904.0,
4
  "train_loss": 0.5641303292586612,
5
+ "train_runtime": 61317.9429,
6
+ "train_samples_per_second": 11.1,
7
  "train_steps_per_second": 0.011
8
  }
trainer_state.json CHANGED
@@ -165,9 +165,9 @@
165
  {
166
  "epoch": 0.9974612129760225,
167
  "eval_loss": 0.5952667593955994,
168
- "eval_runtime": 354.3828,
169
- "eval_samples_per_second": 33.695,
170
- "eval_steps_per_second": 0.528,
171
  "step": 221
172
  },
173
  {
@@ -327,9 +327,9 @@
327
  {
328
  "epoch": 1.9960507757404795,
329
  "eval_loss": 0.5866958498954773,
330
- "eval_runtime": 356.554,
331
- "eval_samples_per_second": 33.49,
332
- "eval_steps_per_second": 0.524,
333
  "step": 442
334
  },
335
  {
@@ -489,9 +489,9 @@
489
  {
490
  "epoch": 2.9946403385049365,
491
  "eval_loss": 0.5917297005653381,
492
- "eval_runtime": 356.1014,
493
- "eval_samples_per_second": 33.533,
494
- "eval_steps_per_second": 0.525,
495
  "step": 663
496
  },
497
  {
@@ -499,8 +499,8 @@
499
  "step": 663,
500
  "total_flos": 5052524767739904.0,
501
  "train_loss": 0.5641303292586612,
502
- "train_runtime": 61418.5213,
503
- "train_samples_per_second": 11.081,
504
  "train_steps_per_second": 0.011
505
  }
506
  ],
 
165
  {
166
  "epoch": 0.9974612129760225,
167
  "eval_loss": 0.5952667593955994,
168
+ "eval_runtime": 355.1756,
169
+ "eval_samples_per_second": 33.62,
170
+ "eval_steps_per_second": 0.527,
171
  "step": 221
172
  },
173
  {
 
327
  {
328
  "epoch": 1.9960507757404795,
329
  "eval_loss": 0.5866958498954773,
330
+ "eval_runtime": 355.7934,
331
+ "eval_samples_per_second": 33.562,
332
+ "eval_steps_per_second": 0.526,
333
  "step": 442
334
  },
335
  {
 
489
  {
490
  "epoch": 2.9946403385049365,
491
  "eval_loss": 0.5917297005653381,
492
+ "eval_runtime": 355.5387,
493
+ "eval_samples_per_second": 33.586,
494
+ "eval_steps_per_second": 0.526,
495
  "step": 663
496
  },
497
  {
 
499
  "step": 663,
500
  "total_flos": 5052524767739904.0,
501
  "train_loss": 0.5641303292586612,
502
+ "train_runtime": 61317.9429,
503
+ "train_samples_per_second": 11.1,
504
  "train_steps_per_second": 0.011
505
  }
506
  ],