peterbeamish committed on
Commit
0273197
1 Parent(s): a1e5954

End of training

Browse files
Files changed (4) hide show
  1. README.md +13 -14
  2. pytorch_model.bin +1 -1
  3. trainer_state.json +64 -59
  4. training_args.bin +2 -2
README.md CHANGED
@@ -17,12 +17,12 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: nan
21
- - Rouge1: 0.04
22
- - Rouge2: 0.0
23
- - Rougel: 0.04
24
- - Rougelsum: 0.04
25
- - Gen Len: 18.4
26
 
27
  ## Model description
28
 
@@ -41,22 +41,21 @@ More information needed
41
  ### Training hyperparameters
42
 
43
  The following hyperparameters were used during training:
44
- - learning_rate: 5e-05
45
- - train_batch_size: 16
46
- - eval_batch_size: 16
47
  - seed: 42
48
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
  - lr_scheduler_type: linear
50
- - num_epochs: 4
51
 
52
  ### Training results
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
55
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
56
- | No log | 1.0 | 1 | nan | 0.04 | 0.0 | 0.04 | 0.04 | 18.4 |
57
- | No log | 2.0 | 2 | nan | 0.04 | 0.0 | 0.04 | 0.04 | 18.4 |
58
- | No log | 3.0 | 3 | nan | 0.04 | 0.0 | 0.04 | 0.04 | 18.4 |
59
- | No log | 4.0 | 4 | nan | 0.04 | 0.0 | 0.04 | 0.04 | 18.4 |
60
 
61
 
62
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [google/flan-t5-small](https://huggingface.co/google/flan-t5-small) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.3339
21
+ - Rouge1: 0.3861
22
+ - Rouge2: 0.2867
23
+ - Rougel: 0.3878
24
+ - Rougelsum: 0.3898
25
+ - Gen Len: 13.16
26
 
27
  ## Model description
28
 
 
41
  ### Training hyperparameters
42
 
43
  The following hyperparameters were used during training:
44
+ - learning_rate: 4e-05
45
+ - train_batch_size: 2
46
+ - eval_batch_size: 2
47
  - seed: 42
48
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
  - lr_scheduler_type: linear
50
+ - num_epochs: 6
51
 
52
  ### Training results
53
 
54
  | Training Loss | Epoch | Step | Validation Loss | Rouge1 | Rouge2 | Rougel | Rougelsum | Gen Len |
55
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|:------:|:---------:|:-------:|
56
+ | 1.9101 | 2.0 | 100 | 1.4032 | 0.3914 | 0.2858 | 0.3924 | 0.3934 | 12.7 |
57
+ | 1.4264 | 4.0 | 200 | 1.3399 | 0.3805 | 0.287 | 0.382 | 0.3841 | 12.68 |
58
+ | 1.3121 | 6.0 | 300 | 1.3339 | 0.3861 | 0.2867 | 0.3878 | 0.3898 | 13.16 |
 
59
 
60
 
61
  ### Framework versions
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f61e25eb29b9917691bb35646a92c3f398d03c9bf45976973ce7c3d5a5fa8fe0
3
  size 307910149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:741144121518df3bcb91f5111ecc4fea4c3ddcb016d849203a9ef046779f8bdf
3
  size 307910149
trainer_state.json CHANGED
@@ -1,80 +1,85 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 4.0,
5
- "eval_steps": 500,
6
- "global_step": 4,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "eval_gen_len": 18.4,
14
- "eval_loss": NaN,
15
- "eval_rouge1": 0.04,
16
- "eval_rouge2": 0.0,
17
- "eval_rougeL": 0.04,
18
- "eval_rougeLsum": 0.04,
19
- "eval_runtime": 0.5268,
20
- "eval_samples_per_second": 18.984,
21
- "eval_steps_per_second": 1.898,
22
- "step": 1
23
  },
24
  {
25
  "epoch": 2.0,
26
- "eval_gen_len": 18.4,
27
- "eval_loss": NaN,
28
- "eval_rouge1": 0.04,
29
- "eval_rouge2": 0.0,
30
- "eval_rougeL": 0.04,
31
- "eval_rougeLsum": 0.04,
32
- "eval_runtime": 0.565,
33
- "eval_samples_per_second": 17.699,
34
- "eval_steps_per_second": 1.77,
35
- "step": 2
36
  },
37
  {
38
- "epoch": 3.0,
39
- "eval_gen_len": 18.4,
40
- "eval_loss": NaN,
41
- "eval_rouge1": 0.04,
42
- "eval_rouge2": 0.0,
43
- "eval_rougeL": 0.04,
44
- "eval_rougeLsum": 0.04,
45
- "eval_runtime": 0.542,
46
- "eval_samples_per_second": 18.45,
47
- "eval_steps_per_second": 1.845,
48
- "step": 3
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_gen_len": 18.4,
53
- "eval_loss": NaN,
54
- "eval_rouge1": 0.04,
55
- "eval_rouge2": 0.0,
56
- "eval_rougeL": 0.04,
57
- "eval_rougeLsum": 0.04,
58
- "eval_runtime": 0.5202,
59
- "eval_samples_per_second": 19.222,
60
- "eval_steps_per_second": 1.922,
61
- "step": 4
62
  },
63
  {
64
- "epoch": 4.0,
65
- "step": 4,
66
- "total_flos": 7435620188160.0,
67
- "train_loss": 0.0,
68
- "train_runtime": 2.8469,
69
- "train_samples_per_second": 14.05,
70
- "train_steps_per_second": 1.405
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  }
72
  ],
73
- "logging_steps": 500,
74
- "max_steps": 4,
75
- "num_train_epochs": 4,
76
- "save_steps": 500,
77
- "total_flos": 7435620188160.0,
78
  "trial_name": null,
79
  "trial_params": null
80
  }
 
1
  {
2
+ "best_metric": 1.3398905992507935,
3
+ "best_model_checkpoint": "./trained_model/checkpoint-200",
4
+ "epoch": 6.0,
5
+ "eval_steps": 100,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 2.0,
13
+ "learning_rate": 2.6666666666666667e-05,
14
+ "loss": 1.9101,
15
+ "step": 100
 
 
 
 
 
 
 
16
  },
17
  {
18
  "epoch": 2.0,
19
+ "eval_gen_len": 12.7,
20
+ "eval_loss": 1.4032484292984009,
21
+ "eval_rouge1": 0.3914,
22
+ "eval_rouge2": 0.2858,
23
+ "eval_rougeL": 0.3924,
24
+ "eval_rougeLsum": 0.3934,
25
+ "eval_runtime": 14.4875,
26
+ "eval_samples_per_second": 6.903,
27
+ "eval_steps_per_second": 3.451,
28
+ "step": 100
29
  },
30
  {
31
+ "epoch": 4.0,
32
+ "learning_rate": 1.3333333333333333e-05,
33
+ "loss": 1.4264,
34
+ "step": 200
 
 
 
 
 
 
 
35
  },
36
  {
37
  "epoch": 4.0,
38
+ "eval_gen_len": 12.68,
39
+ "eval_loss": 1.3398905992507935,
40
+ "eval_rouge1": 0.3805,
41
+ "eval_rouge2": 0.287,
42
+ "eval_rougeL": 0.382,
43
+ "eval_rougeLsum": 0.3841,
44
+ "eval_runtime": 14.5729,
45
+ "eval_samples_per_second": 6.862,
46
+ "eval_steps_per_second": 3.431,
47
+ "step": 200
48
  },
49
  {
50
+ "epoch": 6.0,
51
+ "learning_rate": 0.0,
52
+ "loss": 1.3121,
53
+ "step": 300
54
+ },
55
+ {
56
+ "epoch": 6.0,
57
+ "eval_gen_len": 13.16,
58
+ "eval_loss": 1.3339253664016724,
59
+ "eval_rouge1": 0.3861,
60
+ "eval_rouge2": 0.2867,
61
+ "eval_rougeL": 0.3878,
62
+ "eval_rougeLsum": 0.3898,
63
+ "eval_runtime": 15.3545,
64
+ "eval_samples_per_second": 6.513,
65
+ "eval_steps_per_second": 3.256,
66
+ "step": 300
67
+ },
68
+ {
69
+ "epoch": 6.0,
70
+ "step": 300,
71
+ "total_flos": 111534302822400.0,
72
+ "train_loss": 1.5495406595865886,
73
+ "train_runtime": 74.9004,
74
+ "train_samples_per_second": 8.011,
75
+ "train_steps_per_second": 4.005
76
  }
77
  ],
78
+ "logging_steps": 100,
79
+ "max_steps": 300,
80
+ "num_train_epochs": 6,
81
+ "save_steps": 200,
82
+ "total_flos": 111534302822400.0,
83
  "trial_name": null,
84
  "trial_params": null
85
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c3058916ffeb55eb6dddcb4252c3c0b496033bbd44e5e22a2047f428bc39317
3
- size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9da472b08deaef8c8414c7727a31d6ee03fb06b4f3e229ea402bdd50ebbc5835
3
+ size 4219