Ronaldodev committed on
Commit
597f332
1 Parent(s): ec3e334

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text2text-generation
6
+ base_model: Helsinki-NLP/opus-mt-fr-en
7
+ widget:
8
+ - text: "I love AutoTrain"
9
+ ---
10
+
11
+ # Model Trained Using AutoTrain
12
+
13
+ - Problem type: Seq2Seq
14
+
15
+ ## Validation Metrics
16
+ loss: 2.07828688621521
17
+
18
+ rouge1: 37.8388
19
+
20
+ rouge2: 19.8532
21
+
22
+ rougeL: 37.3344
23
+
24
+ rougeLsum: 37.4253
25
+
26
+ gen_len: 11.7958
27
+
28
+ runtime: 388.1088
29
+
30
+ samples_per_second: 1.224
31
+
32
+ steps_per_second: 0.307
33
+
34
+ epoch: 10.0
checkpoint-4745/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-fr-en",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.0,
5
+ "activation_function": "swish",
6
+ "add_bias_logits": false,
7
+ "add_final_layer_norm": false,
8
+ "architectures": [
9
+ "MarianMTModel"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bad_words_ids": [
13
+ [
14
+ 59513
15
+ ]
16
+ ],
17
+ "bos_token_id": 0,
18
+ "classif_dropout": 0.0,
19
+ "classifier_dropout": 0.0,
20
+ "d_model": 512,
21
+ "decoder_attention_heads": 8,
22
+ "decoder_ffn_dim": 2048,
23
+ "decoder_layerdrop": 0.0,
24
+ "decoder_layers": 6,
25
+ "decoder_start_token_id": 59513,
26
+ "decoder_vocab_size": 59514,
27
+ "dropout": 0.1,
28
+ "encoder_attention_heads": 8,
29
+ "encoder_ffn_dim": 2048,
30
+ "encoder_layerdrop": 0.0,
31
+ "encoder_layers": 6,
32
+ "eos_token_id": 0,
33
+ "forced_eos_token_id": 0,
34
+ "gradient_checkpointing": false,
35
+ "id2label": {
36
+ "0": "LABEL_0",
37
+ "1": "LABEL_1",
38
+ "2": "LABEL_2"
39
+ },
40
+ "init_std": 0.02,
41
+ "is_encoder_decoder": true,
42
+ "label2id": {
43
+ "LABEL_0": 0,
44
+ "LABEL_1": 1,
45
+ "LABEL_2": 2
46
+ },
47
+ "max_length": 512,
48
+ "max_position_embeddings": 512,
49
+ "model_type": "marian",
50
+ "normalize_before": false,
51
+ "normalize_embedding": false,
52
+ "num_beams": 4,
53
+ "num_hidden_layers": 6,
54
+ "pad_token_id": 59513,
55
+ "scale_embedding": true,
56
+ "share_encoder_decoder_embeddings": true,
57
+ "static_position_embeddings": true,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.44.2",
60
+ "use_cache": false,
61
+ "vocab_size": 59514
62
+ }
checkpoint-4745/generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 59513
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 59513,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 59513,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.44.2"
16
+ }
checkpoint-4745/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:244221195120bb40e78229e4ec91ea2626ebc10293ea3df2e110120457c4e07a
3
+ size 298705768
checkpoint-4745/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:786e5e8787ead2fe449c9f372dfe3996243f3099dfcced0fd47ea37fb7673fc5
3
+ size 597081786
checkpoint-4745/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fd499ec4176e73814cc7276d32ed047cf9dba0faa65457f0bdab11c90a521b6
3
+ size 13990
checkpoint-4745/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42158f52ca919e4f84f8dda8ebf6a418aa3544991867273f27053ca96a464d97
3
+ size 1064
checkpoint-4745/source.spm ADDED
Binary file (802 kB). View file
 
checkpoint-4745/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
checkpoint-4745/target.spm ADDED
Binary file (778 kB). View file
 
checkpoint-4745/tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "59513": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "eos_token": "</s>",
30
+ "model_max_length": 512,
31
+ "pad_token": "<pad>",
32
+ "separate_vocabs": false,
33
+ "source_lang": "fr",
34
+ "sp_model_kwargs": {},
35
+ "target_lang": "en",
36
+ "tokenizer_class": "MarianTokenizer",
37
+ "unk_token": "<unk>"
38
+ }
checkpoint-4745/trainer_state.json ADDED
@@ -0,0 +1,1430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.07828688621521,
3
+ "best_model_checkpoint": "fr-mina/checkpoint-4745",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4745,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.026343519494204427,
13
+ "grad_norm": 53.76327133178711,
14
+ "learning_rate": 4.390586582367405e-07,
15
+ "loss": 8.0275,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.05268703898840885,
20
+ "grad_norm": 31.924686431884766,
21
+ "learning_rate": 8.78117316473481e-07,
22
+ "loss": 8.0678,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.07903055848261328,
27
+ "grad_norm": 39.08976745605469,
28
+ "learning_rate": 1.3171759747102214e-06,
29
+ "loss": 7.683,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.1053740779768177,
34
+ "grad_norm": 35.28089904785156,
35
+ "learning_rate": 1.756234632946962e-06,
36
+ "loss": 7.5655,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.13171759747102213,
41
+ "grad_norm": 26.91787338256836,
42
+ "learning_rate": 2.1952932911837023e-06,
43
+ "loss": 7.4298,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.15806111696522657,
48
+ "grad_norm": 36.86225128173828,
49
+ "learning_rate": 2.634351949420443e-06,
50
+ "loss": 7.1249,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.18440463645943098,
55
+ "grad_norm": 35.9243278503418,
56
+ "learning_rate": 3.073410607657183e-06,
57
+ "loss": 6.6859,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.2107481559536354,
62
+ "grad_norm": 31.12093734741211,
63
+ "learning_rate": 3.512469265893924e-06,
64
+ "loss": 6.5787,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.23709167544783982,
69
+ "grad_norm": 21.474767684936523,
70
+ "learning_rate": 3.951527924130664e-06,
71
+ "loss": 6.3458,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.26343519494204426,
76
+ "grad_norm": 29.76708984375,
77
+ "learning_rate": 4.3905865823674045e-06,
78
+ "loss": 6.2686,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.2897787144362487,
83
+ "grad_norm": 26.34191131591797,
84
+ "learning_rate": 4.829645240604145e-06,
85
+ "loss": 5.9099,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.31612223393045313,
90
+ "grad_norm": 25.640949249267578,
91
+ "learning_rate": 5.268703898840886e-06,
92
+ "loss": 5.1909,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.3424657534246575,
97
+ "grad_norm": 28.177034378051758,
98
+ "learning_rate": 5.707762557077626e-06,
99
+ "loss": 5.6095,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.36880927291886195,
104
+ "grad_norm": 31.351806640625,
105
+ "learning_rate": 6.146821215314366e-06,
106
+ "loss": 5.356,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.3951527924130664,
111
+ "grad_norm": 31.123451232910156,
112
+ "learning_rate": 6.585879873551107e-06,
113
+ "loss": 5.3215,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.4214963119072708,
118
+ "grad_norm": 25.988691329956055,
119
+ "learning_rate": 7.024938531787848e-06,
120
+ "loss": 4.9161,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.44783983140147526,
125
+ "grad_norm": 20.850446701049805,
126
+ "learning_rate": 7.463997190024588e-06,
127
+ "loss": 5.0717,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.47418335089567965,
132
+ "grad_norm": 28.87261962890625,
133
+ "learning_rate": 7.903055848261329e-06,
134
+ "loss": 4.7932,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.5005268703898841,
139
+ "grad_norm": 39.34148406982422,
140
+ "learning_rate": 8.342114506498068e-06,
141
+ "loss": 4.9443,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.5268703898840885,
146
+ "grad_norm": 24.568782806396484,
147
+ "learning_rate": 8.781173164734809e-06,
148
+ "loss": 4.6028,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.553213909378293,
153
+ "grad_norm": 38.465816497802734,
154
+ "learning_rate": 9.22023182297155e-06,
155
+ "loss": 4.2893,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.5795574288724974,
160
+ "grad_norm": 22.170591354370117,
161
+ "learning_rate": 9.65929048120829e-06,
162
+ "loss": 4.3665,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.6059009483667018,
167
+ "grad_norm": 25.342151641845703,
168
+ "learning_rate": 1.009834913944503e-05,
169
+ "loss": 4.4401,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.6322444678609063,
174
+ "grad_norm": 29.228736877441406,
175
+ "learning_rate": 1.0537407797681771e-05,
176
+ "loss": 4.0221,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.6585879873551106,
181
+ "grad_norm": 26.896635055541992,
182
+ "learning_rate": 1.0976466455918512e-05,
183
+ "loss": 3.9198,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.684931506849315,
188
+ "grad_norm": 22.59519386291504,
189
+ "learning_rate": 1.1415525114155251e-05,
190
+ "loss": 3.9569,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 0.7112750263435195,
195
+ "grad_norm": 27.328128814697266,
196
+ "learning_rate": 1.1854583772391992e-05,
197
+ "loss": 3.7594,
198
+ "step": 675
199
+ },
200
+ {
201
+ "epoch": 0.7376185458377239,
202
+ "grad_norm": 28.952634811401367,
203
+ "learning_rate": 1.2293642430628732e-05,
204
+ "loss": 3.6405,
205
+ "step": 700
206
+ },
207
+ {
208
+ "epoch": 0.7639620653319283,
209
+ "grad_norm": 34.70003890991211,
210
+ "learning_rate": 1.2732701088865473e-05,
211
+ "loss": 3.5767,
212
+ "step": 725
213
+ },
214
+ {
215
+ "epoch": 0.7903055848261328,
216
+ "grad_norm": 31.843168258666992,
217
+ "learning_rate": 1.3171759747102214e-05,
218
+ "loss": 3.424,
219
+ "step": 750
220
+ },
221
+ {
222
+ "epoch": 0.8166491043203372,
223
+ "grad_norm": 43.431888580322266,
224
+ "learning_rate": 1.3610818405338955e-05,
225
+ "loss": 3.6013,
226
+ "step": 775
227
+ },
228
+ {
229
+ "epoch": 0.8429926238145417,
230
+ "grad_norm": 24.29863929748535,
231
+ "learning_rate": 1.4049877063575696e-05,
232
+ "loss": 3.6026,
233
+ "step": 800
234
+ },
235
+ {
236
+ "epoch": 0.8693361433087461,
237
+ "grad_norm": 18.842683792114258,
238
+ "learning_rate": 1.4488935721812435e-05,
239
+ "loss": 3.2906,
240
+ "step": 825
241
+ },
242
+ {
243
+ "epoch": 0.8956796628029505,
244
+ "grad_norm": 24.327327728271484,
245
+ "learning_rate": 1.4927994380049176e-05,
246
+ "loss": 3.3199,
247
+ "step": 850
248
+ },
249
+ {
250
+ "epoch": 0.9220231822971549,
251
+ "grad_norm": 34.937644958496094,
252
+ "learning_rate": 1.536705303828592e-05,
253
+ "loss": 3.3293,
254
+ "step": 875
255
+ },
256
+ {
257
+ "epoch": 0.9483667017913593,
258
+ "grad_norm": 20.94723129272461,
259
+ "learning_rate": 1.5806111696522658e-05,
260
+ "loss": 3.061,
261
+ "step": 900
262
+ },
263
+ {
264
+ "epoch": 0.9747102212855637,
265
+ "grad_norm": 36.616371154785156,
266
+ "learning_rate": 1.6245170354759397e-05,
267
+ "loss": 3.3562,
268
+ "step": 925
269
+ },
270
+ {
271
+ "epoch": 1.0,
272
+ "eval_gen_len": 29.7495,
273
+ "eval_loss": 3.1300747394561768,
274
+ "eval_rouge1": 14.3649,
275
+ "eval_rouge2": 3.2092,
276
+ "eval_rougeL": 14.1859,
277
+ "eval_rougeLsum": 14.1403,
278
+ "eval_runtime": 1240.6646,
279
+ "eval_samples_per_second": 0.383,
280
+ "eval_steps_per_second": 0.096,
281
+ "step": 949
282
+ },
283
+ {
284
+ "epoch": 1.0010537407797682,
285
+ "grad_norm": 26.46944236755371,
286
+ "learning_rate": 1.6684229012996136e-05,
287
+ "loss": 3.2056,
288
+ "step": 950
289
+ },
290
+ {
291
+ "epoch": 1.0273972602739727,
292
+ "grad_norm": 24.677677154541016,
293
+ "learning_rate": 1.7123287671232875e-05,
294
+ "loss": 3.0972,
295
+ "step": 975
296
+ },
297
+ {
298
+ "epoch": 1.053740779768177,
299
+ "grad_norm": 44.66987609863281,
300
+ "learning_rate": 1.7562346329469618e-05,
301
+ "loss": 3.1316,
302
+ "step": 1000
303
+ },
304
+ {
305
+ "epoch": 1.0800842992623814,
306
+ "grad_norm": 22.583946228027344,
307
+ "learning_rate": 1.8001404987706357e-05,
308
+ "loss": 2.9408,
309
+ "step": 1025
310
+ },
311
+ {
312
+ "epoch": 1.106427818756586,
313
+ "grad_norm": 37.008148193359375,
314
+ "learning_rate": 1.84404636459431e-05,
315
+ "loss": 3.3225,
316
+ "step": 1050
317
+ },
318
+ {
319
+ "epoch": 1.1327713382507902,
320
+ "grad_norm": 31.299516677856445,
321
+ "learning_rate": 1.887952230417984e-05,
322
+ "loss": 2.8608,
323
+ "step": 1075
324
+ },
325
+ {
326
+ "epoch": 1.1591148577449948,
327
+ "grad_norm": 41.19516372680664,
328
+ "learning_rate": 1.931858096241658e-05,
329
+ "loss": 2.9428,
330
+ "step": 1100
331
+ },
332
+ {
333
+ "epoch": 1.1854583772391991,
334
+ "grad_norm": 36.30160903930664,
335
+ "learning_rate": 1.975763962065332e-05,
336
+ "loss": 3.0541,
337
+ "step": 1125
338
+ },
339
+ {
340
+ "epoch": 1.2118018967334037,
341
+ "grad_norm": 29.6497859954834,
342
+ "learning_rate": 2.019669827889006e-05,
343
+ "loss": 3.0771,
344
+ "step": 1150
345
+ },
346
+ {
347
+ "epoch": 1.238145416227608,
348
+ "grad_norm": 28.226837158203125,
349
+ "learning_rate": 2.0635756937126803e-05,
350
+ "loss": 2.8116,
351
+ "step": 1175
352
+ },
353
+ {
354
+ "epoch": 1.2644889357218125,
355
+ "grad_norm": 37.17683029174805,
356
+ "learning_rate": 2.1074815595363542e-05,
357
+ "loss": 2.709,
358
+ "step": 1200
359
+ },
360
+ {
361
+ "epoch": 1.2908324552160169,
362
+ "grad_norm": 24.86113929748535,
363
+ "learning_rate": 2.1513874253600282e-05,
364
+ "loss": 2.8454,
365
+ "step": 1225
366
+ },
367
+ {
368
+ "epoch": 1.3171759747102212,
369
+ "grad_norm": 20.32333755493164,
370
+ "learning_rate": 2.1952932911837024e-05,
371
+ "loss": 3.0137,
372
+ "step": 1250
373
+ },
374
+ {
375
+ "epoch": 1.3435194942044257,
376
+ "grad_norm": 20.579383850097656,
377
+ "learning_rate": 2.2391991570073764e-05,
378
+ "loss": 3.064,
379
+ "step": 1275
380
+ },
381
+ {
382
+ "epoch": 1.36986301369863,
383
+ "grad_norm": 34.56845474243164,
384
+ "learning_rate": 2.2831050228310503e-05,
385
+ "loss": 2.7602,
386
+ "step": 1300
387
+ },
388
+ {
389
+ "epoch": 1.3962065331928346,
390
+ "grad_norm": 26.004846572875977,
391
+ "learning_rate": 2.3270108886547242e-05,
392
+ "loss": 2.6513,
393
+ "step": 1325
394
+ },
395
+ {
396
+ "epoch": 1.422550052687039,
397
+ "grad_norm": 27.322162628173828,
398
+ "learning_rate": 2.3709167544783985e-05,
399
+ "loss": 3.0455,
400
+ "step": 1350
401
+ },
402
+ {
403
+ "epoch": 1.4488935721812435,
404
+ "grad_norm": 25.812335968017578,
405
+ "learning_rate": 2.4148226203020724e-05,
406
+ "loss": 2.6421,
407
+ "step": 1375
408
+ },
409
+ {
410
+ "epoch": 1.4752370916754478,
411
+ "grad_norm": 22.53229522705078,
412
+ "learning_rate": 2.4587284861257463e-05,
413
+ "loss": 2.6565,
414
+ "step": 1400
415
+ },
416
+ {
417
+ "epoch": 1.5015806111696524,
418
+ "grad_norm": 21.36598777770996,
419
+ "learning_rate": 2.5026343519494206e-05,
420
+ "loss": 2.8773,
421
+ "step": 1425
422
+ },
423
+ {
424
+ "epoch": 1.5279241306638567,
425
+ "grad_norm": 21.852357864379883,
426
+ "learning_rate": 2.5465402177730945e-05,
427
+ "loss": 2.5911,
428
+ "step": 1450
429
+ },
430
+ {
431
+ "epoch": 1.554267650158061,
432
+ "grad_norm": 24.827373504638672,
433
+ "learning_rate": 2.5904460835967688e-05,
434
+ "loss": 2.8661,
435
+ "step": 1475
436
+ },
437
+ {
438
+ "epoch": 1.5806111696522656,
439
+ "grad_norm": 26.9720458984375,
440
+ "learning_rate": 2.6343519494204427e-05,
441
+ "loss": 2.6055,
442
+ "step": 1500
443
+ },
444
+ {
445
+ "epoch": 1.60695468914647,
446
+ "grad_norm": 23.86491584777832,
447
+ "learning_rate": 2.6782578152441166e-05,
448
+ "loss": 2.4583,
449
+ "step": 1525
450
+ },
451
+ {
452
+ "epoch": 1.6332982086406744,
453
+ "grad_norm": 35.060543060302734,
454
+ "learning_rate": 2.722163681067791e-05,
455
+ "loss": 2.7086,
456
+ "step": 1550
457
+ },
458
+ {
459
+ "epoch": 1.6596417281348788,
460
+ "grad_norm": 33.904869079589844,
461
+ "learning_rate": 2.766069546891465e-05,
462
+ "loss": 3.0039,
463
+ "step": 1575
464
+ },
465
+ {
466
+ "epoch": 1.685985247629083,
467
+ "grad_norm": 16.747236251831055,
468
+ "learning_rate": 2.809975412715139e-05,
469
+ "loss": 2.7121,
470
+ "step": 1600
471
+ },
472
+ {
473
+ "epoch": 1.7123287671232876,
474
+ "grad_norm": 29.914405822753906,
475
+ "learning_rate": 2.853881278538813e-05,
476
+ "loss": 2.4394,
477
+ "step": 1625
478
+ },
479
+ {
480
+ "epoch": 1.7386722866174922,
481
+ "grad_norm": 34.94831848144531,
482
+ "learning_rate": 2.897787144362487e-05,
483
+ "loss": 2.2323,
484
+ "step": 1650
485
+ },
486
+ {
487
+ "epoch": 1.7650158061116965,
488
+ "grad_norm": 23.3820743560791,
489
+ "learning_rate": 2.9416930101861612e-05,
490
+ "loss": 2.2026,
491
+ "step": 1675
492
+ },
493
+ {
494
+ "epoch": 1.7913593256059008,
495
+ "grad_norm": 23.368724822998047,
496
+ "learning_rate": 2.985598876009835e-05,
497
+ "loss": 2.3719,
498
+ "step": 1700
499
+ },
500
+ {
501
+ "epoch": 1.8177028451001054,
502
+ "grad_norm": 23.903703689575195,
503
+ "learning_rate": 3.0295047418335094e-05,
504
+ "loss": 2.6685,
505
+ "step": 1725
506
+ },
507
+ {
508
+ "epoch": 1.84404636459431,
509
+ "grad_norm": 28.905014038085938,
510
+ "learning_rate": 3.073410607657184e-05,
511
+ "loss": 2.798,
512
+ "step": 1750
513
+ },
514
+ {
515
+ "epoch": 1.8703898840885143,
516
+ "grad_norm": 37.410953521728516,
517
+ "learning_rate": 3.117316473480857e-05,
518
+ "loss": 2.8655,
519
+ "step": 1775
520
+ },
521
+ {
522
+ "epoch": 1.8967334035827186,
523
+ "grad_norm": 30.735639572143555,
524
+ "learning_rate": 3.1612223393045315e-05,
525
+ "loss": 2.4698,
526
+ "step": 1800
527
+ },
528
+ {
529
+ "epoch": 1.9230769230769231,
530
+ "grad_norm": 17.455341339111328,
531
+ "learning_rate": 3.205128205128206e-05,
532
+ "loss": 2.5119,
533
+ "step": 1825
534
+ },
535
+ {
536
+ "epoch": 1.9494204425711275,
537
+ "grad_norm": 34.35090255737305,
538
+ "learning_rate": 3.2490340709518794e-05,
539
+ "loss": 2.6294,
540
+ "step": 1850
541
+ },
542
+ {
543
+ "epoch": 1.975763962065332,
544
+ "grad_norm": 22.377056121826172,
545
+ "learning_rate": 3.292939936775553e-05,
546
+ "loss": 2.5728,
547
+ "step": 1875
548
+ },
549
+ {
550
+ "epoch": 2.0,
551
+ "eval_gen_len": 8.5832,
552
+ "eval_loss": 2.4636101722717285,
553
+ "eval_rouge1": 25.9213,
554
+ "eval_rouge2": 9.8991,
555
+ "eval_rougeL": 25.4651,
556
+ "eval_rougeLsum": 25.4305,
557
+ "eval_runtime": 181.2174,
558
+ "eval_samples_per_second": 2.621,
559
+ "eval_steps_per_second": 0.657,
560
+ "step": 1898
561
+ },
562
+ {
563
+ "epoch": 2.0021074815595363,
564
+ "grad_norm": 24.795652389526367,
565
+ "learning_rate": 3.336845802599227e-05,
566
+ "loss": 2.0971,
567
+ "step": 1900
568
+ },
569
+ {
570
+ "epoch": 2.0284510010537407,
571
+ "grad_norm": 42.50971984863281,
572
+ "learning_rate": 3.3807516684229015e-05,
573
+ "loss": 2.2645,
574
+ "step": 1925
575
+ },
576
+ {
577
+ "epoch": 2.0547945205479454,
578
+ "grad_norm": 28.93120765686035,
579
+ "learning_rate": 3.424657534246575e-05,
580
+ "loss": 2.2754,
581
+ "step": 1950
582
+ },
583
+ {
584
+ "epoch": 2.0811380400421498,
585
+ "grad_norm": 32.35176467895508,
586
+ "learning_rate": 3.4685634000702494e-05,
587
+ "loss": 1.9605,
588
+ "step": 1975
589
+ },
590
+ {
591
+ "epoch": 2.107481559536354,
592
+ "grad_norm": 20.049243927001953,
593
+ "learning_rate": 3.5124692658939236e-05,
594
+ "loss": 1.985,
595
+ "step": 2000
596
+ },
597
+ {
598
+ "epoch": 2.1338250790305584,
599
+ "grad_norm": 21.458831787109375,
600
+ "learning_rate": 3.556375131717597e-05,
601
+ "loss": 1.9294,
602
+ "step": 2025
603
+ },
604
+ {
605
+ "epoch": 2.1601685985247627,
606
+ "grad_norm": 29.076791763305664,
607
+ "learning_rate": 3.6002809975412715e-05,
608
+ "loss": 2.5018,
609
+ "step": 2050
610
+ },
611
+ {
612
+ "epoch": 2.1865121180189675,
613
+ "grad_norm": 17.459365844726562,
614
+ "learning_rate": 3.644186863364946e-05,
615
+ "loss": 2.2102,
616
+ "step": 2075
617
+ },
618
+ {
619
+ "epoch": 2.212855637513172,
620
+ "grad_norm": 35.115997314453125,
621
+ "learning_rate": 3.68809272918862e-05,
622
+ "loss": 2.2814,
623
+ "step": 2100
624
+ },
625
+ {
626
+ "epoch": 2.239199157007376,
627
+ "grad_norm": 20.998336791992188,
628
+ "learning_rate": 3.7319985950122936e-05,
629
+ "loss": 2.1817,
630
+ "step": 2125
631
+ },
632
+ {
633
+ "epoch": 2.2655426765015805,
634
+ "grad_norm": 24.086315155029297,
635
+ "learning_rate": 3.775904460835968e-05,
636
+ "loss": 2.3293,
637
+ "step": 2150
638
+ },
639
+ {
640
+ "epoch": 2.291886195995785,
641
+ "grad_norm": 15.898116111755371,
642
+ "learning_rate": 3.819810326659642e-05,
643
+ "loss": 2.3658,
644
+ "step": 2175
645
+ },
646
+ {
647
+ "epoch": 2.3182297154899896,
648
+ "grad_norm": 44.00349807739258,
649
+ "learning_rate": 3.863716192483316e-05,
650
+ "loss": 2.2536,
651
+ "step": 2200
652
+ },
653
+ {
654
+ "epoch": 2.344573234984194,
655
+ "grad_norm": 30.016952514648438,
656
+ "learning_rate": 3.90762205830699e-05,
657
+ "loss": 2.1975,
658
+ "step": 2225
659
+ },
660
+ {
661
+ "epoch": 2.3709167544783982,
662
+ "grad_norm": 29.138978958129883,
663
+ "learning_rate": 3.951527924130664e-05,
664
+ "loss": 2.1086,
665
+ "step": 2250
666
+ },
667
+ {
668
+ "epoch": 2.3972602739726026,
669
+ "grad_norm": 22.48296546936035,
670
+ "learning_rate": 3.995433789954338e-05,
671
+ "loss": 1.8731,
672
+ "step": 2275
673
+ },
674
+ {
675
+ "epoch": 2.4236037934668073,
676
+ "grad_norm": 38.969642639160156,
677
+ "learning_rate": 4.039339655778012e-05,
678
+ "loss": 2.4843,
679
+ "step": 2300
680
+ },
681
+ {
682
+ "epoch": 2.4499473129610116,
683
+ "grad_norm": 46.12045669555664,
684
+ "learning_rate": 4.0832455216016864e-05,
685
+ "loss": 2.0695,
686
+ "step": 2325
687
+ },
688
+ {
689
+ "epoch": 2.476290832455216,
690
+ "grad_norm": 16.667869567871094,
691
+ "learning_rate": 4.1271513874253606e-05,
692
+ "loss": 2.0357,
693
+ "step": 2350
694
+ },
695
+ {
696
+ "epoch": 2.5026343519494203,
697
+ "grad_norm": 28.602094650268555,
698
+ "learning_rate": 4.171057253249034e-05,
699
+ "loss": 1.9143,
700
+ "step": 2375
701
+ },
702
+ {
703
+ "epoch": 2.528977871443625,
704
+ "grad_norm": 21.20049476623535,
705
+ "learning_rate": 4.2149631190727085e-05,
706
+ "loss": 2.329,
707
+ "step": 2400
708
+ },
709
+ {
710
+ "epoch": 2.5553213909378294,
711
+ "grad_norm": 24.602462768554688,
712
+ "learning_rate": 4.258868984896383e-05,
713
+ "loss": 1.9732,
714
+ "step": 2425
715
+ },
716
+ {
717
+ "epoch": 2.5816649104320337,
718
+ "grad_norm": 41.749755859375,
719
+ "learning_rate": 4.3027748507200563e-05,
720
+ "loss": 2.3922,
721
+ "step": 2450
722
+ },
723
+ {
724
+ "epoch": 2.608008429926238,
725
+ "grad_norm": 29.759376525878906,
726
+ "learning_rate": 4.3466807165437306e-05,
727
+ "loss": 2.1928,
728
+ "step": 2475
729
+ },
730
+ {
731
+ "epoch": 2.6343519494204424,
732
+ "grad_norm": 27.628883361816406,
733
+ "learning_rate": 4.390586582367405e-05,
734
+ "loss": 2.6254,
735
+ "step": 2500
736
+ },
737
+ {
738
+ "epoch": 2.660695468914647,
739
+ "grad_norm": 20.805973052978516,
740
+ "learning_rate": 4.4344924481910785e-05,
741
+ "loss": 1.937,
742
+ "step": 2525
743
+ },
744
+ {
745
+ "epoch": 2.6870389884088515,
746
+ "grad_norm": 24.72698974609375,
747
+ "learning_rate": 4.478398314014753e-05,
748
+ "loss": 1.9697,
749
+ "step": 2550
750
+ },
751
+ {
752
+ "epoch": 2.713382507903056,
753
+ "grad_norm": 32.659202575683594,
754
+ "learning_rate": 4.522304179838426e-05,
755
+ "loss": 1.945,
756
+ "step": 2575
757
+ },
758
+ {
759
+ "epoch": 2.73972602739726,
760
+ "grad_norm": 30.338882446289062,
761
+ "learning_rate": 4.5662100456621006e-05,
762
+ "loss": 2.3917,
763
+ "step": 2600
764
+ },
765
+ {
766
+ "epoch": 2.7660695468914644,
767
+ "grad_norm": 24.97830581665039,
768
+ "learning_rate": 4.610115911485774e-05,
769
+ "loss": 2.3004,
770
+ "step": 2625
771
+ },
772
+ {
773
+ "epoch": 2.792413066385669,
774
+ "grad_norm": 20.519182205200195,
775
+ "learning_rate": 4.6540217773094484e-05,
776
+ "loss": 2.0284,
777
+ "step": 2650
778
+ },
779
+ {
780
+ "epoch": 2.8187565858798735,
781
+ "grad_norm": 18.327741622924805,
782
+ "learning_rate": 4.697927643133123e-05,
783
+ "loss": 1.8822,
784
+ "step": 2675
785
+ },
786
+ {
787
+ "epoch": 2.845100105374078,
788
+ "grad_norm": 28.89052963256836,
789
+ "learning_rate": 4.741833508956797e-05,
790
+ "loss": 2.3916,
791
+ "step": 2700
792
+ },
793
+ {
794
+ "epoch": 2.8714436248682826,
795
+ "grad_norm": 17.761756896972656,
796
+ "learning_rate": 4.7857393747804706e-05,
797
+ "loss": 2.1041,
798
+ "step": 2725
799
+ },
800
+ {
801
+ "epoch": 2.897787144362487,
802
+ "grad_norm": 16.94314193725586,
803
+ "learning_rate": 4.829645240604145e-05,
804
+ "loss": 2.0099,
805
+ "step": 2750
806
+ },
807
+ {
808
+ "epoch": 2.9241306638566913,
809
+ "grad_norm": 28.35548973083496,
810
+ "learning_rate": 4.873551106427819e-05,
811
+ "loss": 1.9854,
812
+ "step": 2775
813
+ },
814
+ {
815
+ "epoch": 2.9504741833508956,
816
+ "grad_norm": 20.59808921813965,
817
+ "learning_rate": 4.917456972251493e-05,
818
+ "loss": 2.0036,
819
+ "step": 2800
820
+ },
821
+ {
822
+ "epoch": 2.9768177028451,
823
+ "grad_norm": 24.746854782104492,
824
+ "learning_rate": 4.961362838075167e-05,
825
+ "loss": 2.0894,
826
+ "step": 2825
827
+ },
828
+ {
829
+ "epoch": 3.0,
830
+ "eval_gen_len": 18.1389,
831
+ "eval_loss": 2.2465784549713135,
832
+ "eval_rouge1": 27.1383,
833
+ "eval_rouge2": 12.4631,
834
+ "eval_rougeL": 26.7601,
835
+ "eval_rougeLsum": 26.8419,
836
+ "eval_runtime": 963.3991,
837
+ "eval_samples_per_second": 0.493,
838
+ "eval_steps_per_second": 0.124,
839
+ "step": 2847
840
+ },
841
+ {
842
+ "epoch": 3.0031612223393047,
843
+ "grad_norm": 15.351967811584473,
844
+ "learning_rate": 4.9994145884556845e-05,
845
+ "loss": 1.7484,
846
+ "step": 2850
847
+ },
848
+ {
849
+ "epoch": 3.029504741833509,
850
+ "grad_norm": 12.023371696472168,
851
+ "learning_rate": 4.994536158919721e-05,
852
+ "loss": 1.6976,
853
+ "step": 2875
854
+ },
855
+ {
856
+ "epoch": 3.0558482613277134,
857
+ "grad_norm": 16.859472274780273,
858
+ "learning_rate": 4.989657729383757e-05,
859
+ "loss": 1.5859,
860
+ "step": 2900
861
+ },
862
+ {
863
+ "epoch": 3.0821917808219177,
864
+ "grad_norm": 11.666887283325195,
865
+ "learning_rate": 4.984779299847793e-05,
866
+ "loss": 1.518,
867
+ "step": 2925
868
+ },
869
+ {
870
+ "epoch": 3.108535300316122,
871
+ "grad_norm": 16.126272201538086,
872
+ "learning_rate": 4.979900870311829e-05,
873
+ "loss": 1.6562,
874
+ "step": 2950
875
+ },
876
+ {
877
+ "epoch": 3.134878819810327,
878
+ "grad_norm": 16.743412017822266,
879
+ "learning_rate": 4.9750224407758655e-05,
880
+ "loss": 1.7747,
881
+ "step": 2975
882
+ },
883
+ {
884
+ "epoch": 3.161222339304531,
885
+ "grad_norm": 21.94359588623047,
886
+ "learning_rate": 4.9701440112399017e-05,
887
+ "loss": 1.3415,
888
+ "step": 3000
889
+ },
890
+ {
891
+ "epoch": 3.1875658587987354,
892
+ "grad_norm": 20.92635154724121,
893
+ "learning_rate": 4.965265581703938e-05,
894
+ "loss": 1.5768,
895
+ "step": 3025
896
+ },
897
+ {
898
+ "epoch": 3.2139093782929398,
899
+ "grad_norm": 19.112150192260742,
900
+ "learning_rate": 4.960387152167975e-05,
901
+ "loss": 1.4553,
902
+ "step": 3050
903
+ },
904
+ {
905
+ "epoch": 3.2402528977871445,
906
+ "grad_norm": 14.94166088104248,
907
+ "learning_rate": 4.955508722632011e-05,
908
+ "loss": 1.5237,
909
+ "step": 3075
910
+ },
911
+ {
912
+ "epoch": 3.266596417281349,
913
+ "grad_norm": 32.23541259765625,
914
+ "learning_rate": 4.950630293096047e-05,
915
+ "loss": 1.7323,
916
+ "step": 3100
917
+ },
918
+ {
919
+ "epoch": 3.292939936775553,
920
+ "grad_norm": 15.371477127075195,
921
+ "learning_rate": 4.945751863560083e-05,
922
+ "loss": 1.661,
923
+ "step": 3125
924
+ },
925
+ {
926
+ "epoch": 3.3192834562697575,
927
+ "grad_norm": 17.414264678955078,
928
+ "learning_rate": 4.9408734340241195e-05,
929
+ "loss": 1.8292,
930
+ "step": 3150
931
+ },
932
+ {
933
+ "epoch": 3.3456269757639623,
934
+ "grad_norm": 24.73505401611328,
935
+ "learning_rate": 4.935995004488155e-05,
936
+ "loss": 1.4615,
937
+ "step": 3175
938
+ },
939
+ {
940
+ "epoch": 3.3719704952581666,
941
+ "grad_norm": 33.94083023071289,
942
+ "learning_rate": 4.931116574952191e-05,
943
+ "loss": 1.7071,
944
+ "step": 3200
945
+ },
946
+ {
947
+ "epoch": 3.398314014752371,
948
+ "grad_norm": 38.488319396972656,
949
+ "learning_rate": 4.9262381454162274e-05,
950
+ "loss": 1.5914,
951
+ "step": 3225
952
+ },
953
+ {
954
+ "epoch": 3.4246575342465753,
955
+ "grad_norm": 15.29854965209961,
956
+ "learning_rate": 4.9213597158802636e-05,
957
+ "loss": 1.529,
958
+ "step": 3250
959
+ },
960
+ {
961
+ "epoch": 3.4510010537407796,
962
+ "grad_norm": 39.15888977050781,
963
+ "learning_rate": 4.9164812863443005e-05,
964
+ "loss": 1.6042,
965
+ "step": 3275
966
+ },
967
+ {
968
+ "epoch": 3.4773445732349844,
969
+ "grad_norm": 37.293724060058594,
970
+ "learning_rate": 4.9116028568083366e-05,
971
+ "loss": 1.4374,
972
+ "step": 3300
973
+ },
974
+ {
975
+ "epoch": 3.5036880927291887,
976
+ "grad_norm": 27.48360824584961,
977
+ "learning_rate": 4.906724427272373e-05,
978
+ "loss": 1.5287,
979
+ "step": 3325
980
+ },
981
+ {
982
+ "epoch": 3.530031612223393,
983
+ "grad_norm": 13.632729530334473,
984
+ "learning_rate": 4.901845997736409e-05,
985
+ "loss": 1.563,
986
+ "step": 3350
987
+ },
988
+ {
989
+ "epoch": 3.5563751317175973,
990
+ "grad_norm": 20.885683059692383,
991
+ "learning_rate": 4.896967568200445e-05,
992
+ "loss": 1.638,
993
+ "step": 3375
994
+ },
995
+ {
996
+ "epoch": 3.5827186512118017,
997
+ "grad_norm": 46.394649505615234,
998
+ "learning_rate": 4.8920891386644814e-05,
999
+ "loss": 1.6575,
1000
+ "step": 3400
1001
+ },
1002
+ {
1003
+ "epoch": 3.6090621707060064,
1004
+ "grad_norm": 37.50140380859375,
1005
+ "learning_rate": 4.8872107091285176e-05,
1006
+ "loss": 1.7124,
1007
+ "step": 3425
1008
+ },
1009
+ {
1010
+ "epoch": 3.6354056902002108,
1011
+ "grad_norm": 20.98285675048828,
1012
+ "learning_rate": 4.882332279592554e-05,
1013
+ "loss": 1.6254,
1014
+ "step": 3450
1015
+ },
1016
+ {
1017
+ "epoch": 3.661749209694415,
1018
+ "grad_norm": 11.166362762451172,
1019
+ "learning_rate": 4.87745385005659e-05,
1020
+ "loss": 1.69,
1021
+ "step": 3475
1022
+ },
1023
+ {
1024
+ "epoch": 3.68809272918862,
1025
+ "grad_norm": 29.99367904663086,
1026
+ "learning_rate": 4.872575420520626e-05,
1027
+ "loss": 1.5022,
1028
+ "step": 3500
1029
+ },
1030
+ {
1031
+ "epoch": 3.714436248682824,
1032
+ "grad_norm": 24.22369384765625,
1033
+ "learning_rate": 4.8676969909846624e-05,
1034
+ "loss": 1.8366,
1035
+ "step": 3525
1036
+ },
1037
+ {
1038
+ "epoch": 3.7407797681770285,
1039
+ "grad_norm": 18.417123794555664,
1040
+ "learning_rate": 4.8628185614486986e-05,
1041
+ "loss": 1.5953,
1042
+ "step": 3550
1043
+ },
1044
+ {
1045
+ "epoch": 3.767123287671233,
1046
+ "grad_norm": 31.92102813720703,
1047
+ "learning_rate": 4.857940131912735e-05,
1048
+ "loss": 1.8851,
1049
+ "step": 3575
1050
+ },
1051
+ {
1052
+ "epoch": 3.793466807165437,
1053
+ "grad_norm": 19.559165954589844,
1054
+ "learning_rate": 4.853061702376771e-05,
1055
+ "loss": 1.3543,
1056
+ "step": 3600
1057
+ },
1058
+ {
1059
+ "epoch": 3.819810326659642,
1060
+ "grad_norm": 27.526994705200195,
1061
+ "learning_rate": 4.848183272840807e-05,
1062
+ "loss": 1.4503,
1063
+ "step": 3625
1064
+ },
1065
+ {
1066
+ "epoch": 3.8461538461538463,
1067
+ "grad_norm": 21.375267028808594,
1068
+ "learning_rate": 4.8433048433048433e-05,
1069
+ "loss": 1.7254,
1070
+ "step": 3650
1071
+ },
1072
+ {
1073
+ "epoch": 3.8724973656480506,
1074
+ "grad_norm": 23.377002716064453,
1075
+ "learning_rate": 4.8384264137688795e-05,
1076
+ "loss": 1.6948,
1077
+ "step": 3675
1078
+ },
1079
+ {
1080
+ "epoch": 3.898840885142255,
1081
+ "grad_norm": 32.76591110229492,
1082
+ "learning_rate": 4.8335479842329164e-05,
1083
+ "loss": 1.5097,
1084
+ "step": 3700
1085
+ },
1086
+ {
1087
+ "epoch": 3.9251844046364592,
1088
+ "grad_norm": 28.73731803894043,
1089
+ "learning_rate": 4.8286695546969526e-05,
1090
+ "loss": 1.3383,
1091
+ "step": 3725
1092
+ },
1093
+ {
1094
+ "epoch": 3.951527924130664,
1095
+ "grad_norm": 33.73754119873047,
1096
+ "learning_rate": 4.823791125160989e-05,
1097
+ "loss": 1.4893,
1098
+ "step": 3750
1099
+ },
1100
+ {
1101
+ "epoch": 3.9778714436248683,
1102
+ "grad_norm": 16.65566635131836,
1103
+ "learning_rate": 4.818912695625025e-05,
1104
+ "loss": 1.6426,
1105
+ "step": 3775
1106
+ },
1107
+ {
1108
+ "epoch": 4.0,
1109
+ "eval_gen_len": 8.9979,
1110
+ "eval_loss": 2.1172335147857666,
1111
+ "eval_rouge1": 33.7585,
1112
+ "eval_rouge2": 17.8059,
1113
+ "eval_rougeL": 33.2997,
1114
+ "eval_rougeLsum": 33.2581,
1115
+ "eval_runtime": 180.3517,
1116
+ "eval_samples_per_second": 2.634,
1117
+ "eval_steps_per_second": 0.66,
1118
+ "step": 3796
1119
+ },
1120
+ {
1121
+ "epoch": 4.004214963119073,
1122
+ "grad_norm": 26.248737335205078,
1123
+ "learning_rate": 4.8140342660890605e-05,
1124
+ "loss": 1.4139,
1125
+ "step": 3800
1126
+ },
1127
+ {
1128
+ "epoch": 4.030558482613277,
1129
+ "grad_norm": 40.171348571777344,
1130
+ "learning_rate": 4.809155836553097e-05,
1131
+ "loss": 0.9346,
1132
+ "step": 3825
1133
+ },
1134
+ {
1135
+ "epoch": 4.056902002107481,
1136
+ "grad_norm": 18.67420768737793,
1137
+ "learning_rate": 4.804277407017133e-05,
1138
+ "loss": 1.0417,
1139
+ "step": 3850
1140
+ },
1141
+ {
1142
+ "epoch": 4.083245521601686,
1143
+ "grad_norm": 15.394213676452637,
1144
+ "learning_rate": 4.799398977481169e-05,
1145
+ "loss": 1.1243,
1146
+ "step": 3875
1147
+ },
1148
+ {
1149
+ "epoch": 4.109589041095891,
1150
+ "grad_norm": 14.950016021728516,
1151
+ "learning_rate": 4.794520547945205e-05,
1152
+ "loss": 0.9402,
1153
+ "step": 3900
1154
+ },
1155
+ {
1156
+ "epoch": 4.135932560590095,
1157
+ "grad_norm": 33.47651290893555,
1158
+ "learning_rate": 4.789642118409242e-05,
1159
+ "loss": 1.053,
1160
+ "step": 3925
1161
+ },
1162
+ {
1163
+ "epoch": 4.1622760800842995,
1164
+ "grad_norm": 23.766538619995117,
1165
+ "learning_rate": 4.784763688873278e-05,
1166
+ "loss": 0.9098,
1167
+ "step": 3950
1168
+ },
1169
+ {
1170
+ "epoch": 4.188619599578503,
1171
+ "grad_norm": 24.7260684967041,
1172
+ "learning_rate": 4.7798852593373145e-05,
1173
+ "loss": 0.8711,
1174
+ "step": 3975
1175
+ },
1176
+ {
1177
+ "epoch": 4.214963119072708,
1178
+ "grad_norm": 14.13185977935791,
1179
+ "learning_rate": 4.775006829801351e-05,
1180
+ "loss": 0.9576,
1181
+ "step": 4000
1182
+ },
1183
+ {
1184
+ "epoch": 4.241306638566913,
1185
+ "grad_norm": 26.87774085998535,
1186
+ "learning_rate": 4.770128400265387e-05,
1187
+ "loss": 1.1116,
1188
+ "step": 4025
1189
+ },
1190
+ {
1191
+ "epoch": 4.267650158061117,
1192
+ "grad_norm": 24.789899826049805,
1193
+ "learning_rate": 4.765249970729423e-05,
1194
+ "loss": 1.2492,
1195
+ "step": 4050
1196
+ },
1197
+ {
1198
+ "epoch": 4.293993677555322,
1199
+ "grad_norm": 35.61201858520508,
1200
+ "learning_rate": 4.760371541193459e-05,
1201
+ "loss": 1.0259,
1202
+ "step": 4075
1203
+ },
1204
+ {
1205
+ "epoch": 4.3203371970495255,
1206
+ "grad_norm": 18.178863525390625,
1207
+ "learning_rate": 4.7554931116574955e-05,
1208
+ "loss": 1.0043,
1209
+ "step": 4100
1210
+ },
1211
+ {
1212
+ "epoch": 4.34668071654373,
1213
+ "grad_norm": 18.80617904663086,
1214
+ "learning_rate": 4.750614682121532e-05,
1215
+ "loss": 1.254,
1216
+ "step": 4125
1217
+ },
1218
+ {
1219
+ "epoch": 4.373024236037935,
1220
+ "grad_norm": 23.337120056152344,
1221
+ "learning_rate": 4.745736252585568e-05,
1222
+ "loss": 1.3335,
1223
+ "step": 4150
1224
+ },
1225
+ {
1226
+ "epoch": 4.399367755532139,
1227
+ "grad_norm": 27.5556640625,
1228
+ "learning_rate": 4.740857823049604e-05,
1229
+ "loss": 1.1739,
1230
+ "step": 4175
1231
+ },
1232
+ {
1233
+ "epoch": 4.425711275026344,
1234
+ "grad_norm": 14.718132019042969,
1235
+ "learning_rate": 4.73597939351364e-05,
1236
+ "loss": 1.232,
1237
+ "step": 4200
1238
+ },
1239
+ {
1240
+ "epoch": 4.4520547945205475,
1241
+ "grad_norm": 16.896610260009766,
1242
+ "learning_rate": 4.7311009639776765e-05,
1243
+ "loss": 0.9822,
1244
+ "step": 4225
1245
+ },
1246
+ {
1247
+ "epoch": 4.478398314014752,
1248
+ "grad_norm": 32.641151428222656,
1249
+ "learning_rate": 4.7262225344417126e-05,
1250
+ "loss": 1.1221,
1251
+ "step": 4250
1252
+ },
1253
+ {
1254
+ "epoch": 4.504741833508957,
1255
+ "grad_norm": 8.409883499145508,
1256
+ "learning_rate": 4.721344104905749e-05,
1257
+ "loss": 1.0836,
1258
+ "step": 4275
1259
+ },
1260
+ {
1261
+ "epoch": 4.531085353003161,
1262
+ "grad_norm": 24.261465072631836,
1263
+ "learning_rate": 4.716465675369785e-05,
1264
+ "loss": 1.0266,
1265
+ "step": 4300
1266
+ },
1267
+ {
1268
+ "epoch": 4.557428872497366,
1269
+ "grad_norm": 27.14542579650879,
1270
+ "learning_rate": 4.711587245833821e-05,
1271
+ "loss": 0.9686,
1272
+ "step": 4325
1273
+ },
1274
+ {
1275
+ "epoch": 4.58377239199157,
1276
+ "grad_norm": 27.667455673217773,
1277
+ "learning_rate": 4.706708816297858e-05,
1278
+ "loss": 1.0164,
1279
+ "step": 4350
1280
+ },
1281
+ {
1282
+ "epoch": 4.610115911485774,
1283
+ "grad_norm": 22.17734718322754,
1284
+ "learning_rate": 4.701830386761894e-05,
1285
+ "loss": 1.1826,
1286
+ "step": 4375
1287
+ },
1288
+ {
1289
+ "epoch": 4.636459430979979,
1290
+ "grad_norm": 19.197708129882812,
1291
+ "learning_rate": 4.6969519572259305e-05,
1292
+ "loss": 0.8725,
1293
+ "step": 4400
1294
+ },
1295
+ {
1296
+ "epoch": 4.662802950474183,
1297
+ "grad_norm": 18.602115631103516,
1298
+ "learning_rate": 4.692073527689966e-05,
1299
+ "loss": 1.3157,
1300
+ "step": 4425
1301
+ },
1302
+ {
1303
+ "epoch": 4.689146469968388,
1304
+ "grad_norm": 19.589008331298828,
1305
+ "learning_rate": 4.687195098154002e-05,
1306
+ "loss": 1.2225,
1307
+ "step": 4450
1308
+ },
1309
+ {
1310
+ "epoch": 4.715489989462593,
1311
+ "grad_norm": 24.026233673095703,
1312
+ "learning_rate": 4.6823166686180384e-05,
1313
+ "loss": 1.1642,
1314
+ "step": 4475
1315
+ },
1316
+ {
1317
+ "epoch": 4.7418335089567965,
1318
+ "grad_norm": 16.99645233154297,
1319
+ "learning_rate": 4.6774382390820746e-05,
1320
+ "loss": 1.0141,
1321
+ "step": 4500
1322
+ },
1323
+ {
1324
+ "epoch": 4.768177028451001,
1325
+ "grad_norm": 15.595638275146484,
1326
+ "learning_rate": 4.672559809546111e-05,
1327
+ "loss": 1.1805,
1328
+ "step": 4525
1329
+ },
1330
+ {
1331
+ "epoch": 4.794520547945205,
1332
+ "grad_norm": 16.28839683532715,
1333
+ "learning_rate": 4.667681380010147e-05,
1334
+ "loss": 1.3,
1335
+ "step": 4550
1336
+ },
1337
+ {
1338
+ "epoch": 4.82086406743941,
1339
+ "grad_norm": 27.51399040222168,
1340
+ "learning_rate": 4.662802950474184e-05,
1341
+ "loss": 1.1732,
1342
+ "step": 4575
1343
+ },
1344
+ {
1345
+ "epoch": 4.847207586933615,
1346
+ "grad_norm": 30.363340377807617,
1347
+ "learning_rate": 4.65792452093822e-05,
1348
+ "loss": 1.2096,
1349
+ "step": 4600
1350
+ },
1351
+ {
1352
+ "epoch": 4.8735511064278185,
1353
+ "grad_norm": 27.562767028808594,
1354
+ "learning_rate": 4.653046091402256e-05,
1355
+ "loss": 1.0491,
1356
+ "step": 4625
1357
+ },
1358
+ {
1359
+ "epoch": 4.899894625922023,
1360
+ "grad_norm": 15.739079475402832,
1361
+ "learning_rate": 4.6481676618662924e-05,
1362
+ "loss": 1.0191,
1363
+ "step": 4650
1364
+ },
1365
+ {
1366
+ "epoch": 4.926238145416228,
1367
+ "grad_norm": 30.40022850036621,
1368
+ "learning_rate": 4.6432892323303286e-05,
1369
+ "loss": 1.2202,
1370
+ "step": 4675
1371
+ },
1372
+ {
1373
+ "epoch": 4.952581664910432,
1374
+ "grad_norm": 14.871630668640137,
1375
+ "learning_rate": 4.638410802794365e-05,
1376
+ "loss": 1.0677,
1377
+ "step": 4700
1378
+ },
1379
+ {
1380
+ "epoch": 4.978925184404637,
1381
+ "grad_norm": 25.979448318481445,
1382
+ "learning_rate": 4.633532373258401e-05,
1383
+ "loss": 1.2033,
1384
+ "step": 4725
1385
+ },
1386
+ {
1387
+ "epoch": 5.0,
1388
+ "eval_gen_len": 11.7958,
1389
+ "eval_loss": 2.07828688621521,
1390
+ "eval_rouge1": 37.8388,
1391
+ "eval_rouge2": 19.8532,
1392
+ "eval_rougeL": 37.3344,
1393
+ "eval_rougeLsum": 37.4253,
1394
+ "eval_runtime": 401.1496,
1395
+ "eval_samples_per_second": 1.184,
1396
+ "eval_steps_per_second": 0.297,
1397
+ "step": 4745
1398
+ }
1399
+ ],
1400
+ "logging_steps": 25,
1401
+ "max_steps": 28470,
1402
+ "num_input_tokens_seen": 0,
1403
+ "num_train_epochs": 30,
1404
+ "save_steps": 500,
1405
+ "stateful_callbacks": {
1406
+ "EarlyStoppingCallback": {
1407
+ "args": {
1408
+ "early_stopping_patience": 5,
1409
+ "early_stopping_threshold": 0.01
1410
+ },
1411
+ "attributes": {
1412
+ "early_stopping_patience_counter": 0
1413
+ }
1414
+ },
1415
+ "TrainerControl": {
1416
+ "args": {
1417
+ "should_epoch_stop": false,
1418
+ "should_evaluate": false,
1419
+ "should_log": false,
1420
+ "should_save": true,
1421
+ "should_training_stop": false
1422
+ },
1423
+ "attributes": {}
1424
+ }
1425
+ },
1426
+ "total_flos": 15600663134208.0,
1427
+ "train_batch_size": 2,
1428
+ "trial_name": null,
1429
+ "trial_params": null
1430
+ }
checkpoint-4745/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77268f0ad258622841b5501d64b1c7366ec392ad693c1e9020a129b71fa216dd
3
+ size 5368
checkpoint-4745/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-fr-en",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.0,
5
+ "activation_function": "swish",
6
+ "add_bias_logits": false,
7
+ "add_final_layer_norm": false,
8
+ "architectures": [
9
+ "MarianMTModel"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bad_words_ids": [
13
+ [
14
+ 59513
15
+ ]
16
+ ],
17
+ "bos_token_id": 0,
18
+ "classif_dropout": 0.0,
19
+ "classifier_dropout": 0.0,
20
+ "d_model": 512,
21
+ "decoder_attention_heads": 8,
22
+ "decoder_ffn_dim": 2048,
23
+ "decoder_layerdrop": 0.0,
24
+ "decoder_layers": 6,
25
+ "decoder_start_token_id": 59513,
26
+ "decoder_vocab_size": 59514,
27
+ "dropout": 0.1,
28
+ "encoder_attention_heads": 8,
29
+ "encoder_ffn_dim": 2048,
30
+ "encoder_layerdrop": 0.0,
31
+ "encoder_layers": 6,
32
+ "eos_token_id": 0,
33
+ "forced_eos_token_id": 0,
34
+ "gradient_checkpointing": false,
35
+ "id2label": {
36
+ "0": "LABEL_0",
37
+ "1": "LABEL_1",
38
+ "2": "LABEL_2"
39
+ },
40
+ "init_std": 0.02,
41
+ "is_encoder_decoder": true,
42
+ "label2id": {
43
+ "LABEL_0": 0,
44
+ "LABEL_1": 1,
45
+ "LABEL_2": 2
46
+ },
47
+ "max_length": 512,
48
+ "max_position_embeddings": 512,
49
+ "model_type": "marian",
50
+ "normalize_before": false,
51
+ "normalize_embedding": false,
52
+ "num_beams": 4,
53
+ "num_hidden_layers": 6,
54
+ "pad_token_id": 59513,
55
+ "scale_embedding": true,
56
+ "share_encoder_decoder_embeddings": true,
57
+ "static_position_embeddings": true,
58
+ "torch_dtype": "float32",
59
+ "transformers_version": "4.44.2",
60
+ "use_cache": true,
61
+ "vocab_size": 59514
62
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 59513
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 59513,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 59513,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.44.2"
16
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:244221195120bb40e78229e4ec91ea2626ebc10293ea3df2e110120457c4e07a
3
+ size 298705768
runs/Sep11_01-31-00_r-ronaldodev-autotrain-advanced-za5k052z-45777-1hgyl/events.out.tfevents.1726018267.r-ronaldodev-autotrain-advanced-za5k052z-45777-1hgyl.103.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa99da7f92bfd27d52179c5676547f1059fc27961c1f3c5e7209998a6eac456f
3
- size 88815
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:717e3f48daac8769e962cafcf07e24dfd9c05be38a77b61588f53a6707506a14
3
+ size 91382
runs/Sep11_01-31-00_r-ronaldodev-autotrain-advanced-za5k052z-45777-1hgyl/events.out.tfevents.1726031369.r-ronaldodev-autotrain-advanced-za5k052z-45777-1hgyl.103.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb5b7ff5647f7753c285d48be6554f93c387f7d56f2b32483c91570437ceb76f
3
+ size 613
source.spm ADDED
Binary file (802 kB). View file
 
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
target.spm ADDED
Binary file (778 kB). View file
 
tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "59513": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "eos_token": "</s>",
30
+ "model_max_length": 512,
31
+ "pad_token": "<pad>",
32
+ "separate_vocabs": false,
33
+ "source_lang": "fr",
34
+ "sp_model_kwargs": {},
35
+ "target_lang": "en",
36
+ "tokenizer_class": "MarianTokenizer",
37
+ "unk_token": "<unk>"
38
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77268f0ad258622841b5501d64b1c7366ec392ad693c1e9020a129b71fa216dd
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "fr-mina/autotrain-data",
3
+ "model": "Helsinki-NLP/opus-mt-fr-en",
4
+ "username": "Ronaldodev",
5
+ "seed": 42,
6
+ "train_split": "train",
7
+ "valid_split": "validation",
8
+ "project_name": "fr-mina",
9
+ "push_to_hub": true,
10
+ "text_column": "autotrain_text",
11
+ "target_column": "autotrain_label",
12
+ "lr": 5e-05,
13
+ "epochs": 30,
14
+ "max_seq_length": 128,
15
+ "max_target_length": 128,
16
+ "batch_size": 2,
17
+ "warmup_ratio": 0.1,
18
+ "gradient_accumulation": 1,
19
+ "optimizer": "adamw_torch",
20
+ "scheduler": "linear",
21
+ "weight_decay": 0.0,
22
+ "max_grad_norm": 1.0,
23
+ "logging_steps": -1,
24
+ "eval_strategy": "epoch",
25
+ "auto_find_batch_size": true,
26
+ "mixed_precision": "fp16",
27
+ "save_total_limit": 1,
28
+ "peft": false,
29
+ "quantization": "int8",
30
+ "lora_r": 16,
31
+ "lora_alpha": 32,
32
+ "lora_dropout": 0.05,
33
+ "target_modules": "all-linear",
34
+ "log": "tensorboard",
35
+ "early_stopping_patience": 5,
36
+ "early_stopping_threshold": 0.01
37
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff