nlparabic committed on
Commit 76bdd9c
1 Parent(s): cc4ab94

End of training

README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [riotu-lab/ArabianGPT-01B](https://huggingface.co/riotu-lab/ArabianGPT-01B) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.9086
-- Bleu: 0.3178
-- Rouge1: 0.5876
-- Rouge2: 0.3513
-- Rougel: 0.5510
+- Loss: 1.9084
+- Bleu: 0.3172
+- Rouge1: 0.5869
+- Rouge2: 0.3505
+- Rougel: 0.5504
 
 ## Model description
 
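For anyone reading the model card, the base checkpoint named above loads with the standard transformers API. The sketch below is illustrative only: it uses the base riotu-lab/ArabianGPT-01B id that the README references, since the fine-tuned repository id is not part of this diff, and the prompt string is a placeholder.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: substitute the fine-tuned checkpoint's repo id or local path here;
# this diff only names the base model.
model_id = "riotu-lab/ArabianGPT-01B"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("مرحبا", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=30)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```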
all_results.json CHANGED
@@ -1,19 +1,19 @@
 {
-    "epoch": 5.0,
-    "eval_bleu": 0.16068905926811505,
-    "eval_loss": 2.6229476928710938,
-    "eval_rouge1": 0.4264320027787866,
-    "eval_rouge2": 0.1630682859845051,
-    "eval_rougeL": 0.367472815476786,
-    "eval_runtime": 27.1354,
-    "eval_samples": 847,
-    "eval_samples_per_second": 31.214,
-    "eval_steps_per_second": 3.906,
-    "perplexity": 13.77627201236392,
-    "total_flos": 5001129492480000.0,
-    "train_loss": 2.2704222102150275,
-    "train_runtime": 565.3472,
-    "train_samples": 2552,
-    "train_samples_per_second": 22.57,
-    "train_steps_per_second": 2.821
+    "epoch": 20.0,
+    "eval_bleu": 0.31723468269919336,
+    "eval_loss": 1.9084105491638184,
+    "eval_rouge1": 0.5868586694605076,
+    "eval_rouge2": 0.350546625127078,
+    "eval_rougeL": 0.5503666110741787,
+    "eval_runtime": 29.6048,
+    "eval_samples": 925,
+    "eval_samples_per_second": 31.245,
+    "eval_steps_per_second": 3.918,
+    "perplexity": 6.742363621722242,
+    "total_flos": 2.8862709792768e+16,
+    "train_loss": 1.4422371688478681,
+    "train_runtime": 3284.8472,
+    "train_samples": 3681,
+    "train_samples_per_second": 22.412,
+    "train_steps_per_second": 2.807
 }
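A small consistency check on the numbers above: the perplexity field in both the old and new results is exp(eval_loss), so the drop from 13.78 to 6.74 follows directly from the eval loss improving from 2.6229 to 1.9084.

```python
import math

# Values copied from all_results.json above.
print(math.exp(2.6229476928710938))  # ~13.776272..., the old "perplexity"
print(math.exp(1.9084105491638184))  # ~6.742363...,  the new "perplexity"
```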
egy_training_log.txt CHANGED
@@ -752,3 +752,5 @@ INFO:absl:Using default tokenizer.
 INFO:root:Epoch 19.0: Train Loss = 1.4937, Eval Loss = 1.9084105491638184
 INFO:absl:Using default tokenizer.
 INFO:root:Epoch 20.0: Train Loss = 1.4824, Eval Loss = 1.9086270332336426
+INFO:__main__:*** Evaluate ***
+INFO:absl:Using default tokenizer.
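These appended log lines also show why the best checkpoint is not the last one: the lowest eval loss in the log (1.90841, logged at epoch 19.0) matches the best_metric recorded in trainer_state.json below, while the final epoch's eval loss is slightly higher (1.90863). A hypothetical parsing sketch for this log format:

```python
import re

# Two lines copied from egy_training_log.txt; a real script would read the whole file.
log_lines = [
    "INFO:root:Epoch 19.0: Train Loss = 1.4937, Eval Loss = 1.9084105491638184",
    "INFO:root:Epoch 20.0: Train Loss = 1.4824, Eval Loss = 1.9086270332336426",
]
pattern = re.compile(r"Epoch ([\d.]+): Train Loss = ([\d.]+), Eval Loss = ([\d.]+)")
rows = [tuple(map(float, m.groups())) for m in map(pattern.search, log_lines) if m]
print(min(rows, key=lambda r: r[2]))  # (19.0, 1.4937, 1.9084105491638184)
```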
eval_results.json CHANGED
@@ -1,13 +1,13 @@
 {
-    "epoch": 5.0,
-    "eval_bleu": 0.16068905926811505,
-    "eval_loss": 2.6229476928710938,
-    "eval_rouge1": 0.4264320027787866,
-    "eval_rouge2": 0.1630682859845051,
-    "eval_rougeL": 0.367472815476786,
-    "eval_runtime": 27.1354,
-    "eval_samples": 847,
-    "eval_samples_per_second": 31.214,
-    "eval_steps_per_second": 3.906,
-    "perplexity": 13.77627201236392
+    "epoch": 20.0,
+    "eval_bleu": 0.31723468269919336,
+    "eval_loss": 1.9084105491638184,
+    "eval_rouge1": 0.5868586694605076,
+    "eval_rouge2": 0.350546625127078,
+    "eval_rougeL": 0.5503666110741787,
+    "eval_runtime": 29.6048,
+    "eval_samples": 925,
+    "eval_samples_per_second": 31.245,
+    "eval_steps_per_second": 3.918,
+    "perplexity": 6.742363621722242
 }
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
-    "epoch": 5.0,
-    "total_flos": 5001129492480000.0,
-    "train_loss": 2.2704222102150275,
-    "train_runtime": 565.3472,
-    "train_samples": 2552,
-    "train_samples_per_second": 22.57,
-    "train_steps_per_second": 2.821
+    "epoch": 20.0,
+    "total_flos": 2.8862709792768e+16,
+    "train_loss": 1.4422371688478681,
+    "train_runtime": 3284.8472,
+    "train_samples": 3681,
+    "train_samples_per_second": 22.412,
+    "train_steps_per_second": 2.807
 }
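The throughput figures here are internally consistent: train_samples_per_second is train_samples times the number of epochs divided by train_runtime, and train_steps_per_second is the total optimizer steps (9220 according to trainer_state.json below) divided by the runtime.

```python
# Values copied from train_results.json and trainer_state.json.
train_samples, num_epochs, runtime, total_steps = 3681, 20, 3284.8472, 9220

print(round(train_samples * num_epochs / runtime, 3))  # 22.412 -> "train_samples_per_second"
print(round(total_steps / runtime, 3))                 # 2.807  -> "train_steps_per_second"
```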
train_vs_val_loss.png CHANGED
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.6229476928710938,
-  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/results/checkpoint-1500",
-  "epoch": 5.0,
+  "best_metric": 1.9084105491638184,
+  "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/results/checkpoint-8500",
+  "epoch": 20.0,
   "eval_steps": 500,
-  "global_step": 1595,
+  "global_step": 9220,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -66,19 +66,304 @@
       "step": 1500
     },
     {
-      "epoch": 5.0,
-      "step": 1595,
-      "total_flos": 5001129492480000.0,
-      "train_loss": 2.2704222102150275,
-      "train_runtime": 565.3472,
-      "train_samples_per_second": 22.57,
-      "train_steps_per_second": 2.821
+      "epoch": 4.3383947939262475,
+      "grad_norm": 1.10550856590271,
+      "learning_rate": 4.139908256880734e-05,
+      "loss": 2.4047,
+      "step": 2000
+    },
+    {
+      "epoch": 4.3383947939262475,
+      "eval_bleu": 0.27212534220096674,
+      "eval_loss": 2.200192451477051,
+      "eval_rouge1": 0.49764917064550795,
+      "eval_rouge2": 0.25417403674525624,
+      "eval_rougeL": 0.4505978761161964,
+      "eval_runtime": 29.8301,
+      "eval_samples_per_second": 31.009,
+      "eval_steps_per_second": 3.889,
+      "step": 2000
+    },
+    {
+      "epoch": 5.422993492407809,
+      "grad_norm": 1.0486189126968384,
+      "learning_rate": 3.8532110091743125e-05,
+      "loss": 2.19,
+      "step": 2500
+    },
+    {
+      "epoch": 5.422993492407809,
+      "eval_bleu": 0.2853635265097057,
+      "eval_loss": 2.099168539047241,
+      "eval_rouge1": 0.5205238075842558,
+      "eval_rouge2": 0.27883621341002174,
+      "eval_rougeL": 0.4772785679427928,
+      "eval_runtime": 29.5017,
+      "eval_samples_per_second": 31.354,
+      "eval_steps_per_second": 3.932,
+      "step": 2500
+    },
+    {
+      "epoch": 6.507592190889371,
+      "grad_norm": 1.0022239685058594,
+      "learning_rate": 3.56651376146789e-05,
+      "loss": 2.0473,
+      "step": 3000
+    },
+    {
+      "epoch": 6.507592190889371,
+      "eval_bleu": 0.29294689624288234,
+      "eval_loss": 2.0362119674682617,
+      "eval_rouge1": 0.5380910185587349,
+      "eval_rouge2": 0.29647105961235576,
+      "eval_rougeL": 0.49649873151947865,
+      "eval_runtime": 29.6658,
+      "eval_samples_per_second": 31.181,
+      "eval_steps_per_second": 3.91,
+      "step": 3000
+    },
+    {
+      "epoch": 7.592190889370933,
+      "grad_norm": 1.1853405237197876,
+      "learning_rate": 3.2798165137614676e-05,
+      "loss": 1.9397,
+      "step": 3500
+    },
+    {
+      "epoch": 7.592190889370933,
+      "eval_bleu": 0.2996126116957466,
+      "eval_loss": 1.9933106899261475,
+      "eval_rouge1": 0.5494053286639744,
+      "eval_rouge2": 0.31025003697020603,
+      "eval_rougeL": 0.5101736274334897,
+      "eval_runtime": 29.6088,
+      "eval_samples_per_second": 31.241,
+      "eval_steps_per_second": 3.918,
+      "step": 3500
+    },
+    {
+      "epoch": 8.676789587852495,
+      "grad_norm": 1.1255462169647217,
+      "learning_rate": 2.9931192660550462e-05,
+      "loss": 1.857,
+      "step": 4000
+    },
+    {
+      "epoch": 8.676789587852495,
+      "eval_bleu": 0.30241485912380783,
+      "eval_loss": 1.9647237062454224,
+      "eval_rouge1": 0.5597611557009092,
+      "eval_rouge2": 0.3191422306947157,
+      "eval_rougeL": 0.5202653323875917,
+      "eval_runtime": 29.9377,
+      "eval_samples_per_second": 30.897,
+      "eval_steps_per_second": 3.875,
+      "step": 4000
+    },
+    {
+      "epoch": 9.761388286334057,
+      "grad_norm": 1.1697229146957397,
+      "learning_rate": 2.7064220183486238e-05,
+      "loss": 1.784,
+      "step": 4500
+    },
+    {
+      "epoch": 9.761388286334057,
+      "eval_bleu": 0.3061719577143718,
+      "eval_loss": 1.9443068504333496,
+      "eval_rouge1": 0.567492271856554,
+      "eval_rouge2": 0.3269182124324805,
+      "eval_rougeL": 0.5278573882748132,
+      "eval_runtime": 29.751,
+      "eval_samples_per_second": 31.091,
+      "eval_steps_per_second": 3.899,
+      "step": 4500
+    },
+    {
+      "epoch": 10.845986984815617,
+      "grad_norm": 1.070591926574707,
+      "learning_rate": 2.419724770642202e-05,
+      "loss": 1.7239,
+      "step": 5000
+    },
+    {
+      "epoch": 10.845986984815617,
+      "eval_bleu": 0.309858394526436,
+      "eval_loss": 1.931990385055542,
+      "eval_rouge1": 0.5723606535196859,
+      "eval_rouge2": 0.3338521436125379,
+      "eval_rougeL": 0.5341216118802655,
+      "eval_runtime": 29.6886,
+      "eval_samples_per_second": 31.157,
+      "eval_steps_per_second": 3.907,
+      "step": 5000
+    },
+    {
+      "epoch": 11.93058568329718,
+      "grad_norm": 1.0755261182785034,
+      "learning_rate": 2.13302752293578e-05,
+      "loss": 1.6713,
+      "step": 5500
+    },
+    {
+      "epoch": 11.93058568329718,
+      "eval_bleu": 0.3115672562854492,
+      "eval_loss": 1.920640230178833,
+      "eval_rouge1": 0.5765467952167939,
+      "eval_rouge2": 0.33826641143296676,
+      "eval_rougeL": 0.5387314433190069,
+      "eval_runtime": 29.7016,
+      "eval_samples_per_second": 31.143,
+      "eval_steps_per_second": 3.906,
+      "step": 5500
+    },
+    {
+      "epoch": 13.015184381778742,
+      "grad_norm": 1.0826488733291626,
+      "learning_rate": 1.8463302752293578e-05,
+      "loss": 1.6263,
+      "step": 6000
+    },
+    {
+      "epoch": 13.015184381778742,
+      "eval_bleu": 0.31268695772405475,
+      "eval_loss": 1.916778564453125,
+      "eval_rouge1": 0.5780842791223908,
+      "eval_rouge2": 0.34164409810850394,
+      "eval_rougeL": 0.5415509673961407,
+      "eval_runtime": 29.789,
+      "eval_samples_per_second": 31.052,
+      "eval_steps_per_second": 3.894,
+      "step": 6000
+    },
+    {
+      "epoch": 14.099783080260304,
+      "grad_norm": 1.0868735313415527,
+      "learning_rate": 1.559633027522936e-05,
+      "loss": 1.5869,
+      "step": 6500
+    },
+    {
+      "epoch": 14.099783080260304,
+      "eval_bleu": 0.31365743559233084,
+      "eval_loss": 1.9147837162017822,
+      "eval_rouge1": 0.5829184758698387,
+      "eval_rouge2": 0.3448101826360943,
+      "eval_rougeL": 0.5450794961513086,
+      "eval_runtime": 29.7645,
+      "eval_samples_per_second": 31.077,
+      "eval_steps_per_second": 3.897,
+      "step": 6500
+    },
+    {
+      "epoch": 15.184381778741866,
+      "grad_norm": 1.0827687978744507,
+      "learning_rate": 1.2729357798165138e-05,
+      "loss": 1.5544,
+      "step": 7000
+    },
+    {
+      "epoch": 15.184381778741866,
+      "eval_bleu": 0.315769500599606,
+      "eval_loss": 1.9121257066726685,
+      "eval_rouge1": 0.5844681250407762,
+      "eval_rouge2": 0.34764910748110744,
+      "eval_rougeL": 0.5476190296456669,
+      "eval_runtime": 29.7415,
+      "eval_samples_per_second": 31.101,
+      "eval_steps_per_second": 3.9,
+      "step": 7000
+    },
+    {
+      "epoch": 16.268980477223426,
+      "grad_norm": 1.1430450677871704,
+      "learning_rate": 9.862385321100918e-06,
+      "loss": 1.5307,
+      "step": 7500
+    },
+    {
+      "epoch": 16.268980477223426,
+      "eval_bleu": 0.31648880861794926,
+      "eval_loss": 1.9105726480484009,
+      "eval_rouge1": 0.5852713451659596,
+      "eval_rouge2": 0.34877835378762495,
+      "eval_rougeL": 0.5486197186684263,
+      "eval_runtime": 29.7345,
+      "eval_samples_per_second": 31.109,
+      "eval_steps_per_second": 3.901,
+      "step": 7500
+    },
+    {
+      "epoch": 17.35357917570499,
+      "grad_norm": 1.0865087509155273,
+      "learning_rate": 6.995412844036697e-06,
+      "loss": 1.5087,
+      "step": 8000
+    },
+    {
+      "epoch": 17.35357917570499,
+      "eval_bleu": 0.31692571547155524,
+      "eval_loss": 1.9093118906021118,
+      "eval_rouge1": 0.5860996975913157,
+      "eval_rouge2": 0.3503907384934047,
+      "eval_rougeL": 0.5500340150392318,
+      "eval_runtime": 29.7497,
+      "eval_samples_per_second": 31.093,
+      "eval_steps_per_second": 3.899,
+      "step": 8000
+    },
+    {
+      "epoch": 18.43817787418655,
+      "grad_norm": 1.1252211332321167,
+      "learning_rate": 4.128440366972477e-06,
+      "loss": 1.4937,
+      "step": 8500
+    },
+    {
+      "epoch": 18.43817787418655,
+      "eval_bleu": 0.31723468269919336,
+      "eval_loss": 1.9084105491638184,
+      "eval_rouge1": 0.5868586694605076,
+      "eval_rouge2": 0.350546625127078,
+      "eval_rougeL": 0.5503666110741787,
+      "eval_runtime": 29.7351,
+      "eval_samples_per_second": 31.108,
+      "eval_steps_per_second": 3.901,
+      "step": 8500
+    },
+    {
+      "epoch": 19.522776572668114,
+      "grad_norm": 1.150936245918274,
+      "learning_rate": 1.261467889908257e-06,
+      "loss": 1.4824,
+      "step": 9000
+    },
+    {
+      "epoch": 19.522776572668114,
+      "eval_bleu": 0.3177718226409019,
+      "eval_loss": 1.9086270332336426,
+      "eval_rouge1": 0.5875550437490973,
+      "eval_rouge2": 0.3512666976647323,
+      "eval_rougeL": 0.5509556223633276,
+      "eval_runtime": 30.1604,
+      "eval_samples_per_second": 30.669,
+      "eval_steps_per_second": 3.846,
+      "step": 9000
+    },
+    {
+      "epoch": 20.0,
+      "step": 9220,
+      "total_flos": 2.8862709792768e+16,
+      "train_loss": 1.4422371688478681,
+      "train_runtime": 3284.8472,
+      "train_samples_per_second": 22.412,
+      "train_steps_per_second": 2.807
     }
   ],
   "logging_steps": 500,
-  "max_steps": 1595,
+  "max_steps": 9220,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
+  "num_train_epochs": 20,
   "save_steps": 500,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {
@@ -101,7 +386,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5001129492480000.0,
+  "total_flos": 2.8862709792768e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null