nlparabic committed
Commit c05e286
1 Parent(s): 3c83bd1

End of training

README.md CHANGED
@@ -18,11 +18,11 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [riotu-lab/ArabianGPT-03B](https://huggingface.co/riotu-lab/ArabianGPT-03B) on an unknown dataset.
 It achieves the following results on the evaluation set:
- - Loss: 1.3693
- - Bleu: 0.4248
- - Rouge1: 0.6534
- - Rouge2: 0.4079
- - Rougel: 0.6523
+ - Loss: 0.9735
+ - Bleu: 0.3759
+ - Rouge1: 0.5621
+ - Rouge2: 0.3073
+ - Rougel: 0.5603
 
 ## Model description
 
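For readers of the card: the commit itself adds no inference code, so the following is only a minimal sketch of loading a checkpoint produced by this run and generating text. The checkpoint path is copied from `best_model_checkpoint` in the trainer_state.json added below and is an assumption about where the weights live locally; a Hub repo id for the fine-tuned model is not stated in this commit.

```python
# Minimal sketch (assumed local checkpoint path; not part of this commit).
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "/home/iais_marenpielka/Bouthaina/res_nw_irq_03/checkpoint-1057"  # from trainer_state.json
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt)

inputs = tokenizer("مرحبا", return_tensors="pt")  # placeholder Arabic prompt ("hello")
outputs = model.generate(**inputs, max_new_tokens=40)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```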
all_results.json ADDED
@@ -0,0 +1,19 @@
+ {
+     "epoch": 6.0,
+     "eval_bleu": 0.3758671129852906,
+     "eval_loss": 0.9734994769096375,
+     "eval_rouge1": 0.5620694784691431,
+     "eval_rouge2": 0.3072634138927888,
+     "eval_rougeL": 0.5602967261743523,
+     "eval_runtime": 136.3007,
+     "eval_samples": 2113,
+     "eval_samples_per_second": 15.502,
+     "eval_steps_per_second": 1.944,
+     "perplexity": 2.647192056517508,
+     "total_flos": 1.1774067405225984e+16,
+     "train_loss": 0.37803844186042396,
+     "train_runtime": 4074.3615,
+     "train_samples": 8452,
+     "train_samples_per_second": 41.489,
+     "train_steps_per_second": 5.189
+ }
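One relationship worth noting in these numbers: the `perplexity` field is simply the exponential of `eval_loss`. An illustrative check:

```python
# Illustrative check (not part of the commit): perplexity = exp(eval_loss).
import math

eval_loss = 0.9734994769096375   # from all_results.json
print(math.exp(eval_loss))       # ~2.6472, matching the reported "perplexity"
```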
egy_training_log.txt CHANGED
@@ -152,3 +152,5 @@ INFO:root:Epoch 5.0: Train Loss = 0.1771, Eval Loss = 1.3314274549484253
 INFO:absl:Using default tokenizer.
 INFO:root:Epoch 6.0: Train Loss = 0.1337, Eval Loss = 1.344425916671753
 INFO:absl:Using default tokenizer.
+ INFO:__main__:*** Evaluate ***
+ INFO:absl:Using default tokenizer.
eval_results.json ADDED
@@ -0,0 +1,13 @@
+ {
+     "epoch": 6.0,
+     "eval_bleu": 0.3758671129852906,
+     "eval_loss": 0.9734994769096375,
+     "eval_rouge1": 0.5620694784691431,
+     "eval_rouge2": 0.3072634138927888,
+     "eval_rougeL": 0.5602967261743523,
+     "eval_runtime": 136.3007,
+     "eval_samples": 2113,
+     "eval_samples_per_second": 15.502,
+     "eval_steps_per_second": 1.944,
+     "perplexity": 2.647192056517508
+ }
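The `eval_bleu` and `eval_rouge1/2/L` values follow the usual text-generation metric setup. As a hedged illustration only (the evaluation script is not part of this commit), scores with these names are commonly computed with the Hugging Face `evaluate` library; the prediction and reference strings below are hypothetical placeholders, not data from this run.

```python
# Hedged sketch of a BLEU/ROUGE computation with `evaluate`; texts are placeholders.
import evaluate

bleu = evaluate.load("bleu")
rouge = evaluate.load("rouge")

predictions = ["model output for the first sample", "model output for the second sample"]
references = ["reference text for the first sample", "reference text for the second sample"]

print(bleu.compute(predictions=predictions, references=[[r] for r in references]))  # keys include "bleu"
print(rouge.compute(predictions=predictions, references=references))                # keys include "rouge1", "rouge2", "rougeL"
```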
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 6.0,
+     "total_flos": 1.1774067405225984e+16,
+     "train_loss": 0.37803844186042396,
+     "train_runtime": 4074.3615,
+     "train_samples": 8452,
+     "train_samples_per_second": 41.489,
+     "train_steps_per_second": 5.189
+ }
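A note on the throughput fields: they appear to be computed against the planned run length (20 epochs, 21,140 max steps from trainer_state.json) rather than the 6 epochs actually executed before early stopping, which is worth keeping in mind when comparing runs. Illustrative arithmetic from the numbers above:

```python
# Illustrative arithmetic only: the reported throughput matches the planned run
# (20 epochs / 21140 max steps), not the 6342 steps actually executed.
train_runtime = 4074.3615          # seconds, from train_results.json
print(21140 / train_runtime)       # ~5.189  == train_steps_per_second
print(8452 * 20 / train_runtime)   # ~41.49  == train_samples_per_second
print(6342 / train_runtime)        # ~1.56   steps/s over the steps actually run
```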
train_vs_val_loss.png ADDED
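The PNG itself is not rendered in this diff. For illustration only, a curve like it can be rebuilt from the `log_history` in the trainer_state.json added below; the file paths here are assumptions, and this is not necessarily the script that produced the committed image.

```python
# Hedged sketch: rebuild a train-vs-eval loss curve from trainer_state.json.
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:          # assumed local path
    state = json.load(f)

train = [(e["epoch"], e["loss"]) for e in state["log_history"] if "loss" in e]
evals = [(e["epoch"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

plt.plot(*zip(*train), marker="o", label="train loss")
plt.plot(*zip(*evals), marker="o", label="eval loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.savefig("train_vs_val_loss.png")           # assumed output name
```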
trainer_state.json ADDED
@@ -0,0 +1,165 @@
+ {
+   "best_metric": 0.9734994769096375,
+   "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq_03/checkpoint-1057",
+   "epoch": 6.0,
+   "eval_steps": 500,
+   "global_step": 6342,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "grad_norm": 2.316096782684326,
+       "learning_rate": 4.865067829457365e-05,
+       "loss": 1.1923,
+       "step": 1057
+     },
+     {
+       "epoch": 1.0,
+       "eval_bleu": 0.3758671129852906,
+       "eval_loss": 0.9734994769096375,
+       "eval_rouge1": 0.5620694784691431,
+       "eval_rouge2": 0.3072634138927888,
+       "eval_rougeL": 0.5602967261743523,
+       "eval_runtime": 18.3819,
+       "eval_samples_per_second": 114.95,
+       "eval_steps_per_second": 14.416,
+       "step": 1057
+     },
+     {
+       "epoch": 2.0,
+       "grad_norm": 1.9589498043060303,
+       "learning_rate": 4.609011627906977e-05,
+       "loss": 0.3956,
+       "step": 2114
+     },
+     {
+       "epoch": 2.0,
+       "eval_bleu": 0.41456228931689637,
+       "eval_loss": 1.2529053688049316,
+       "eval_rouge1": 0.6342121068042533,
+       "eval_rouge2": 0.3777098530906886,
+       "eval_rougeL": 0.6329704343270921,
+       "eval_runtime": 27.6701,
+       "eval_samples_per_second": 76.364,
+       "eval_steps_per_second": 9.577,
+       "step": 2114
+     },
+     {
+       "epoch": 3.0,
+       "grad_norm": 1.9037576913833618,
+       "learning_rate": 4.3529554263565894e-05,
+       "loss": 0.2578,
+       "step": 3171
+     },
+     {
+       "epoch": 3.0,
+       "eval_bleu": 0.4177567847266788,
+       "eval_loss": 1.1432485580444336,
+       "eval_rouge1": 0.6356354703691816,
+       "eval_rouge2": 0.38873743710479597,
+       "eval_rougeL": 0.6340522473648312,
+       "eval_runtime": 16.0819,
+       "eval_samples_per_second": 131.39,
+       "eval_steps_per_second": 16.478,
+       "step": 3171
+     },
+     {
+       "epoch": 4.0,
+       "grad_norm": 3.466954469680786,
+       "learning_rate": 4.096899224806201e-05,
+       "loss": 0.1771,
+       "step": 4228
+     },
+     {
+       "epoch": 4.0,
+       "eval_bleu": 0.420916242293927,
+       "eval_loss": 1.3314274549484253,
+       "eval_rouge1": 0.6564326494399506,
+       "eval_rouge2": 0.4082972964966378,
+       "eval_rougeL": 0.6545946336979762,
+       "eval_runtime": 15.9444,
+       "eval_samples_per_second": 132.523,
+       "eval_steps_per_second": 16.62,
+       "step": 4228
+     },
+     {
+       "epoch": 5.0,
+       "grad_norm": 2.057302236557007,
+       "learning_rate": 3.840843023255814e-05,
+       "loss": 0.1337,
+       "step": 5285
+     },
+     {
+       "epoch": 5.0,
+       "eval_bleu": 0.42563539215296015,
+       "eval_loss": 1.344425916671753,
+       "eval_rouge1": 0.6508055039664861,
+       "eval_rouge2": 0.40613669991922435,
+       "eval_rougeL": 0.6494650833785085,
+       "eval_runtime": 139.4783,
+       "eval_samples_per_second": 15.149,
+       "eval_steps_per_second": 1.9,
+       "step": 5285
+     },
+     {
+       "epoch": 6.0,
+       "grad_norm": 1.6741355657577515,
+       "learning_rate": 3.5847868217054265e-05,
+       "loss": 0.1118,
+       "step": 6342
+     },
+     {
+       "epoch": 6.0,
+       "eval_bleu": 0.42475546499314026,
+       "eval_loss": 1.369273066520691,
+       "eval_rouge1": 0.6534368445259939,
+       "eval_rouge2": 0.40793487128864964,
+       "eval_rougeL": 0.6523435258107617,
+       "eval_runtime": 22.7821,
+       "eval_samples_per_second": 92.748,
+       "eval_steps_per_second": 11.632,
+       "step": 6342
+     },
+     {
+       "epoch": 6.0,
+       "step": 6342,
+       "total_flos": 1.1774067405225984e+16,
+       "train_loss": 0.37803844186042396,
+       "train_runtime": 4074.3615,
+       "train_samples_per_second": 41.489,
+       "train_steps_per_second": 5.189
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 21140,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 20,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 5,
+         "early_stopping_threshold": 0.0
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 0
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1.1774067405225984e+16,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
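Two things this state file makes explicit: training was capped at 20 epochs but stopped after 6, and the best `eval_loss` (0.9735) was reached at epoch 1 (checkpoint-1057), after which five evaluations passed without improvement, matching the `EarlyStoppingCallback` patience of 5. As a hedged sketch only (argument values are read off this file; the model, tokenizer, and datasets are placeholders, and the repo's actual training script is not in this commit), such a setup is typically configured like this:

```python
# Hedged configuration sketch; values mirror trainer_state.json, names are assumed.
from transformers import TrainingArguments, EarlyStoppingCallback

args = TrainingArguments(
    output_dir="res_nw_irq_03",         # assumed, from the best_model_checkpoint path
    num_train_epochs=20,                # "num_train_epochs": 20; the run stopped after 6
    per_device_train_batch_size=8,      # "train_batch_size": 8
    eval_strategy="epoch",              # `evaluation_strategy` on older transformers releases
    save_strategy="epoch",
    load_best_model_at_end=True,        # required so the best checkpoint (step 1057) is restored
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

# Stop once eval_loss fails to improve for 5 consecutive evaluations, matching
# "early_stopping_patience": 5 and "early_stopping_threshold": 0.0 above.
early_stopping = EarlyStoppingCallback(early_stopping_patience=5,
                                       early_stopping_threshold=0.0)

# The callback would then be passed to Trainer(..., callbacks=[early_stopping])
# along with the model and datasets, which are not part of this commit.
```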