akahana committed on
Commit
d339fa3
1 Parent(s): dd9a102

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: mini-roberta-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # mini-roberta-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: mini-roberta-javanese
10
+ results:
11
+ - task:
12
+ name: Masked Language Modeling
13
+ type: fill-mask
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.14698866640019598
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # mini-roberta-javanese
28
 
29
+ This model is a fine-tuned version of [an unspecified base model](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 6.3746
32
+ - Accuracy: 0.1470
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.13617949114930072,
4
- "eval_loss": 6.647983074188232,
5
- "eval_runtime": 53.5181,
6
  "eval_samples": 8106,
7
- "eval_samples_per_second": 151.463,
8
- "eval_steps_per_second": 37.875,
9
- "perplexity": 771.2272476651699,
10
- "total_flos": 5910924298141440.0,
11
- "train_loss": 7.017849675472083,
12
- "train_runtime": 3142.2268,
13
  "train_samples": 160441,
14
- "train_samples_per_second": 510.596,
15
- "train_steps_per_second": 3.991
16
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.14698866640019598,
4
+ "eval_loss": 6.374551773071289,
5
+ "eval_runtime": 52.0555,
6
  "eval_samples": 8106,
7
+ "eval_samples_per_second": 155.718,
8
+ "eval_steps_per_second": 38.939,
9
+ "perplexity": 586.7223872208433,
10
+ "total_flos": 8866386447212160.0,
11
+ "train_loss": 2.1665261722384206,
12
+ "train_runtime": 1599.9695,
13
  "train_samples": 160441,
14
+ "train_samples_per_second": 1504.163,
15
+ "train_steps_per_second": 11.756
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_accuracy": 0.13617949114930072,
4
- "eval_loss": 6.647983074188232,
5
- "eval_runtime": 53.5181,
6
  "eval_samples": 8106,
7
- "eval_samples_per_second": 151.463,
8
- "eval_steps_per_second": 37.875,
9
- "perplexity": 771.2272476651699
10
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.14698866640019598,
4
+ "eval_loss": 6.374551773071289,
5
+ "eval_runtime": 52.0555,
6
  "eval_samples": 8106,
7
+ "eval_samples_per_second": 155.718,
8
+ "eval_steps_per_second": 38.939,
9
+ "perplexity": 586.7223872208433
10
  }
runs/Jul18_02-12-34_fe084eaf0329/events.out.tfevents.1721270420.fe084eaf0329.45280.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5a04b186a27db5b552af5c112090f332c6676d2d70a28a61baa7c5b8285399c
3
+ size 417
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 5910924298141440.0,
4
- "train_loss": 7.017849675472083,
5
- "train_runtime": 3142.2268,
6
  "train_samples": 160441,
7
- "train_samples_per_second": 510.596,
8
- "train_steps_per_second": 3.991
9
  }
 
1
  {
2
+ "epoch": 15.0,
3
+ "total_flos": 8866386447212160.0,
4
+ "train_loss": 2.1665261722384206,
5
+ "train_runtime": 1599.9695,
6
  "train_samples": 160441,
7
+ "train_samples_per_second": 1504.163,
8
+ "train_steps_per_second": 11.756
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 12540,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -191,12 +191,105 @@
191
  "train_runtime": 3142.2268,
192
  "train_samples_per_second": 510.596,
193
  "train_steps_per_second": 3.991
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  }
195
  ],
196
  "logging_steps": 500,
197
- "max_steps": 12540,
198
  "num_input_tokens_seen": 0,
199
- "num_train_epochs": 10,
200
  "save_steps": 500,
201
  "stateful_callbacks": {
202
  "TrainerControl": {
@@ -210,7 +303,7 @@
210
  "attributes": {}
211
  }
212
  },
213
- "total_flos": 5910924298141440.0,
214
  "train_batch_size": 128,
215
  "trial_name": null,
216
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
  "eval_steps": 500,
6
+ "global_step": 18810,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
191
  "train_runtime": 3142.2268,
192
  "train_samples_per_second": 510.596,
193
  "train_steps_per_second": 3.991
194
+ },
195
+ {
196
+ "epoch": 10.36682615629984,
197
+ "grad_norm": 2.343043327331543,
198
+ "learning_rate": 4.87772461456672e-05,
199
+ "loss": 6.6945,
200
+ "step": 13000
201
+ },
202
+ {
203
+ "epoch": 10.76555023923445,
204
+ "grad_norm": 2.0774621963500977,
205
+ "learning_rate": 4.7448165869218504e-05,
206
+ "loss": 6.6465,
207
+ "step": 13500
208
+ },
209
+ {
210
+ "epoch": 11.164274322169058,
211
+ "grad_norm": 2.5551674365997314,
212
+ "learning_rate": 4.611908559276981e-05,
213
+ "loss": 6.6086,
214
+ "step": 14000
215
+ },
216
+ {
217
+ "epoch": 11.562998405103668,
218
+ "grad_norm": 2.335573196411133,
219
+ "learning_rate": 4.479000531632111e-05,
220
+ "loss": 6.5677,
221
+ "step": 14500
222
+ },
223
+ {
224
+ "epoch": 11.961722488038278,
225
+ "grad_norm": 2.7488040924072266,
226
+ "learning_rate": 4.346092503987241e-05,
227
+ "loss": 6.5439,
228
+ "step": 15000
229
+ },
230
+ {
231
+ "epoch": 12.360446570972886,
232
+ "grad_norm": 2.1827917098999023,
233
+ "learning_rate": 4.213184476342371e-05,
234
+ "loss": 6.4975,
235
+ "step": 15500
236
+ },
237
+ {
238
+ "epoch": 12.759170653907496,
239
+ "grad_norm": 2.1983468532562256,
240
+ "learning_rate": 4.080276448697501e-05,
241
+ "loss": 6.4953,
242
+ "step": 16000
243
+ },
244
+ {
245
+ "epoch": 13.157894736842104,
246
+ "grad_norm": 2.4948813915252686,
247
+ "learning_rate": 3.9473684210526316e-05,
248
+ "loss": 6.4546,
249
+ "step": 16500
250
+ },
251
+ {
252
+ "epoch": 13.556618819776714,
253
+ "grad_norm": 2.0236194133758545,
254
+ "learning_rate": 3.814460393407762e-05,
255
+ "loss": 6.432,
256
+ "step": 17000
257
+ },
258
+ {
259
+ "epoch": 13.955342902711324,
260
+ "grad_norm": 2.3203213214874268,
261
+ "learning_rate": 3.681552365762892e-05,
262
+ "loss": 6.4038,
263
+ "step": 17500
264
+ },
265
+ {
266
+ "epoch": 14.354066985645932,
267
+ "grad_norm": 2.347102165222168,
268
+ "learning_rate": 3.5486443381180226e-05,
269
+ "loss": 6.3872,
270
+ "step": 18000
271
+ },
272
+ {
273
+ "epoch": 14.752791068580542,
274
+ "grad_norm": 2.5316317081451416,
275
+ "learning_rate": 3.415736310473153e-05,
276
+ "loss": 6.3687,
277
+ "step": 18500
278
+ },
279
+ {
280
+ "epoch": 15.0,
281
+ "step": 18810,
282
+ "total_flos": 8866386447212160.0,
283
+ "train_loss": 2.1665261722384206,
284
+ "train_runtime": 1599.9695,
285
+ "train_samples_per_second": 1504.163,
286
+ "train_steps_per_second": 11.756
287
  }
288
  ],
289
  "logging_steps": 500,
290
+ "max_steps": 18810,
291
  "num_input_tokens_seen": 0,
292
+ "num_train_epochs": 15,
293
  "save_steps": 500,
294
  "stateful_callbacks": {
295
  "TrainerControl": {
 
303
  "attributes": {}
304
  }
305
  },
306
+ "total_flos": 8866386447212160.0,
307
  "train_batch_size": 128,
308
  "trial_name": null,
309
  "trial_params": null