akahana committed on
Commit
226b105
1 Parent(s): 60d00aa

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: roberta-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # roberta-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: roberta-javanese
10
+ results:
11
+ - task:
12
+ name: Masked Language Modeling
13
+ type: fill-mask
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.5302221081011683
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # roberta-javanese
28
 
29
+ This model is a fine-tuned version of [](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 2.9194
32
+ - Accuracy: 0.5302
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 30.0,
3
- "eval_accuracy": 0.5187187058672487,
4
- "eval_loss": 2.996563196182251,
5
- "eval_runtime": 30.3359,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 133.604,
8
- "eval_steps_per_second": 33.426,
9
- "perplexity": 20.01662535880206,
10
- "total_flos": 1.5839169150106368e+17,
11
- "train_loss": 0.47119966579742084,
12
- "train_runtime": 6930.0607,
13
  "train_samples": 80219,
14
- "train_samples_per_second": 347.265,
15
- "train_steps_per_second": 21.705
16
  }
 
1
  {
2
+ "epoch": 35.0,
3
+ "eval_accuracy": 0.5302221081011683,
4
+ "eval_loss": 2.9193999767303467,
5
+ "eval_runtime": 31.3487,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 129.287,
8
+ "eval_steps_per_second": 32.346,
9
+ "perplexity": 18.530165592844845,
10
+ "total_flos": 1.8479030675124096e+17,
11
+ "train_loss": 0.37831091759340585,
12
+ "train_runtime": 6392.496,
13
  "train_samples": 80219,
14
+ "train_samples_per_second": 439.213,
15
+ "train_steps_per_second": 27.453
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 30.0,
3
- "eval_accuracy": 0.5187187058672487,
4
- "eval_loss": 2.996563196182251,
5
- "eval_runtime": 30.3359,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 133.604,
8
- "eval_steps_per_second": 33.426,
9
- "perplexity": 20.01662535880206
10
  }
 
1
  {
2
+ "epoch": 35.0,
3
+ "eval_accuracy": 0.5302221081011683,
4
+ "eval_loss": 2.9193999767303467,
5
+ "eval_runtime": 31.3487,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 129.287,
8
+ "eval_steps_per_second": 32.346,
9
+ "perplexity": 18.530165592844845
10
  }
runs/Jul17_23-18-43_fe084eaf0329/events.out.tfevents.1721264859.fe084eaf0329.1588.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:359c054406426308763fd4c7fcc18b3e809ebf6ab4e0ade30f1f237069aa4e55
3
+ size 417
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 30.0,
3
- "total_flos": 1.5839169150106368e+17,
4
- "train_loss": 0.47119966579742084,
5
- "train_runtime": 6930.0607,
6
  "train_samples": 80219,
7
- "train_samples_per_second": 347.265,
8
- "train_steps_per_second": 21.705
9
  }
 
1
  {
2
+ "epoch": 35.0,
3
+ "total_flos": 1.8479030675124096e+17,
4
+ "train_loss": 0.37831091759340585,
5
+ "train_runtime": 6392.496,
6
  "train_samples": 80219,
7
+ "train_samples_per_second": 439.213,
8
+ "train_steps_per_second": 27.453
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 30.0,
5
  "eval_steps": 500,
6
- "global_step": 150420,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2161,12 +2161,371 @@
2161
  "train_runtime": 6930.0607,
2162
  "train_samples_per_second": 347.265,
2163
  "train_steps_per_second": 21.705
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2164
  }
2165
  ],
2166
  "logging_steps": 500,
2167
- "max_steps": 150420,
2168
  "num_input_tokens_seen": 0,
2169
- "num_train_epochs": 30,
2170
  "save_steps": 500,
2171
  "stateful_callbacks": {
2172
  "TrainerControl": {
@@ -2175,12 +2534,12 @@
2175
  "should_evaluate": false,
2176
  "should_log": false,
2177
  "should_save": true,
2178
- "should_training_stop": false
2179
  },
2180
  "attributes": {}
2181
  }
2182
  },
2183
- "total_flos": 1.5839169150106368e+17,
2184
  "train_batch_size": 16,
2185
  "trial_name": null,
2186
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 35.0,
5
  "eval_steps": 500,
6
+ "global_step": 175490,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2161
  "train_runtime": 6930.0607,
2162
  "train_samples_per_second": 347.265,
2163
  "train_steps_per_second": 21.705
2164
+ },
2165
+ {
2166
+ "epoch": 30.01595532508975,
2167
+ "grad_norm": 6.077478885650635,
2168
+ "learning_rate": 4.997720667844322e-05,
2169
+ "loss": 2.7286,
2170
+ "step": 150500
2171
+ },
2172
+ {
2173
+ "epoch": 30.11567610690068,
2174
+ "grad_norm": 6.566033363342285,
2175
+ "learning_rate": 4.983474841871332e-05,
2176
+ "loss": 2.7319,
2177
+ "step": 151000
2178
+ },
2179
+ {
2180
+ "epoch": 30.215396888711606,
2181
+ "grad_norm": 7.486234188079834,
2182
+ "learning_rate": 4.969229015898342e-05,
2183
+ "loss": 2.7899,
2184
+ "step": 151500
2185
+ },
2186
+ {
2187
+ "epoch": 30.315117670522536,
2188
+ "grad_norm": 7.640929222106934,
2189
+ "learning_rate": 4.954983189925352e-05,
2190
+ "loss": 2.7598,
2191
+ "step": 152000
2192
+ },
2193
+ {
2194
+ "epoch": 30.414838452333466,
2195
+ "grad_norm": 7.036547660827637,
2196
+ "learning_rate": 4.940737363952362e-05,
2197
+ "loss": 2.754,
2198
+ "step": 152500
2199
+ },
2200
+ {
2201
+ "epoch": 30.514559234144397,
2202
+ "grad_norm": 7.128058910369873,
2203
+ "learning_rate": 4.926491537979372e-05,
2204
+ "loss": 2.7888,
2205
+ "step": 153000
2206
+ },
2207
+ {
2208
+ "epoch": 30.614280015955327,
2209
+ "grad_norm": 7.1788249015808105,
2210
+ "learning_rate": 4.912245712006382e-05,
2211
+ "loss": 2.7662,
2212
+ "step": 153500
2213
+ },
2214
+ {
2215
+ "epoch": 30.714000797766253,
2216
+ "grad_norm": 7.081215858459473,
2217
+ "learning_rate": 4.897999886033392e-05,
2218
+ "loss": 2.7722,
2219
+ "step": 154000
2220
+ },
2221
+ {
2222
+ "epoch": 30.813721579577184,
2223
+ "grad_norm": 6.131695747375488,
2224
+ "learning_rate": 4.883754060060402e-05,
2225
+ "loss": 2.7464,
2226
+ "step": 154500
2227
+ },
2228
+ {
2229
+ "epoch": 30.913442361388114,
2230
+ "grad_norm": 6.66817569732666,
2231
+ "learning_rate": 4.869508234087412e-05,
2232
+ "loss": 2.7352,
2233
+ "step": 155000
2234
+ },
2235
+ {
2236
+ "epoch": 31.013163143199044,
2237
+ "grad_norm": 7.4430952072143555,
2238
+ "learning_rate": 4.8552908997663685e-05,
2239
+ "loss": 2.7503,
2240
+ "step": 155500
2241
+ },
2242
+ {
2243
+ "epoch": 31.11288392500997,
2244
+ "grad_norm": 7.984841346740723,
2245
+ "learning_rate": 4.8410450737933786e-05,
2246
+ "loss": 2.6821,
2247
+ "step": 156000
2248
+ },
2249
+ {
2250
+ "epoch": 31.2126047068209,
2251
+ "grad_norm": 7.386984348297119,
2252
+ "learning_rate": 4.8267992478203886e-05,
2253
+ "loss": 2.6916,
2254
+ "step": 156500
2255
+ },
2256
+ {
2257
+ "epoch": 31.31232548863183,
2258
+ "grad_norm": 6.3857951164245605,
2259
+ "learning_rate": 4.8125534218473987e-05,
2260
+ "loss": 2.6826,
2261
+ "step": 157000
2262
+ },
2263
+ {
2264
+ "epoch": 31.41204627044276,
2265
+ "grad_norm": 7.394888401031494,
2266
+ "learning_rate": 4.798307595874409e-05,
2267
+ "loss": 2.7099,
2268
+ "step": 157500
2269
+ },
2270
+ {
2271
+ "epoch": 31.51176705225369,
2272
+ "grad_norm": 7.39955997467041,
2273
+ "learning_rate": 4.784061769901419e-05,
2274
+ "loss": 2.7056,
2275
+ "step": 158000
2276
+ },
2277
+ {
2278
+ "epoch": 31.61148783406462,
2279
+ "grad_norm": 6.624033451080322,
2280
+ "learning_rate": 4.769844435580375e-05,
2281
+ "loss": 2.6903,
2282
+ "step": 158500
2283
+ },
2284
+ {
2285
+ "epoch": 31.71120861587555,
2286
+ "grad_norm": 6.656693458557129,
2287
+ "learning_rate": 4.755627101259331e-05,
2288
+ "loss": 2.6877,
2289
+ "step": 159000
2290
+ },
2291
+ {
2292
+ "epoch": 31.81092939768648,
2293
+ "grad_norm": 7.474542140960693,
2294
+ "learning_rate": 4.741381275286341e-05,
2295
+ "loss": 2.6965,
2296
+ "step": 159500
2297
+ },
2298
+ {
2299
+ "epoch": 31.910650179497406,
2300
+ "grad_norm": 7.388774394989014,
2301
+ "learning_rate": 4.727135449313351e-05,
2302
+ "loss": 2.7145,
2303
+ "step": 160000
2304
+ },
2305
+ {
2306
+ "epoch": 32.01037096130834,
2307
+ "grad_norm": 7.423541069030762,
2308
+ "learning_rate": 4.712889623340361e-05,
2309
+ "loss": 2.6943,
2310
+ "step": 160500
2311
+ },
2312
+ {
2313
+ "epoch": 32.11009174311926,
2314
+ "grad_norm": 6.063508033752441,
2315
+ "learning_rate": 4.698643797367371e-05,
2316
+ "loss": 2.6214,
2317
+ "step": 161000
2318
+ },
2319
+ {
2320
+ "epoch": 32.20981252493019,
2321
+ "grad_norm": 7.619082450866699,
2322
+ "learning_rate": 4.6843979713943814e-05,
2323
+ "loss": 2.6318,
2324
+ "step": 161500
2325
+ },
2326
+ {
2327
+ "epoch": 32.30953330674112,
2328
+ "grad_norm": 6.978066921234131,
2329
+ "learning_rate": 4.670152145421392e-05,
2330
+ "loss": 2.6327,
2331
+ "step": 162000
2332
+ },
2333
+ {
2334
+ "epoch": 32.40925408855205,
2335
+ "grad_norm": 6.166346073150635,
2336
+ "learning_rate": 4.655906319448402e-05,
2337
+ "loss": 2.6419,
2338
+ "step": 162500
2339
+ },
2340
+ {
2341
+ "epoch": 32.508974870362984,
2342
+ "grad_norm": 7.364738464355469,
2343
+ "learning_rate": 4.641660493475412e-05,
2344
+ "loss": 2.6356,
2345
+ "step": 163000
2346
+ },
2347
+ {
2348
+ "epoch": 32.608695652173914,
2349
+ "grad_norm": 7.476531982421875,
2350
+ "learning_rate": 4.627414667502422e-05,
2351
+ "loss": 2.6344,
2352
+ "step": 163500
2353
+ },
2354
+ {
2355
+ "epoch": 32.708416433984844,
2356
+ "grad_norm": 7.627068042755127,
2357
+ "learning_rate": 4.613168841529432e-05,
2358
+ "loss": 2.6434,
2359
+ "step": 164000
2360
+ },
2361
+ {
2362
+ "epoch": 32.808137215795774,
2363
+ "grad_norm": 7.334908962249756,
2364
+ "learning_rate": 4.598923015556442e-05,
2365
+ "loss": 2.663,
2366
+ "step": 164500
2367
+ },
2368
+ {
2369
+ "epoch": 32.907857997606705,
2370
+ "grad_norm": 6.580120086669922,
2371
+ "learning_rate": 4.5847341728873446e-05,
2372
+ "loss": 2.6406,
2373
+ "step": 165000
2374
+ },
2375
+ {
2376
+ "epoch": 33.00757877941763,
2377
+ "grad_norm": 6.953055381774902,
2378
+ "learning_rate": 4.570488346914355e-05,
2379
+ "loss": 2.6517,
2380
+ "step": 165500
2381
+ },
2382
+ {
2383
+ "epoch": 33.10729956122856,
2384
+ "grad_norm": 6.980926036834717,
2385
+ "learning_rate": 4.556242520941365e-05,
2386
+ "loss": 2.589,
2387
+ "step": 166000
2388
+ },
2389
+ {
2390
+ "epoch": 33.20702034303949,
2391
+ "grad_norm": 7.215412616729736,
2392
+ "learning_rate": 4.541996694968375e-05,
2393
+ "loss": 2.5831,
2394
+ "step": 166500
2395
+ },
2396
+ {
2397
+ "epoch": 33.30674112485042,
2398
+ "grad_norm": 7.203444004058838,
2399
+ "learning_rate": 4.527750868995385e-05,
2400
+ "loss": 2.5739,
2401
+ "step": 167000
2402
+ },
2403
+ {
2404
+ "epoch": 33.40646190666135,
2405
+ "grad_norm": 5.696502685546875,
2406
+ "learning_rate": 4.513505043022395e-05,
2407
+ "loss": 2.604,
2408
+ "step": 167500
2409
+ },
2410
+ {
2411
+ "epoch": 33.50618268847228,
2412
+ "grad_norm": 6.160342216491699,
2413
+ "learning_rate": 4.499259217049405e-05,
2414
+ "loss": 2.5848,
2415
+ "step": 168000
2416
+ },
2417
+ {
2418
+ "epoch": 33.60590347028321,
2419
+ "grad_norm": 6.758869171142578,
2420
+ "learning_rate": 4.485013391076415e-05,
2421
+ "loss": 2.6157,
2422
+ "step": 168500
2423
+ },
2424
+ {
2425
+ "epoch": 33.70562425209414,
2426
+ "grad_norm": 7.064002513885498,
2427
+ "learning_rate": 4.4708245484073166e-05,
2428
+ "loss": 2.5765,
2429
+ "step": 169000
2430
+ },
2431
+ {
2432
+ "epoch": 33.80534503390506,
2433
+ "grad_norm": 7.993391513824463,
2434
+ "learning_rate": 4.4565787224343267e-05,
2435
+ "loss": 2.6115,
2436
+ "step": 169500
2437
+ },
2438
+ {
2439
+ "epoch": 33.90506581571599,
2440
+ "grad_norm": 7.196022033691406,
2441
+ "learning_rate": 4.442332896461337e-05,
2442
+ "loss": 2.591,
2443
+ "step": 170000
2444
+ },
2445
+ {
2446
+ "epoch": 34.00478659752692,
2447
+ "grad_norm": 8.118667602539062,
2448
+ "learning_rate": 4.428115562140293e-05,
2449
+ "loss": 2.5833,
2450
+ "step": 170500
2451
+ },
2452
+ {
2453
+ "epoch": 34.10450737933785,
2454
+ "grad_norm": 7.465199947357178,
2455
+ "learning_rate": 4.413869736167303e-05,
2456
+ "loss": 2.5509,
2457
+ "step": 171000
2458
+ },
2459
+ {
2460
+ "epoch": 34.204228161148784,
2461
+ "grad_norm": 6.739304542541504,
2462
+ "learning_rate": 4.399623910194313e-05,
2463
+ "loss": 2.5357,
2464
+ "step": 171500
2465
+ },
2466
+ {
2467
+ "epoch": 34.303948942959714,
2468
+ "grad_norm": 6.758444786071777,
2469
+ "learning_rate": 4.385378084221323e-05,
2470
+ "loss": 2.567,
2471
+ "step": 172000
2472
+ },
2473
+ {
2474
+ "epoch": 34.403669724770644,
2475
+ "grad_norm": 6.511049270629883,
2476
+ "learning_rate": 4.371132258248333e-05,
2477
+ "loss": 2.5759,
2478
+ "step": 172500
2479
+ },
2480
+ {
2481
+ "epoch": 34.503390506581574,
2482
+ "grad_norm": 7.730967044830322,
2483
+ "learning_rate": 4.356886432275343e-05,
2484
+ "loss": 2.5494,
2485
+ "step": 173000
2486
+ },
2487
+ {
2488
+ "epoch": 34.6031112883925,
2489
+ "grad_norm": 6.543623924255371,
2490
+ "learning_rate": 4.342640606302353e-05,
2491
+ "loss": 2.5482,
2492
+ "step": 173500
2493
+ },
2494
+ {
2495
+ "epoch": 34.70283207020343,
2496
+ "grad_norm": 7.216828346252441,
2497
+ "learning_rate": 4.328394780329364e-05,
2498
+ "loss": 2.5593,
2499
+ "step": 174000
2500
+ },
2501
+ {
2502
+ "epoch": 34.80255285201436,
2503
+ "grad_norm": 6.891706943511963,
2504
+ "learning_rate": 4.3141774460083194e-05,
2505
+ "loss": 2.5409,
2506
+ "step": 174500
2507
+ },
2508
+ {
2509
+ "epoch": 34.90227363382529,
2510
+ "grad_norm": 7.4927778244018555,
2511
+ "learning_rate": 4.29993162003533e-05,
2512
+ "loss": 2.5673,
2513
+ "step": 175000
2514
+ },
2515
+ {
2516
+ "epoch": 35.0,
2517
+ "step": 175490,
2518
+ "total_flos": 1.8479030675124096e+17,
2519
+ "train_loss": 0.37831091759340585,
2520
+ "train_runtime": 6392.496,
2521
+ "train_samples_per_second": 439.213,
2522
+ "train_steps_per_second": 27.453
2523
  }
2524
  ],
2525
  "logging_steps": 500,
2526
+ "max_steps": 175490,
2527
  "num_input_tokens_seen": 0,
2528
+ "num_train_epochs": 35,
2529
  "save_steps": 500,
2530
  "stateful_callbacks": {
2531
  "TrainerControl": {
 
2534
  "should_evaluate": false,
2535
  "should_log": false,
2536
  "should_save": true,
2537
+ "should_training_stop": true
2538
  },
2539
  "attributes": {}
2540
  }
2541
  },
2542
+ "total_flos": 1.8479030675124096e+17,
2543
  "train_batch_size": 16,
2544
  "trial_name": null,
2545
  "trial_params": null