YL95 committed on
Commit
d117e5b
1 Parent(s): 4dd0882

training state at step 100

Browse files
Files changed (1) hide show
  1. trainer_state.json +78 -3
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.3103448275862069,
5
  "eval_steps": 1,
6
- "global_step": 95,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1387,6 +1387,81 @@
1387
  "eval_samples_per_second": 1.098,
1388
  "eval_steps_per_second": 0.549,
1389
  "step": 94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1390
  }
1391
  ],
1392
  "logging_steps": 1,
@@ -1406,7 +1481,7 @@
1406
  "attributes": {}
1407
  }
1408
  },
1409
- "total_flos": 1.2071987453003366e+17,
1410
  "train_batch_size": 2,
1411
  "trial_name": null,
1412
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3793103448275863,
5
  "eval_steps": 1,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1387
  "eval_samples_per_second": 1.098,
1388
  "eval_steps_per_second": 0.549,
1389
  "step": 94
1390
+ },
1391
+ {
1392
+ "epoch": 1.3103448275862069,
1393
+ "grad_norm": 2.077143669128418,
1394
+ "learning_rate": 4.894543310469968e-05,
1395
+ "loss": 1.2378,
1396
+ "step": 95
1397
+ },
1398
+ {
1399
+ "epoch": 1.3103448275862069,
1400
+ "eval_loss": 0.6547893285751343,
1401
+ "eval_runtime": 18.7488,
1402
+ "eval_samples_per_second": 1.067,
1403
+ "eval_steps_per_second": 0.533,
1404
+ "step": 95
1405
+ },
1406
+ {
1407
+ "epoch": 1.3241379310344827,
1408
+ "grad_norm": 1.9517972469329834,
1409
+ "learning_rate": 4.882595527372152e-05,
1410
+ "loss": 1.0997,
1411
+ "step": 96
1412
+ },
1413
+ {
1414
+ "epoch": 1.3241379310344827,
1415
+ "eval_loss": 0.6498640775680542,
1416
+ "eval_runtime": 18.4304,
1417
+ "eval_samples_per_second": 1.085,
1418
+ "eval_steps_per_second": 0.543,
1419
+ "step": 96
1420
+ },
1421
+ {
1422
+ "epoch": 1.3379310344827586,
1423
+ "grad_norm": 2.0447959899902344,
1424
+ "learning_rate": 4.870022949890676e-05,
1425
+ "loss": 0.9613,
1426
+ "step": 97
1427
+ },
1428
+ {
1429
+ "epoch": 1.3379310344827586,
1430
+ "eval_loss": 0.6370054483413696,
1431
+ "eval_runtime": 18.252,
1432
+ "eval_samples_per_second": 1.096,
1433
+ "eval_steps_per_second": 0.548,
1434
+ "step": 97
1435
+ },
1436
+ {
1437
+ "epoch": 1.3517241379310345,
1438
+ "grad_norm": 2.078657865524292,
1439
+ "learning_rate": 4.856828874966086e-05,
1440
+ "loss": 1.1216,
1441
+ "step": 98
1442
+ },
1443
+ {
1444
+ "epoch": 1.3517241379310345,
1445
+ "eval_loss": 0.6291982531547546,
1446
+ "eval_runtime": 18.2386,
1447
+ "eval_samples_per_second": 1.097,
1448
+ "eval_steps_per_second": 0.548,
1449
+ "step": 98
1450
+ },
1451
+ {
1452
+ "epoch": 1.3655172413793104,
1453
+ "grad_norm": 2.0556623935699463,
1454
+ "learning_rate": 4.8430167625158595e-05,
1455
+ "loss": 1.0718,
1456
+ "step": 99
1457
+ },
1458
+ {
1459
+ "epoch": 1.3655172413793104,
1460
+ "eval_loss": 0.6218433380126953,
1461
+ "eval_runtime": 18.1671,
1462
+ "eval_samples_per_second": 1.101,
1463
+ "eval_steps_per_second": 0.55,
1464
+ "step": 99
1465
  }
1466
  ],
1467
  "logging_steps": 1,
 
1481
  "attributes": {}
1482
  }
1483
  },
1484
+ "total_flos": 1.2709753377329971e+17,
1485
  "train_batch_size": 2,
1486
  "trial_name": null,
1487
  "trial_params": null