training state at step 100
Browse files- trainer_state.json +78 -3
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 1,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1387,6 +1387,81 @@
|
|
1387 |
"eval_samples_per_second": 1.098,
|
1388 |
"eval_steps_per_second": 0.549,
|
1389 |
"step": 94
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1390 |
}
|
1391 |
],
|
1392 |
"logging_steps": 1,
|
@@ -1406,7 +1481,7 @@
|
|
1406 |
"attributes": {}
|
1407 |
}
|
1408 |
},
|
1409 |
-
"total_flos": 1.
|
1410 |
"train_batch_size": 2,
|
1411 |
"trial_name": null,
|
1412 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.3793103448275863,
|
5 |
"eval_steps": 1,
|
6 |
+
"global_step": 100,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1387 |
"eval_samples_per_second": 1.098,
|
1388 |
"eval_steps_per_second": 0.549,
|
1389 |
"step": 94
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"epoch": 1.3103448275862069,
|
1393 |
+
"grad_norm": 2.077143669128418,
|
1394 |
+
"learning_rate": 4.894543310469968e-05,
|
1395 |
+
"loss": 1.2378,
|
1396 |
+
"step": 95
|
1397 |
+
},
|
1398 |
+
{
|
1399 |
+
"epoch": 1.3103448275862069,
|
1400 |
+
"eval_loss": 0.6547893285751343,
|
1401 |
+
"eval_runtime": 18.7488,
|
1402 |
+
"eval_samples_per_second": 1.067,
|
1403 |
+
"eval_steps_per_second": 0.533,
|
1404 |
+
"step": 95
|
1405 |
+
},
|
1406 |
+
{
|
1407 |
+
"epoch": 1.3241379310344827,
|
1408 |
+
"grad_norm": 1.9517972469329834,
|
1409 |
+
"learning_rate": 4.882595527372152e-05,
|
1410 |
+
"loss": 1.0997,
|
1411 |
+
"step": 96
|
1412 |
+
},
|
1413 |
+
{
|
1414 |
+
"epoch": 1.3241379310344827,
|
1415 |
+
"eval_loss": 0.6498640775680542,
|
1416 |
+
"eval_runtime": 18.4304,
|
1417 |
+
"eval_samples_per_second": 1.085,
|
1418 |
+
"eval_steps_per_second": 0.543,
|
1419 |
+
"step": 96
|
1420 |
+
},
|
1421 |
+
{
|
1422 |
+
"epoch": 1.3379310344827586,
|
1423 |
+
"grad_norm": 2.0447959899902344,
|
1424 |
+
"learning_rate": 4.870022949890676e-05,
|
1425 |
+
"loss": 0.9613,
|
1426 |
+
"step": 97
|
1427 |
+
},
|
1428 |
+
{
|
1429 |
+
"epoch": 1.3379310344827586,
|
1430 |
+
"eval_loss": 0.6370054483413696,
|
1431 |
+
"eval_runtime": 18.252,
|
1432 |
+
"eval_samples_per_second": 1.096,
|
1433 |
+
"eval_steps_per_second": 0.548,
|
1434 |
+
"step": 97
|
1435 |
+
},
|
1436 |
+
{
|
1437 |
+
"epoch": 1.3517241379310345,
|
1438 |
+
"grad_norm": 2.078657865524292,
|
1439 |
+
"learning_rate": 4.856828874966086e-05,
|
1440 |
+
"loss": 1.1216,
|
1441 |
+
"step": 98
|
1442 |
+
},
|
1443 |
+
{
|
1444 |
+
"epoch": 1.3517241379310345,
|
1445 |
+
"eval_loss": 0.6291982531547546,
|
1446 |
+
"eval_runtime": 18.2386,
|
1447 |
+
"eval_samples_per_second": 1.097,
|
1448 |
+
"eval_steps_per_second": 0.548,
|
1449 |
+
"step": 98
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"epoch": 1.3655172413793104,
|
1453 |
+
"grad_norm": 2.0556623935699463,
|
1454 |
+
"learning_rate": 4.8430167625158595e-05,
|
1455 |
+
"loss": 1.0718,
|
1456 |
+
"step": 99
|
1457 |
+
},
|
1458 |
+
{
|
1459 |
+
"epoch": 1.3655172413793104,
|
1460 |
+
"eval_loss": 0.6218433380126953,
|
1461 |
+
"eval_runtime": 18.1671,
|
1462 |
+
"eval_samples_per_second": 1.101,
|
1463 |
+
"eval_steps_per_second": 0.55,
|
1464 |
+
"step": 99
|
1465 |
}
|
1466 |
],
|
1467 |
"logging_steps": 1,
|
|
|
1481 |
"attributes": {}
|
1482 |
}
|
1483 |
},
|
1484 |
+
"total_flos": 1.2709753377329971e+17,
|
1485 |
"train_batch_size": 2,
|
1486 |
"trial_name": null,
|
1487 |
"trial_params": null
|