|
{ |
|
"best_metric": 0.9358974358974359, |
|
"best_model_checkpoint": "output-models/checkpoint-470", |
|
"epoch": 40.0, |
|
"eval_steps": 500, |
|
"global_step": 1880, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 47, |
|
"train_accuracy": 0.7072649572649573, |
|
"train_loss": 0.6782774329185486, |
|
"train_runtime": 13.5956, |
|
"train_samples_per_second": 34.423, |
|
"train_steps_per_second": 8.606 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243589743589743, |
|
"eval_loss": 0.6180987358093262, |
|
"eval_runtime": 35.9375, |
|
"eval_samples_per_second": 4.341, |
|
"eval_steps_per_second": 1.085, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 94, |
|
"train_accuracy": 0.75, |
|
"train_loss": 0.5690865516662598, |
|
"train_runtime": 13.7448, |
|
"train_samples_per_second": 34.049, |
|
"train_steps_per_second": 8.512 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8012820512820513, |
|
"eval_loss": 0.5130882263183594, |
|
"eval_runtime": 4.4157, |
|
"eval_samples_per_second": 35.328, |
|
"eval_steps_per_second": 8.832, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 141, |
|
"train_accuracy": 0.7350427350427351, |
|
"train_loss": 0.6460429430007935, |
|
"train_runtime": 14.0717, |
|
"train_samples_per_second": 33.258, |
|
"train_steps_per_second": 8.315 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8205128205128205, |
|
"eval_loss": 0.45179083943367004, |
|
"eval_runtime": 4.8055, |
|
"eval_samples_per_second": 32.463, |
|
"eval_steps_per_second": 8.116, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 188, |
|
"train_accuracy": 0.8098290598290598, |
|
"train_loss": 0.39663246273994446, |
|
"train_runtime": 13.7878, |
|
"train_samples_per_second": 33.943, |
|
"train_steps_per_second": 8.486 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8846153846153846, |
|
"eval_loss": 0.2980358898639679, |
|
"eval_runtime": 4.4674, |
|
"eval_samples_per_second": 34.919, |
|
"eval_steps_per_second": 8.73, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 235, |
|
"train_accuracy": 0.8632478632478633, |
|
"train_loss": 0.3636291027069092, |
|
"train_runtime": 13.58, |
|
"train_samples_per_second": 34.462, |
|
"train_steps_per_second": 8.616 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.2997760772705078, |
|
"eval_runtime": 4.9986, |
|
"eval_samples_per_second": 31.208, |
|
"eval_steps_per_second": 7.802, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 282, |
|
"train_accuracy": 0.8376068376068376, |
|
"train_loss": 0.4208720922470093, |
|
"train_runtime": 13.5735, |
|
"train_samples_per_second": 34.479, |
|
"train_steps_per_second": 8.62 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9102564102564102, |
|
"eval_loss": 0.33183348178863525, |
|
"eval_runtime": 4.4091, |
|
"eval_samples_per_second": 35.381, |
|
"eval_steps_per_second": 8.845, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 329, |
|
"train_accuracy": 0.8568376068376068, |
|
"train_loss": 0.32207924127578735, |
|
"train_runtime": 13.5635, |
|
"train_samples_per_second": 34.504, |
|
"train_steps_per_second": 8.626 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.24086996912956238, |
|
"eval_runtime": 4.7402, |
|
"eval_samples_per_second": 32.91, |
|
"eval_steps_per_second": 8.228, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 376, |
|
"train_accuracy": 0.8846153846153846, |
|
"train_loss": 0.3257001042366028, |
|
"train_runtime": 13.9348, |
|
"train_samples_per_second": 33.585, |
|
"train_steps_per_second": 8.396 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8717948717948718, |
|
"eval_loss": 0.3424080014228821, |
|
"eval_runtime": 4.9539, |
|
"eval_samples_per_second": 31.49, |
|
"eval_steps_per_second": 7.873, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"step": 423, |
|
"train_accuracy": 0.8717948717948718, |
|
"train_loss": 0.2687961757183075, |
|
"train_runtime": 13.6829, |
|
"train_samples_per_second": 34.203, |
|
"train_steps_per_second": 8.551 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.25928938388824463, |
|
"eval_runtime": 4.4508, |
|
"eval_samples_per_second": 35.05, |
|
"eval_steps_per_second": 8.762, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 470, |
|
"train_accuracy": 0.9102564102564102, |
|
"train_loss": 0.2113831341266632, |
|
"train_runtime": 13.44, |
|
"train_samples_per_second": 34.821, |
|
"train_steps_per_second": 8.705 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9358974358974359, |
|
"eval_loss": 0.25084006786346436, |
|
"eval_runtime": 4.8092, |
|
"eval_samples_per_second": 32.438, |
|
"eval_steps_per_second": 8.109, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"grad_norm": 7.874776840209961, |
|
"learning_rate": 1.4680851063829789e-05, |
|
"loss": 0.4595, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 517, |
|
"train_accuracy": 0.8995726495726496, |
|
"train_loss": 0.25938984751701355, |
|
"train_runtime": 13.5841, |
|
"train_samples_per_second": 34.452, |
|
"train_steps_per_second": 8.613 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9294871794871795, |
|
"eval_loss": 0.32409772276878357, |
|
"eval_runtime": 4.4182, |
|
"eval_samples_per_second": 35.308, |
|
"eval_steps_per_second": 8.827, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 564, |
|
"train_accuracy": 0.8952991452991453, |
|
"train_loss": 0.26763853430747986, |
|
"train_runtime": 13.8203, |
|
"train_samples_per_second": 33.863, |
|
"train_steps_per_second": 8.466 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8910256410256411, |
|
"eval_loss": 0.3308241069316864, |
|
"eval_runtime": 4.4447, |
|
"eval_samples_per_second": 35.098, |
|
"eval_steps_per_second": 8.774, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"step": 611, |
|
"train_accuracy": 0.9081196581196581, |
|
"train_loss": 0.23129615187644958, |
|
"train_runtime": 13.4973, |
|
"train_samples_per_second": 34.674, |
|
"train_steps_per_second": 8.668 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9294871794871795, |
|
"eval_loss": 0.255931556224823, |
|
"eval_runtime": 4.6436, |
|
"eval_samples_per_second": 33.595, |
|
"eval_steps_per_second": 8.399, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"step": 658, |
|
"train_accuracy": 0.8846153846153846, |
|
"train_loss": 0.31185245513916016, |
|
"train_runtime": 13.5189, |
|
"train_samples_per_second": 34.618, |
|
"train_steps_per_second": 8.655 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.3017214834690094, |
|
"eval_runtime": 4.6078, |
|
"eval_samples_per_second": 33.856, |
|
"eval_steps_per_second": 8.464, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 705, |
|
"train_accuracy": 0.9166666666666666, |
|
"train_loss": 0.3007480502128601, |
|
"train_runtime": 13.4373, |
|
"train_samples_per_second": 34.829, |
|
"train_steps_per_second": 8.707 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9038461538461539, |
|
"eval_loss": 0.4220944046974182, |
|
"eval_runtime": 4.9304, |
|
"eval_samples_per_second": 31.64, |
|
"eval_steps_per_second": 7.91, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 752, |
|
"train_accuracy": 0.9188034188034188, |
|
"train_loss": 0.20939397811889648, |
|
"train_runtime": 13.6683, |
|
"train_samples_per_second": 34.24, |
|
"train_steps_per_second": 8.56 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.36174264550209045, |
|
"eval_runtime": 4.4608, |
|
"eval_samples_per_second": 34.971, |
|
"eval_steps_per_second": 8.743, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"step": 799, |
|
"train_accuracy": 0.9209401709401709, |
|
"train_loss": 0.18879051506519318, |
|
"train_runtime": 13.7929, |
|
"train_samples_per_second": 33.931, |
|
"train_steps_per_second": 8.483 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9102564102564102, |
|
"eval_loss": 0.35188791155815125, |
|
"eval_runtime": 4.7732, |
|
"eval_samples_per_second": 32.683, |
|
"eval_steps_per_second": 8.171, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"step": 846, |
|
"train_accuracy": 0.8952991452991453, |
|
"train_loss": 0.25016605854034424, |
|
"train_runtime": 13.3521, |
|
"train_samples_per_second": 35.051, |
|
"train_steps_per_second": 8.763 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9102564102564102, |
|
"eval_loss": 0.3965354859828949, |
|
"eval_runtime": 4.3963, |
|
"eval_samples_per_second": 35.485, |
|
"eval_steps_per_second": 8.871, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"step": 893, |
|
"train_accuracy": 0.9209401709401709, |
|
"train_loss": 0.1891285479068756, |
|
"train_runtime": 13.4884, |
|
"train_samples_per_second": 34.696, |
|
"train_steps_per_second": 8.674 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9038461538461539, |
|
"eval_loss": 0.31604066491127014, |
|
"eval_runtime": 5.1415, |
|
"eval_samples_per_second": 30.341, |
|
"eval_steps_per_second": 7.585, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 940, |
|
"train_accuracy": 0.9401709401709402, |
|
"train_loss": 0.1873449832201004, |
|
"train_runtime": 13.9057, |
|
"train_samples_per_second": 33.655, |
|
"train_steps_per_second": 8.414 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9294871794871795, |
|
"eval_loss": 0.3332672119140625, |
|
"eval_runtime": 4.9421, |
|
"eval_samples_per_second": 31.565, |
|
"eval_steps_per_second": 7.891, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"step": 987, |
|
"train_accuracy": 0.9230769230769231, |
|
"train_loss": 0.18881197273731232, |
|
"train_runtime": 13.5338, |
|
"train_samples_per_second": 34.58, |
|
"train_steps_per_second": 8.645 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8910256410256411, |
|
"eval_loss": 0.3720751404762268, |
|
"eval_runtime": 4.8223, |
|
"eval_samples_per_second": 32.35, |
|
"eval_steps_per_second": 8.088, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 21.28, |
|
"grad_norm": 5.682499408721924, |
|
"learning_rate": 9.361702127659576e-06, |
|
"loss": 0.2485, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"step": 1034, |
|
"train_accuracy": 0.9444444444444444, |
|
"train_loss": 0.1338244080543518, |
|
"train_runtime": 13.6664, |
|
"train_samples_per_second": 34.245, |
|
"train_steps_per_second": 8.561 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.3777410686016083, |
|
"eval_runtime": 4.3337, |
|
"eval_samples_per_second": 35.997, |
|
"eval_steps_per_second": 8.999, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"step": 1081, |
|
"train_accuracy": 0.9252136752136753, |
|
"train_loss": 0.18711484968662262, |
|
"train_runtime": 13.702, |
|
"train_samples_per_second": 34.155, |
|
"train_steps_per_second": 8.539 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.9038461538461539, |
|
"eval_loss": 0.3984796702861786, |
|
"eval_runtime": 4.5267, |
|
"eval_samples_per_second": 34.462, |
|
"eval_steps_per_second": 8.616, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"step": 1128, |
|
"train_accuracy": 0.9444444444444444, |
|
"train_loss": 0.161672905087471, |
|
"train_runtime": 13.9523, |
|
"train_samples_per_second": 33.543, |
|
"train_steps_per_second": 8.386 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9230769230769231, |
|
"eval_loss": 0.38641923666000366, |
|
"eval_runtime": 5.1128, |
|
"eval_samples_per_second": 30.511, |
|
"eval_steps_per_second": 7.628, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"step": 1175, |
|
"train_accuracy": 0.9444444444444444, |
|
"train_loss": 0.17891307175159454, |
|
"train_runtime": 13.7483, |
|
"train_samples_per_second": 34.041, |
|
"train_steps_per_second": 8.51 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.9230769230769231, |
|
"eval_loss": 0.42098188400268555, |
|
"eval_runtime": 5.0358, |
|
"eval_samples_per_second": 30.978, |
|
"eval_steps_per_second": 7.745, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"step": 1222, |
|
"train_accuracy": 0.9572649572649573, |
|
"train_loss": 0.10899731516838074, |
|
"train_runtime": 14.0913, |
|
"train_samples_per_second": 33.212, |
|
"train_steps_per_second": 8.303 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9038461538461539, |
|
"eval_loss": 0.4160342216491699, |
|
"eval_runtime": 4.7918, |
|
"eval_samples_per_second": 32.555, |
|
"eval_steps_per_second": 8.139, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"step": 1269, |
|
"train_accuracy": 0.938034188034188, |
|
"train_loss": 0.16018715500831604, |
|
"train_runtime": 13.888, |
|
"train_samples_per_second": 33.698, |
|
"train_steps_per_second": 8.425 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.9102564102564102, |
|
"eval_loss": 0.39854034781455994, |
|
"eval_runtime": 4.8553, |
|
"eval_samples_per_second": 32.13, |
|
"eval_steps_per_second": 8.032, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"step": 1316, |
|
"train_accuracy": 0.9444444444444444, |
|
"train_loss": 0.14988763630390167, |
|
"train_runtime": 13.7687, |
|
"train_samples_per_second": 33.99, |
|
"train_steps_per_second": 8.498 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9102564102564102, |
|
"eval_loss": 0.40767335891723633, |
|
"eval_runtime": 4.434, |
|
"eval_samples_per_second": 35.182, |
|
"eval_steps_per_second": 8.796, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"step": 1363, |
|
"train_accuracy": 0.9316239316239316, |
|
"train_loss": 0.17876969277858734, |
|
"train_runtime": 13.3686, |
|
"train_samples_per_second": 35.008, |
|
"train_steps_per_second": 8.752 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.8782051282051282, |
|
"eval_loss": 0.6035234928131104, |
|
"eval_runtime": 4.8916, |
|
"eval_samples_per_second": 31.891, |
|
"eval_steps_per_second": 7.973, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 1410, |
|
"train_accuracy": 0.9572649572649573, |
|
"train_loss": 0.12170404940843582, |
|
"train_runtime": 13.3084, |
|
"train_samples_per_second": 35.166, |
|
"train_steps_per_second": 8.791 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.9230769230769231, |
|
"eval_loss": 0.3604837954044342, |
|
"eval_runtime": 4.5664, |
|
"eval_samples_per_second": 34.163, |
|
"eval_steps_per_second": 8.541, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"step": 1457, |
|
"train_accuracy": 0.9444444444444444, |
|
"train_loss": 0.17193935811519623, |
|
"train_runtime": 13.3567, |
|
"train_samples_per_second": 35.039, |
|
"train_steps_per_second": 8.76 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.9038461538461539, |
|
"eval_loss": 0.42400404810905457, |
|
"eval_runtime": 4.3974, |
|
"eval_samples_per_second": 35.475, |
|
"eval_steps_per_second": 8.869, |
|
"step": 1457 |
|
}, |
|
{ |
|
"epoch": 31.91, |
|
"grad_norm": 0.40821418166160583, |
|
"learning_rate": 4.042553191489362e-06, |
|
"loss": 0.1715, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"step": 1504, |
|
"train_accuracy": 0.9551282051282052, |
|
"train_loss": 0.15071353316307068, |
|
"train_runtime": 13.338, |
|
"train_samples_per_second": 35.088, |
|
"train_steps_per_second": 8.772 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.3800322711467743, |
|
"eval_runtime": 4.4234, |
|
"eval_samples_per_second": 35.267, |
|
"eval_steps_per_second": 8.817, |
|
"step": 1504 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"step": 1551, |
|
"train_accuracy": 0.9423076923076923, |
|
"train_loss": 0.15298214554786682, |
|
"train_runtime": 13.6206, |
|
"train_samples_per_second": 34.36, |
|
"train_steps_per_second": 8.59 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.9038461538461539, |
|
"eval_loss": 0.42538413405418396, |
|
"eval_runtime": 4.5076, |
|
"eval_samples_per_second": 34.608, |
|
"eval_steps_per_second": 8.652, |
|
"step": 1551 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"step": 1598, |
|
"train_accuracy": 0.9615384615384616, |
|
"train_loss": 0.09072276204824448, |
|
"train_runtime": 13.7151, |
|
"train_samples_per_second": 34.123, |
|
"train_steps_per_second": 8.531 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.9230769230769231, |
|
"eval_loss": 0.41397902369499207, |
|
"eval_runtime": 4.448, |
|
"eval_samples_per_second": 35.072, |
|
"eval_steps_per_second": 8.768, |
|
"step": 1598 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"step": 1645, |
|
"train_accuracy": 0.9594017094017094, |
|
"train_loss": 0.15199129283428192, |
|
"train_runtime": 13.6214, |
|
"train_samples_per_second": 34.358, |
|
"train_steps_per_second": 8.589 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.9230769230769231, |
|
"eval_loss": 0.39104607701301575, |
|
"eval_runtime": 4.4304, |
|
"eval_samples_per_second": 35.211, |
|
"eval_steps_per_second": 8.803, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"step": 1692, |
|
"train_accuracy": 0.9594017094017094, |
|
"train_loss": 0.134719118475914, |
|
"train_runtime": 13.9053, |
|
"train_samples_per_second": 33.656, |
|
"train_steps_per_second": 8.414 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.9102564102564102, |
|
"eval_loss": 0.4161369204521179, |
|
"eval_runtime": 4.6871, |
|
"eval_samples_per_second": 33.283, |
|
"eval_steps_per_second": 8.321, |
|
"step": 1692 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"step": 1739, |
|
"train_accuracy": 0.9529914529914529, |
|
"train_loss": 0.16535791754722595, |
|
"train_runtime": 14.0143, |
|
"train_samples_per_second": 33.394, |
|
"train_steps_per_second": 8.349 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.9102564102564102, |
|
"eval_loss": 0.43847039341926575, |
|
"eval_runtime": 4.8404, |
|
"eval_samples_per_second": 32.229, |
|
"eval_steps_per_second": 8.057, |
|
"step": 1739 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"step": 1786, |
|
"train_accuracy": 0.9487179487179487, |
|
"train_loss": 0.11996147781610489, |
|
"train_runtime": 14.1086, |
|
"train_samples_per_second": 33.171, |
|
"train_steps_per_second": 8.293 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.41302695870399475, |
|
"eval_runtime": 4.7702, |
|
"eval_samples_per_second": 32.703, |
|
"eval_steps_per_second": 8.176, |
|
"step": 1786 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"step": 1833, |
|
"train_accuracy": 0.9529914529914529, |
|
"train_loss": 0.17013560235500336, |
|
"train_runtime": 13.8197, |
|
"train_samples_per_second": 33.865, |
|
"train_steps_per_second": 8.466 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.4181523323059082, |
|
"eval_runtime": 5.0402, |
|
"eval_samples_per_second": 30.951, |
|
"eval_steps_per_second": 7.738, |
|
"step": 1833 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 1880, |
|
"train_accuracy": 0.9551282051282052, |
|
"train_loss": 0.11466003954410553, |
|
"train_runtime": 13.6881, |
|
"train_samples_per_second": 34.19, |
|
"train_steps_per_second": 8.548 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.9166666666666666, |
|
"eval_loss": 0.4133930802345276, |
|
"eval_runtime": 4.7653, |
|
"eval_samples_per_second": 32.736, |
|
"eval_steps_per_second": 8.184, |
|
"step": 1880 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 5.128065177052447e+18, |
|
"train_batch_size": 10, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|