|
{ |
|
"best_metric": 0.29289329051971436, |
|
"best_model_checkpoint": "/content/aptner_deberta/checkpoint-1000", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 8430, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.881376037959668e-05, |
|
"loss": 0.6136, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.9171865696756323, |
|
"eval_f1": 0.5143439638852483, |
|
"eval_loss": 0.3298434615135193, |
|
"eval_precision": 0.5007088176920896, |
|
"eval_recall": 0.5287425149700599, |
|
"eval_runtime": 11.2485, |
|
"eval_samples_per_second": 155.488, |
|
"eval_steps_per_second": 19.469, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.762752075919336e-05, |
|
"loss": 0.308, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_accuracy": 0.9205001423800772, |
|
"eval_f1": 0.5688849970811442, |
|
"eval_loss": 0.29289329051971436, |
|
"eval_precision": 0.5549544419134397, |
|
"eval_recall": 0.5835329341317366, |
|
"eval_runtime": 11.4351, |
|
"eval_samples_per_second": 152.95, |
|
"eval_steps_per_second": 19.152, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.6441281138790037e-05, |
|
"loss": 0.2428, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_accuracy": 0.9176525408371948, |
|
"eval_f1": 0.5728531855955679, |
|
"eval_loss": 0.31239837408065796, |
|
"eval_precision": 0.5329896907216495, |
|
"eval_recall": 0.6191616766467066, |
|
"eval_runtime": 11.5194, |
|
"eval_samples_per_second": 151.831, |
|
"eval_steps_per_second": 19.011, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.5255041518386714e-05, |
|
"loss": 0.2088, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_accuracy": 0.9146755028605452, |
|
"eval_f1": 0.5848287112561175, |
|
"eval_loss": 0.32038480043411255, |
|
"eval_precision": 0.5356075697211156, |
|
"eval_recall": 0.6440119760479042, |
|
"eval_runtime": 12.1642, |
|
"eval_samples_per_second": 143.783, |
|
"eval_steps_per_second": 18.004, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.4068801897983393e-05, |
|
"loss": 0.1783, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.9149343757280799, |
|
"eval_f1": 0.6023742830465519, |
|
"eval_loss": 0.3319493234157562, |
|
"eval_precision": 0.5431801780129901, |
|
"eval_recall": 0.6760479041916168, |
|
"eval_runtime": 11.6403, |
|
"eval_samples_per_second": 150.254, |
|
"eval_steps_per_second": 18.814, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.2882562277580073e-05, |
|
"loss": 0.1434, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_accuracy": 0.9202930440860494, |
|
"eval_f1": 0.6036738101864737, |
|
"eval_loss": 0.3370673358440399, |
|
"eval_precision": 0.5639625585023401, |
|
"eval_recall": 0.6494011976047904, |
|
"eval_runtime": 11.5779, |
|
"eval_samples_per_second": 151.064, |
|
"eval_steps_per_second": 18.915, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.169632265717675e-05, |
|
"loss": 0.1352, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_accuracy": 0.9135364622433922, |
|
"eval_f1": 0.5807708362320857, |
|
"eval_loss": 0.3826988637447357, |
|
"eval_precision": 0.5425006498570315, |
|
"eval_recall": 0.6248502994011976, |
|
"eval_runtime": 11.732, |
|
"eval_samples_per_second": 149.08, |
|
"eval_steps_per_second": 18.667, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 1.0510083036773429e-05, |
|
"loss": 0.1135, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_accuracy": 0.9135623495301457, |
|
"eval_f1": 0.5979081159804052, |
|
"eval_loss": 0.3862306475639343, |
|
"eval_precision": 0.5359601234274863, |
|
"eval_recall": 0.6760479041916168, |
|
"eval_runtime": 11.4556, |
|
"eval_samples_per_second": 152.676, |
|
"eval_steps_per_second": 19.117, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 9.323843416370107e-06, |
|
"loss": 0.0987, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_accuracy": 0.9141318698387222, |
|
"eval_f1": 0.5920873124147339, |
|
"eval_loss": 0.3977762758731842, |
|
"eval_precision": 0.543859649122807, |
|
"eval_recall": 0.6497005988023952, |
|
"eval_runtime": 11.4436, |
|
"eval_samples_per_second": 152.836, |
|
"eval_steps_per_second": 19.137, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 8.137603795966786e-06, |
|
"loss": 0.0942, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_accuracy": 0.9224934634600948, |
|
"eval_f1": 0.6091399375532217, |
|
"eval_loss": 0.37382081151008606, |
|
"eval_precision": 0.5790609821910415, |
|
"eval_recall": 0.6425149700598802, |
|
"eval_runtime": 12.0507, |
|
"eval_samples_per_second": 145.137, |
|
"eval_steps_per_second": 18.173, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 6.951364175563464e-06, |
|
"loss": 0.0746, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_accuracy": 0.9161251909187398, |
|
"eval_f1": 0.5943422136775611, |
|
"eval_loss": 0.4268680810928345, |
|
"eval_precision": 0.5489599188229325, |
|
"eval_recall": 0.6479041916167665, |
|
"eval_runtime": 11.3716, |
|
"eval_samples_per_second": 153.804, |
|
"eval_steps_per_second": 19.258, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 5.765124555160143e-06, |
|
"loss": 0.0727, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_accuracy": 0.9170571332418649, |
|
"eval_f1": 0.5977203224909646, |
|
"eval_loss": 0.4236016869544983, |
|
"eval_precision": 0.5578619615983393, |
|
"eval_recall": 0.6437125748502994, |
|
"eval_runtime": 11.9963, |
|
"eval_samples_per_second": 145.795, |
|
"eval_steps_per_second": 18.256, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 4.5788849347568215e-06, |
|
"loss": 0.0661, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_accuracy": 0.9200341712185146, |
|
"eval_f1": 0.603626220362622, |
|
"eval_loss": 0.4239303171634674, |
|
"eval_precision": 0.5650130548302872, |
|
"eval_recall": 0.6479041916167665, |
|
"eval_runtime": 12.2361, |
|
"eval_samples_per_second": 142.938, |
|
"eval_steps_per_second": 17.898, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 3.3926453143535e-06, |
|
"loss": 0.0578, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"eval_accuracy": 0.9175231044034274, |
|
"eval_f1": 0.5931846865797223, |
|
"eval_loss": 0.44854551553726196, |
|
"eval_precision": 0.5579002901609074, |
|
"eval_recall": 0.6332335329341318, |
|
"eval_runtime": 12.2264, |
|
"eval_samples_per_second": 143.051, |
|
"eval_steps_per_second": 17.912, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 2.2064056939501782e-06, |
|
"loss": 0.0505, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.9162546273525072, |
|
"eval_f1": 0.5922411387105778, |
|
"eval_loss": 0.4552724361419678, |
|
"eval_precision": 0.5546262415054888, |
|
"eval_recall": 0.6353293413173653, |
|
"eval_runtime": 12.0192, |
|
"eval_samples_per_second": 145.517, |
|
"eval_steps_per_second": 18.221, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 1.0201660735468566e-06, |
|
"loss": 0.0513, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"eval_accuracy": 0.9171347951021254, |
|
"eval_f1": 0.597688344241749, |
|
"eval_loss": 0.46287792921066284, |
|
"eval_precision": 0.5587086696172872, |
|
"eval_recall": 0.6425149700598802, |
|
"eval_runtime": 11.7878, |
|
"eval_samples_per_second": 148.374, |
|
"eval_steps_per_second": 18.579, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 8430, |
|
"total_flos": 2261553362554848.0, |
|
"train_loss": 0.15120792004278688, |
|
"train_runtime": 2180.3385, |
|
"train_samples_per_second": 30.931, |
|
"train_steps_per_second": 3.866 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8430, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 2261553362554848.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|