DeBERTa-APTNER / trainer_state.json
Anonymous
Upload folder using huggingface_hub
c75fff0
{
"best_metric": 0.29289329051971436,
"best_model_checkpoint": "/content/aptner_deberta/checkpoint-1000",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 8430,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.59,
"learning_rate": 1.881376037959668e-05,
"loss": 0.6136,
"step": 500
},
{
"epoch": 0.59,
"eval_accuracy": 0.9171865696756323,
"eval_f1": 0.5143439638852483,
"eval_loss": 0.3298434615135193,
"eval_precision": 0.5007088176920896,
"eval_recall": 0.5287425149700599,
"eval_runtime": 11.2485,
"eval_samples_per_second": 155.488,
"eval_steps_per_second": 19.469,
"step": 500
},
{
"epoch": 1.19,
"learning_rate": 1.762752075919336e-05,
"loss": 0.308,
"step": 1000
},
{
"epoch": 1.19,
"eval_accuracy": 0.9205001423800772,
"eval_f1": 0.5688849970811442,
"eval_loss": 0.29289329051971436,
"eval_precision": 0.5549544419134397,
"eval_recall": 0.5835329341317366,
"eval_runtime": 11.4351,
"eval_samples_per_second": 152.95,
"eval_steps_per_second": 19.152,
"step": 1000
},
{
"epoch": 1.78,
"learning_rate": 1.6441281138790037e-05,
"loss": 0.2428,
"step": 1500
},
{
"epoch": 1.78,
"eval_accuracy": 0.9176525408371948,
"eval_f1": 0.5728531855955679,
"eval_loss": 0.31239837408065796,
"eval_precision": 0.5329896907216495,
"eval_recall": 0.6191616766467066,
"eval_runtime": 11.5194,
"eval_samples_per_second": 151.831,
"eval_steps_per_second": 19.011,
"step": 1500
},
{
"epoch": 2.37,
"learning_rate": 1.5255041518386714e-05,
"loss": 0.2088,
"step": 2000
},
{
"epoch": 2.37,
"eval_accuracy": 0.9146755028605452,
"eval_f1": 0.5848287112561175,
"eval_loss": 0.32038480043411255,
"eval_precision": 0.5356075697211156,
"eval_recall": 0.6440119760479042,
"eval_runtime": 12.1642,
"eval_samples_per_second": 143.783,
"eval_steps_per_second": 18.004,
"step": 2000
},
{
"epoch": 2.97,
"learning_rate": 1.4068801897983393e-05,
"loss": 0.1783,
"step": 2500
},
{
"epoch": 2.97,
"eval_accuracy": 0.9149343757280799,
"eval_f1": 0.6023742830465519,
"eval_loss": 0.3319493234157562,
"eval_precision": 0.5431801780129901,
"eval_recall": 0.6760479041916168,
"eval_runtime": 11.6403,
"eval_samples_per_second": 150.254,
"eval_steps_per_second": 18.814,
"step": 2500
},
{
"epoch": 3.56,
"learning_rate": 1.2882562277580073e-05,
"loss": 0.1434,
"step": 3000
},
{
"epoch": 3.56,
"eval_accuracy": 0.9202930440860494,
"eval_f1": 0.6036738101864737,
"eval_loss": 0.3370673358440399,
"eval_precision": 0.5639625585023401,
"eval_recall": 0.6494011976047904,
"eval_runtime": 11.5779,
"eval_samples_per_second": 151.064,
"eval_steps_per_second": 18.915,
"step": 3000
},
{
"epoch": 4.15,
"learning_rate": 1.169632265717675e-05,
"loss": 0.1352,
"step": 3500
},
{
"epoch": 4.15,
"eval_accuracy": 0.9135364622433922,
"eval_f1": 0.5807708362320857,
"eval_loss": 0.3826988637447357,
"eval_precision": 0.5425006498570315,
"eval_recall": 0.6248502994011976,
"eval_runtime": 11.732,
"eval_samples_per_second": 149.08,
"eval_steps_per_second": 18.667,
"step": 3500
},
{
"epoch": 4.74,
"learning_rate": 1.0510083036773429e-05,
"loss": 0.1135,
"step": 4000
},
{
"epoch": 4.74,
"eval_accuracy": 0.9135623495301457,
"eval_f1": 0.5979081159804052,
"eval_loss": 0.3862306475639343,
"eval_precision": 0.5359601234274863,
"eval_recall": 0.6760479041916168,
"eval_runtime": 11.4556,
"eval_samples_per_second": 152.676,
"eval_steps_per_second": 19.117,
"step": 4000
},
{
"epoch": 5.34,
"learning_rate": 9.323843416370107e-06,
"loss": 0.0987,
"step": 4500
},
{
"epoch": 5.34,
"eval_accuracy": 0.9141318698387222,
"eval_f1": 0.5920873124147339,
"eval_loss": 0.3977762758731842,
"eval_precision": 0.543859649122807,
"eval_recall": 0.6497005988023952,
"eval_runtime": 11.4436,
"eval_samples_per_second": 152.836,
"eval_steps_per_second": 19.137,
"step": 4500
},
{
"epoch": 5.93,
"learning_rate": 8.137603795966786e-06,
"loss": 0.0942,
"step": 5000
},
{
"epoch": 5.93,
"eval_accuracy": 0.9224934634600948,
"eval_f1": 0.6091399375532217,
"eval_loss": 0.37382081151008606,
"eval_precision": 0.5790609821910415,
"eval_recall": 0.6425149700598802,
"eval_runtime": 12.0507,
"eval_samples_per_second": 145.137,
"eval_steps_per_second": 18.173,
"step": 5000
},
{
"epoch": 6.52,
"learning_rate": 6.951364175563464e-06,
"loss": 0.0746,
"step": 5500
},
{
"epoch": 6.52,
"eval_accuracy": 0.9161251909187398,
"eval_f1": 0.5943422136775611,
"eval_loss": 0.4268680810928345,
"eval_precision": 0.5489599188229325,
"eval_recall": 0.6479041916167665,
"eval_runtime": 11.3716,
"eval_samples_per_second": 153.804,
"eval_steps_per_second": 19.258,
"step": 5500
},
{
"epoch": 7.12,
"learning_rate": 5.765124555160143e-06,
"loss": 0.0727,
"step": 6000
},
{
"epoch": 7.12,
"eval_accuracy": 0.9170571332418649,
"eval_f1": 0.5977203224909646,
"eval_loss": 0.4236016869544983,
"eval_precision": 0.5578619615983393,
"eval_recall": 0.6437125748502994,
"eval_runtime": 11.9963,
"eval_samples_per_second": 145.795,
"eval_steps_per_second": 18.256,
"step": 6000
},
{
"epoch": 7.71,
"learning_rate": 4.5788849347568215e-06,
"loss": 0.0661,
"step": 6500
},
{
"epoch": 7.71,
"eval_accuracy": 0.9200341712185146,
"eval_f1": 0.603626220362622,
"eval_loss": 0.4239303171634674,
"eval_precision": 0.5650130548302872,
"eval_recall": 0.6479041916167665,
"eval_runtime": 12.2361,
"eval_samples_per_second": 142.938,
"eval_steps_per_second": 17.898,
"step": 6500
},
{
"epoch": 8.3,
"learning_rate": 3.3926453143535e-06,
"loss": 0.0578,
"step": 7000
},
{
"epoch": 8.3,
"eval_accuracy": 0.9175231044034274,
"eval_f1": 0.5931846865797223,
"eval_loss": 0.44854551553726196,
"eval_precision": 0.5579002901609074,
"eval_recall": 0.6332335329341318,
"eval_runtime": 12.2264,
"eval_samples_per_second": 143.051,
"eval_steps_per_second": 17.912,
"step": 7000
},
{
"epoch": 8.9,
"learning_rate": 2.2064056939501782e-06,
"loss": 0.0505,
"step": 7500
},
{
"epoch": 8.9,
"eval_accuracy": 0.9162546273525072,
"eval_f1": 0.5922411387105778,
"eval_loss": 0.4552724361419678,
"eval_precision": 0.5546262415054888,
"eval_recall": 0.6353293413173653,
"eval_runtime": 12.0192,
"eval_samples_per_second": 145.517,
"eval_steps_per_second": 18.221,
"step": 7500
},
{
"epoch": 9.49,
"learning_rate": 1.0201660735468566e-06,
"loss": 0.0513,
"step": 8000
},
{
"epoch": 9.49,
"eval_accuracy": 0.9171347951021254,
"eval_f1": 0.597688344241749,
"eval_loss": 0.46287792921066284,
"eval_precision": 0.5587086696172872,
"eval_recall": 0.6425149700598802,
"eval_runtime": 11.7878,
"eval_samples_per_second": 148.374,
"eval_steps_per_second": 18.579,
"step": 8000
},
{
"epoch": 10.0,
"step": 8430,
"total_flos": 2261553362554848.0,
"train_loss": 0.15120792004278688,
"train_runtime": 2180.3385,
"train_samples_per_second": 30.931,
"train_steps_per_second": 3.866
}
],
"logging_steps": 500,
"max_steps": 8430,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2261553362554848.0,
"trial_name": null,
"trial_params": null
}