harithapliyal's picture
Upload 15 files
db5c5f3 verified
raw
history blame
7.32 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 27.149321266968325,
"eval_steps": 1000,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 1.9201030731201172,
"eval_runtime": 10.5666,
"eval_samples_per_second": 20.915,
"eval_steps_per_second": 5.3,
"step": 221
},
{
"epoch": 2.0,
"eval_loss": 1.8042283058166504,
"eval_runtime": 10.5219,
"eval_samples_per_second": 21.004,
"eval_steps_per_second": 5.322,
"step": 442
},
{
"epoch": 2.26,
"learning_rate": 4.622926093514329e-05,
"loss": 2.1534,
"step": 500
},
{
"epoch": 3.0,
"eval_loss": 1.7254605293273926,
"eval_runtime": 10.5492,
"eval_samples_per_second": 20.95,
"eval_steps_per_second": 5.308,
"step": 663
},
{
"epoch": 4.0,
"eval_loss": 1.653130054473877,
"eval_runtime": 10.5858,
"eval_samples_per_second": 20.877,
"eval_steps_per_second": 5.29,
"step": 884
},
{
"epoch": 4.52,
"learning_rate": 4.2458521870286574e-05,
"loss": 1.8808,
"step": 1000
},
{
"epoch": 5.0,
"eval_loss": 1.5984195470809937,
"eval_runtime": 10.5172,
"eval_samples_per_second": 21.013,
"eval_steps_per_second": 5.325,
"step": 1105
},
{
"epoch": 6.0,
"eval_loss": 1.5462396144866943,
"eval_runtime": 10.5813,
"eval_samples_per_second": 20.886,
"eval_steps_per_second": 5.292,
"step": 1326
},
{
"epoch": 6.79,
"learning_rate": 3.868778280542987e-05,
"loss": 1.7597,
"step": 1500
},
{
"epoch": 7.0,
"eval_loss": 1.4970717430114746,
"eval_runtime": 10.5617,
"eval_samples_per_second": 20.925,
"eval_steps_per_second": 5.302,
"step": 1547
},
{
"epoch": 8.0,
"eval_loss": 1.4608112573623657,
"eval_runtime": 10.5482,
"eval_samples_per_second": 20.952,
"eval_steps_per_second": 5.309,
"step": 1768
},
{
"epoch": 9.0,
"eval_loss": 1.426717758178711,
"eval_runtime": 10.5377,
"eval_samples_per_second": 20.972,
"eval_steps_per_second": 5.314,
"step": 1989
},
{
"epoch": 9.05,
"learning_rate": 3.491704374057315e-05,
"loss": 1.673,
"step": 2000
},
{
"epoch": 10.0,
"eval_loss": 1.3964918851852417,
"eval_runtime": 10.5931,
"eval_samples_per_second": 20.863,
"eval_steps_per_second": 5.286,
"step": 2210
},
{
"epoch": 11.0,
"eval_loss": 1.3659923076629639,
"eval_runtime": 10.5424,
"eval_samples_per_second": 20.963,
"eval_steps_per_second": 5.312,
"step": 2431
},
{
"epoch": 11.31,
"learning_rate": 3.114630467571644e-05,
"loss": 1.6087,
"step": 2500
},
{
"epoch": 12.0,
"eval_loss": 1.3398691415786743,
"eval_runtime": 10.5482,
"eval_samples_per_second": 20.951,
"eval_steps_per_second": 5.309,
"step": 2652
},
{
"epoch": 13.0,
"eval_loss": 1.3123878240585327,
"eval_runtime": 10.4901,
"eval_samples_per_second": 21.067,
"eval_steps_per_second": 5.338,
"step": 2873
},
{
"epoch": 13.57,
"learning_rate": 2.737556561085973e-05,
"loss": 1.5523,
"step": 3000
},
{
"epoch": 14.0,
"eval_loss": 1.2912379503250122,
"eval_runtime": 10.5302,
"eval_samples_per_second": 20.987,
"eval_steps_per_second": 5.318,
"step": 3094
},
{
"epoch": 15.0,
"eval_loss": 1.2690876722335815,
"eval_runtime": 10.5127,
"eval_samples_per_second": 21.022,
"eval_steps_per_second": 5.327,
"step": 3315
},
{
"epoch": 15.84,
"learning_rate": 2.3604826546003017e-05,
"loss": 1.5072,
"step": 3500
},
{
"epoch": 16.0,
"eval_loss": 1.2487969398498535,
"eval_runtime": 10.5677,
"eval_samples_per_second": 20.913,
"eval_steps_per_second": 5.299,
"step": 3536
},
{
"epoch": 17.0,
"eval_loss": 1.2326879501342773,
"eval_runtime": 10.5424,
"eval_samples_per_second": 20.963,
"eval_steps_per_second": 5.312,
"step": 3757
},
{
"epoch": 18.0,
"eval_loss": 1.2160953283309937,
"eval_runtime": 10.5427,
"eval_samples_per_second": 20.962,
"eval_steps_per_second": 5.312,
"step": 3978
},
{
"epoch": 18.1,
"learning_rate": 1.9834087481146303e-05,
"loss": 1.4711,
"step": 4000
},
{
"epoch": 19.0,
"eval_loss": 1.2029471397399902,
"eval_runtime": 10.5308,
"eval_samples_per_second": 20.986,
"eval_steps_per_second": 5.318,
"step": 4199
},
{
"epoch": 20.0,
"eval_loss": 1.1921287775039673,
"eval_runtime": 10.5502,
"eval_samples_per_second": 20.948,
"eval_steps_per_second": 5.308,
"step": 4420
},
{
"epoch": 20.36,
"learning_rate": 1.6063348416289596e-05,
"loss": 1.4329,
"step": 4500
},
{
"epoch": 21.0,
"eval_loss": 1.1807990074157715,
"eval_runtime": 10.5791,
"eval_samples_per_second": 20.89,
"eval_steps_per_second": 5.293,
"step": 4641
},
{
"epoch": 22.0,
"eval_loss": 1.170788288116455,
"eval_runtime": 10.5201,
"eval_samples_per_second": 21.007,
"eval_steps_per_second": 5.323,
"step": 4862
},
{
"epoch": 22.62,
"learning_rate": 1.229260935143288e-05,
"loss": 1.4091,
"step": 5000
},
{
"epoch": 23.0,
"eval_loss": 1.1616638898849487,
"eval_runtime": 10.541,
"eval_samples_per_second": 20.966,
"eval_steps_per_second": 5.313,
"step": 5083
},
{
"epoch": 24.0,
"eval_loss": 1.1520771980285645,
"eval_runtime": 10.5678,
"eval_samples_per_second": 20.913,
"eval_steps_per_second": 5.299,
"step": 5304
},
{
"epoch": 24.89,
"learning_rate": 8.52187028657617e-06,
"loss": 1.392,
"step": 5500
},
{
"epoch": 25.0,
"eval_loss": 1.1455986499786377,
"eval_runtime": 10.5773,
"eval_samples_per_second": 20.894,
"eval_steps_per_second": 5.294,
"step": 5525
},
{
"epoch": 26.0,
"eval_loss": 1.1402668952941895,
"eval_runtime": 10.5575,
"eval_samples_per_second": 20.933,
"eval_steps_per_second": 5.304,
"step": 5746
},
{
"epoch": 27.0,
"eval_loss": 1.1367387771606445,
"eval_runtime": 10.5633,
"eval_samples_per_second": 20.921,
"eval_steps_per_second": 5.301,
"step": 5967
},
{
"epoch": 27.15,
"learning_rate": 4.751131221719457e-06,
"loss": 1.3771,
"step": 6000
}
],
"logging_steps": 500,
"max_steps": 6630,
"num_train_epochs": 30,
"save_steps": 1000,
"total_flos": 9484103172096000.0,
"trial_name": null,
"trial_params": null
}