mrm8488's picture
Initial commit from mrm8488
3f15d18
raw
history blame
7.18 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.4471057884231537,
"global_step": 28000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.993346640053227e-05,
"loss": 2.0933,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.986693280106454e-05,
"loss": 1.7924,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 4.980039920159681e-05,
"loss": 1.0278,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 4.973386560212908e-05,
"loss": 1.0513,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 4.966733200266135e-05,
"loss": 0.9437,
"step": 2500
},
{
"epoch": 0.05,
"learning_rate": 4.9600798403193615e-05,
"loss": 0.9916,
"step": 3000
},
{
"epoch": 0.06,
"learning_rate": 4.953426480372588e-05,
"loss": 0.926,
"step": 3500
},
{
"epoch": 0.06,
"learning_rate": 4.946773120425816e-05,
"loss": 0.9047,
"step": 4000
},
{
"epoch": 0.07,
"learning_rate": 4.9401197604790424e-05,
"loss": 0.8648,
"step": 4500
},
{
"epoch": 0.08,
"learning_rate": 4.933466400532269e-05,
"loss": 0.8708,
"step": 5000
},
{
"epoch": 0.09,
"learning_rate": 4.9268130405854955e-05,
"loss": 0.856,
"step": 5500
},
{
"epoch": 0.1,
"learning_rate": 4.920159680638723e-05,
"loss": 0.8403,
"step": 6000
},
{
"epoch": 0.1,
"learning_rate": 4.91350632069195e-05,
"loss": 0.8486,
"step": 6500
},
{
"epoch": 0.11,
"learning_rate": 4.9068529607451764e-05,
"loss": 0.8463,
"step": 7000
},
{
"epoch": 0.12,
"learning_rate": 4.9001996007984036e-05,
"loss": 0.8168,
"step": 7500
},
{
"epoch": 0.13,
"learning_rate": 4.89354624085163e-05,
"loss": 0.8255,
"step": 8000
},
{
"epoch": 0.14,
"learning_rate": 4.886892880904857e-05,
"loss": 0.8256,
"step": 8500
},
{
"epoch": 0.14,
"learning_rate": 4.8802395209580846e-05,
"loss": 0.8397,
"step": 9000
},
{
"epoch": 0.15,
"learning_rate": 4.873586161011311e-05,
"loss": 0.8315,
"step": 9500
},
{
"epoch": 0.16,
"learning_rate": 4.8669328010645376e-05,
"loss": 0.8033,
"step": 10000
},
{
"epoch": 0.17,
"learning_rate": 4.860279441117765e-05,
"loss": 0.8076,
"step": 10500
},
{
"epoch": 0.18,
"learning_rate": 4.8536260811709914e-05,
"loss": 0.8126,
"step": 11000
},
{
"epoch": 0.18,
"learning_rate": 4.8469727212242186e-05,
"loss": 0.7916,
"step": 11500
},
{
"epoch": 0.19,
"learning_rate": 4.840319361277446e-05,
"loss": 0.8119,
"step": 12000
},
{
"epoch": 0.2,
"learning_rate": 4.833666001330672e-05,
"loss": 0.8081,
"step": 12500
},
{
"epoch": 0.21,
"learning_rate": 4.827012641383899e-05,
"loss": 0.7944,
"step": 13000
},
{
"epoch": 0.22,
"learning_rate": 4.820359281437126e-05,
"loss": 0.7926,
"step": 13500
},
{
"epoch": 0.22,
"learning_rate": 4.813705921490353e-05,
"loss": 0.7993,
"step": 14000
},
{
"epoch": 0.23,
"learning_rate": 4.80705256154358e-05,
"loss": 0.785,
"step": 14500
},
{
"epoch": 0.24,
"learning_rate": 4.800399201596807e-05,
"loss": 0.7799,
"step": 15000
},
{
"epoch": 0.25,
"learning_rate": 4.7937458416500335e-05,
"loss": 0.7773,
"step": 15500
},
{
"epoch": 0.26,
"learning_rate": 4.78709248170326e-05,
"loss": 0.7806,
"step": 16000
},
{
"epoch": 0.26,
"learning_rate": 4.780439121756487e-05,
"loss": 0.7824,
"step": 16500
},
{
"epoch": 0.27,
"learning_rate": 4.7737857618097145e-05,
"loss": 0.7609,
"step": 17000
},
{
"epoch": 0.28,
"learning_rate": 4.767132401862941e-05,
"loss": 0.7855,
"step": 17500
},
{
"epoch": 0.29,
"learning_rate": 4.7604790419161675e-05,
"loss": 0.7805,
"step": 18000
},
{
"epoch": 0.3,
"learning_rate": 4.753825681969395e-05,
"loss": 0.7591,
"step": 18500
},
{
"epoch": 0.3,
"learning_rate": 4.747172322022622e-05,
"loss": 0.7765,
"step": 19000
},
{
"epoch": 0.31,
"learning_rate": 4.7405189620758485e-05,
"loss": 0.7643,
"step": 19500
},
{
"epoch": 0.32,
"learning_rate": 4.733865602129076e-05,
"loss": 0.767,
"step": 20000
},
{
"epoch": 0.33,
"learning_rate": 4.727212242182302e-05,
"loss": 0.7648,
"step": 20500
},
{
"epoch": 0.34,
"learning_rate": 4.720558882235529e-05,
"loss": 0.7711,
"step": 21000
},
{
"epoch": 0.34,
"learning_rate": 4.713905522288756e-05,
"loss": 0.7772,
"step": 21500
},
{
"epoch": 0.35,
"learning_rate": 4.707252162341983e-05,
"loss": 0.7644,
"step": 22000
},
{
"epoch": 0.36,
"learning_rate": 4.70059880239521e-05,
"loss": 0.767,
"step": 22500
},
{
"epoch": 0.37,
"learning_rate": 4.693945442448437e-05,
"loss": 0.7471,
"step": 23000
},
{
"epoch": 0.38,
"learning_rate": 4.6872920825016634e-05,
"loss": 0.7594,
"step": 23500
},
{
"epoch": 0.38,
"learning_rate": 4.6806387225548906e-05,
"loss": 0.7496,
"step": 24000
},
{
"epoch": 0.39,
"learning_rate": 4.673985362608118e-05,
"loss": 0.7666,
"step": 24500
},
{
"epoch": 0.4,
"learning_rate": 4.6673320026613444e-05,
"loss": 0.7476,
"step": 25000
},
{
"epoch": 0.41,
"learning_rate": 4.660678642714571e-05,
"loss": 0.7393,
"step": 25500
},
{
"epoch": 0.42,
"learning_rate": 4.6540252827677974e-05,
"loss": 0.7488,
"step": 26000
},
{
"epoch": 0.42,
"learning_rate": 4.6473719228210246e-05,
"loss": 0.7408,
"step": 26500
},
{
"epoch": 0.43,
"learning_rate": 4.640718562874252e-05,
"loss": 0.7609,
"step": 27000
},
{
"epoch": 0.44,
"learning_rate": 4.6340652029274784e-05,
"loss": 0.7461,
"step": 27500
},
{
"epoch": 0.45,
"learning_rate": 4.6274118429807056e-05,
"loss": 0.7369,
"step": 28000
}
],
"max_steps": 375750,
"num_train_epochs": 6,
"total_flos": 51286279954268160,
"trial_name": null,
"trial_params": null
}