tutor_mixtral_1000 / trainer_state.json
braunaleMPG's picture
Upload 15 files
813904f verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.032520325203252,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"grad_norm": 0.7537718524378184,
"learning_rate": 4.998825837977733e-05,
"loss": 1.0335,
"step": 25
},
{
"epoch": 0.2,
"grad_norm": 0.5115893351462029,
"learning_rate": 4.9951068336359185e-05,
"loss": 0.9543,
"step": 50
},
{
"epoch": 0.3,
"grad_norm": 0.3655365544393326,
"learning_rate": 4.9888447388643216e-05,
"loss": 0.889,
"step": 75
},
{
"epoch": 0.41,
"grad_norm": 0.3802482724658219,
"learning_rate": 4.980045936184552e-05,
"loss": 0.8824,
"step": 100
},
{
"epoch": 0.51,
"grad_norm": 0.46128857579583404,
"learning_rate": 4.968719393609757e-05,
"loss": 0.8812,
"step": 125
},
{
"epoch": 0.61,
"grad_norm": 0.4675840689300933,
"learning_rate": 4.954876655504144e-05,
"loss": 0.8626,
"step": 150
},
{
"epoch": 0.71,
"grad_norm": 0.5174033092078555,
"learning_rate": 4.938531830816607e-05,
"loss": 0.8542,
"step": 175
},
{
"epoch": 0.81,
"grad_norm": 0.47966453174679635,
"learning_rate": 4.919701578700444e-05,
"loss": 0.8615,
"step": 200
},
{
"epoch": 0.91,
"grad_norm": 0.5800019356792034,
"learning_rate": 4.898405091533834e-05,
"loss": 0.8198,
"step": 225
},
{
"epoch": 1.02,
"grad_norm": 0.5068384935929343,
"learning_rate": 4.874664075358366e-05,
"loss": 0.835,
"step": 250
},
{
"epoch": 1.12,
"grad_norm": 0.5665554500957887,
"learning_rate": 4.84850272775557e-05,
"loss": 0.833,
"step": 275
},
{
"epoch": 1.22,
"grad_norm": 0.6225574393610873,
"learning_rate": 4.8199477131839854e-05,
"loss": 0.8362,
"step": 300
},
{
"epoch": 1.32,
"grad_norm": 0.5883987854013639,
"learning_rate": 4.789028135801918e-05,
"loss": 0.8315,
"step": 325
},
{
"epoch": 1.42,
"grad_norm": 0.6212622090526995,
"learning_rate": 4.7557755098035814e-05,
"loss": 0.8082,
"step": 350
},
{
"epoch": 1.52,
"grad_norm": 0.6254380356435723,
"learning_rate": 4.720223727298845e-05,
"loss": 0.8112,
"step": 375
},
{
"epoch": 1.63,
"grad_norm": 0.7114667768707209,
"learning_rate": 4.682409023769342e-05,
"loss": 0.8141,
"step": 400
},
{
"epoch": 1.73,
"grad_norm": 0.7156140969579615,
"learning_rate": 4.6423699411361474e-05,
"loss": 0.8214,
"step": 425
},
{
"epoch": 1.83,
"grad_norm": 0.6560300477797654,
"learning_rate": 4.600147288476647e-05,
"loss": 0.819,
"step": 450
},
{
"epoch": 1.93,
"grad_norm": 0.6220749749772762,
"learning_rate": 4.5557841004306625e-05,
"loss": 0.8177,
"step": 475
},
{
"epoch": 2.03,
"grad_norm": 0.7459915153227248,
"learning_rate": 4.509325593338203e-05,
"loss": 0.8207,
"step": 500
}
],
"logging_steps": 25,
"max_steps": 2460,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 152390335463424.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}