{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.032520325203252,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "grad_norm": 0.7537718524378184,
      "learning_rate": 4.998825837977733e-05,
      "loss": 1.0335,
      "step": 25
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5115893351462029,
      "learning_rate": 4.9951068336359185e-05,
      "loss": 0.9543,
      "step": 50
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3655365544393326,
      "learning_rate": 4.9888447388643216e-05,
      "loss": 0.889,
      "step": 75
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3802482724658219,
      "learning_rate": 4.980045936184552e-05,
      "loss": 0.8824,
      "step": 100
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.46128857579583404,
      "learning_rate": 4.968719393609757e-05,
      "loss": 0.8812,
      "step": 125
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.4675840689300933,
      "learning_rate": 4.954876655504144e-05,
      "loss": 0.8626,
      "step": 150
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.5174033092078555,
      "learning_rate": 4.938531830816607e-05,
      "loss": 0.8542,
      "step": 175
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.47966453174679635,
      "learning_rate": 4.919701578700444e-05,
      "loss": 0.8615,
      "step": 200
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.5800019356792034,
      "learning_rate": 4.898405091533834e-05,
      "loss": 0.8198,
      "step": 225
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.5068384935929343,
      "learning_rate": 4.874664075358366e-05,
      "loss": 0.835,
      "step": 250
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.5665554500957887,
      "learning_rate": 4.84850272775557e-05,
      "loss": 0.833,
      "step": 275
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.6225574393610873,
      "learning_rate": 4.8199477131839854e-05,
      "loss": 0.8362,
      "step": 300
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.5883987854013639,
      "learning_rate": 4.789028135801918e-05,
      "loss": 0.8315,
      "step": 325
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.6212622090526995,
      "learning_rate": 4.7557755098035814e-05,
      "loss": 0.8082,
      "step": 350
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.6254380356435723,
      "learning_rate": 4.720223727298845e-05,
      "loss": 0.8112,
      "step": 375
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.7114667768707209,
      "learning_rate": 4.682409023769342e-05,
      "loss": 0.8141,
      "step": 400
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.7156140969579615,
      "learning_rate": 4.6423699411361474e-05,
      "loss": 0.8214,
      "step": 425
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.6560300477797654,
      "learning_rate": 4.600147288476647e-05,
      "loss": 0.819,
      "step": 450
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.6220749749772762,
      "learning_rate": 4.5557841004306625e-05,
      "loss": 0.8177,
      "step": 475
    },
    {
      "epoch": 2.03,
      "grad_norm": 0.7459915153227248,
      "learning_rate": 4.509325593338203e-05,
      "loss": 0.8207,
      "step": 500
    }
  ],
  "logging_steps": 25,
  "max_steps": 2460,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 152390335463424.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}