{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 11080,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18050541516245489,
      "grad_norm": 31.555688858032227,
      "learning_rate": 2.864620938628159e-05,
      "loss": 1.3005,
      "step": 500
    },
    {
      "epoch": 0.36101083032490977,
      "grad_norm": 20.2050838470459,
      "learning_rate": 2.729241877256318e-05,
      "loss": 0.9978,
      "step": 1000
    },
    {
      "epoch": 0.5415162454873647,
      "grad_norm": 25.661306381225586,
      "learning_rate": 2.5938628158844765e-05,
      "loss": 0.9259,
      "step": 1500
    },
    {
      "epoch": 0.7220216606498195,
      "grad_norm": 22.348859786987305,
      "learning_rate": 2.4584837545126353e-05,
      "loss": 0.8798,
      "step": 2000
    },
    {
      "epoch": 0.9025270758122743,
      "grad_norm": 32.197166442871094,
      "learning_rate": 2.3231046931407943e-05,
      "loss": 0.8471,
      "step": 2500
    },
    {
      "epoch": 1.0830324909747293,
      "grad_norm": 20.764020919799805,
      "learning_rate": 2.1877256317689534e-05,
      "loss": 0.771,
      "step": 3000
    },
    {
      "epoch": 1.263537906137184,
      "grad_norm": 15.258258819580078,
      "learning_rate": 2.0523465703971117e-05,
      "loss": 0.6937,
      "step": 3500
    },
    {
      "epoch": 1.444043321299639,
      "grad_norm": 24.816614151000977,
      "learning_rate": 1.9169675090252708e-05,
      "loss": 0.709,
      "step": 4000
    },
    {
      "epoch": 1.6245487364620939,
      "grad_norm": 38.912071228027344,
      "learning_rate": 1.7815884476534298e-05,
      "loss": 0.6831,
      "step": 4500
    },
    {
      "epoch": 1.8050541516245486,
      "grad_norm": 8.910807609558105,
      "learning_rate": 1.6462093862815885e-05,
      "loss": 0.6742,
      "step": 5000
    },
    {
      "epoch": 1.9855595667870036,
      "grad_norm": 11.664189338684082,
      "learning_rate": 1.5108303249097474e-05,
      "loss": 0.69,
      "step": 5500
    },
    {
      "epoch": 2.1660649819494586,
      "grad_norm": 10.968308448791504,
      "learning_rate": 1.3754512635379063e-05,
      "loss": 0.5436,
      "step": 6000
    },
    {
      "epoch": 2.3465703971119134,
      "grad_norm": 11.711438179016113,
      "learning_rate": 1.240072202166065e-05,
      "loss": 0.5357,
      "step": 6500
    },
    {
      "epoch": 2.527075812274368,
      "grad_norm": 13.477335929870605,
      "learning_rate": 1.1046931407942239e-05,
      "loss": 0.5359,
      "step": 7000
    },
    {
      "epoch": 2.707581227436823,
      "grad_norm": 10.649256706237793,
      "learning_rate": 9.693140794223826e-06,
      "loss": 0.5394,
      "step": 7500
    },
    {
      "epoch": 2.888086642599278,
      "grad_norm": 10.525208473205566,
      "learning_rate": 8.339350180505416e-06,
      "loss": 0.5254,
      "step": 8000
    },
    {
      "epoch": 3.068592057761733,
      "grad_norm": 19.402320861816406,
      "learning_rate": 6.985559566787004e-06,
      "loss": 0.4775,
      "step": 8500
    },
    {
      "epoch": 3.2490974729241877,
      "grad_norm": 41.23615646362305,
      "learning_rate": 5.631768953068592e-06,
      "loss": 0.4003,
      "step": 9000
    },
    {
      "epoch": 3.4296028880866425,
      "grad_norm": 21.56231689453125,
      "learning_rate": 4.277978339350181e-06,
      "loss": 0.3952,
      "step": 9500
    },
    {
      "epoch": 3.6101083032490973,
      "grad_norm": 11.254490852355957,
      "learning_rate": 2.924187725631769e-06,
      "loss": 0.4007,
      "step": 10000
    },
    {
      "epoch": 3.7906137184115525,
      "grad_norm": 29.451414108276367,
      "learning_rate": 1.5703971119133576e-06,
      "loss": 0.3962,
      "step": 10500
    },
    {
      "epoch": 3.9711191335740073,
      "grad_norm": 16.022735595703125,
      "learning_rate": 2.1660649819494586e-07,
      "loss": 0.3853,
      "step": 11000
    },
    {
      "epoch": 4.0,
      "step": 11080,
      "total_flos": 1.0399493167607808e+16,
      "train_loss": 0.6484355885199261,
      "train_runtime": 8433.1347,
      "train_samples_per_second": 42.043,
      "train_steps_per_second": 1.314
    }
  ],
  "logging_steps": 500,
  "max_steps": 11080,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0399493167607808e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}