test / trainer_state.json

Upload 12 files

25b3e5d verified 4 months ago

8.48 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 5000.0,
	"eval_steps": 500,
	"global_step": 5000,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 100.0,
	"grad_norm": 0.0051079667173326015,
	"learning_rate": 0.00019616000000000002,
	"loss": 0.1768,
	"step": 100
	},
	{
	"epoch": 200.0,
	"grad_norm": 0.0013037772150710225,
	"learning_rate": 0.00019216,
	"loss": 0.0,
	"step": 200
	},
	{
	"epoch": 300.0,
	"grad_norm": 0.00030190523830242455,
	"learning_rate": 0.00018816000000000001,
	"loss": 0.0,
	"step": 300
	},
	{
	"epoch": 400.0,
	"grad_norm": 0.00017417919298168272,
	"learning_rate": 0.00018416,
	"loss": 0.0,
	"step": 400
	},
	{
	"epoch": 500.0,
	"grad_norm": 0.00014137968537397683,
	"learning_rate": 0.00018016,
	"loss": 0.0,
	"step": 500
	},
	{
	"epoch": 600.0,
	"grad_norm": 0.0001240275305463001,
	"learning_rate": 0.00017616000000000002,
	"loss": 0.0,
	"step": 600
	},
	{
	"epoch": 700.0,
	"grad_norm": 9.807997412281111e-05,
	"learning_rate": 0.00017216,
	"loss": 0.0,
	"step": 700
	},
	{
	"epoch": 800.0,
	"grad_norm": 6.768624734831974e-05,
	"learning_rate": 0.00016816000000000002,
	"loss": 0.0,
	"step": 800
	},
	{
	"epoch": 900.0,
	"grad_norm": 5.961491842754185e-05,
	"learning_rate": 0.00016416,
	"loss": 0.0,
	"step": 900
	},
	{
	"epoch": 1000.0,
	"grad_norm": 5.017322473577224e-05,
	"learning_rate": 0.00016016,
	"loss": 0.0,
	"step": 1000
	},
	{
	"epoch": 1100.0,
	"grad_norm": 5.257365410216153e-05,
	"learning_rate": 0.00015616000000000002,
	"loss": 0.0,
	"step": 1100
	},
	{
	"epoch": 1200.0,
	"grad_norm": 5.0212354835821316e-05,
	"learning_rate": 0.00015216,
	"loss": 0.0,
	"step": 1200
	},
	{
	"epoch": 1300.0,
	"grad_norm": 0.00011130324128316715,
	"learning_rate": 0.00014816000000000002,
	"loss": 0.0,
	"step": 1300
	},
	{
	"epoch": 1400.0,
	"grad_norm": 3.4537704777903855e-05,
	"learning_rate": 0.00014416,
	"loss": 0.0,
	"step": 1400
	},
	{
	"epoch": 1500.0,
	"grad_norm": 2.7689882699633017e-05,
	"learning_rate": 0.00014016,
	"loss": 0.0,
	"step": 1500
	},
	{
	"epoch": 1600.0,
	"grad_norm": 2.726606180658564e-05,
	"learning_rate": 0.00013616,
	"loss": 0.0,
	"step": 1600
	},
	{
	"epoch": 1700.0,
	"grad_norm": 2.1775686036562547e-05,
	"learning_rate": 0.00013216,
	"loss": 0.0,
	"step": 1700
	},
	{
	"epoch": 1800.0,
	"grad_norm": 2.3525770302512683e-05,
	"learning_rate": 0.00012816000000000002,
	"loss": 0.0,
	"step": 1800
	},
	{
	"epoch": 1900.0,
	"grad_norm": 1.902567055367399e-05,
	"learning_rate": 0.00012416,
	"loss": 0.0,
	"step": 1900
	},
	{
	"epoch": 2000.0,
	"grad_norm": 2.1888447008677758e-05,
	"learning_rate": 0.00012016,
	"loss": 0.0,
	"step": 2000
	},
	{
	"epoch": 2100.0,
	"grad_norm": 1.896571302495431e-05,
	"learning_rate": 0.00011616,
	"loss": 0.0,
	"step": 2100
	},
	{
	"epoch": 2200.0,
	"grad_norm": 1.5480936781386845e-05,
	"learning_rate": 0.00011216,
	"loss": 0.0,
	"step": 2200
	},
	{
	"epoch": 2300.0,
	"grad_norm": 1.3961292097519618e-05,
	"learning_rate": 0.00010816,
	"loss": 0.0,
	"step": 2300
	},
	{
	"epoch": 2400.0,
	"grad_norm": 1.4109475159784779e-05,
	"learning_rate": 0.00010416000000000002,
	"loss": 0.0,
	"step": 2400
	},
	{
	"epoch": 2500.0,
	"grad_norm": 1.2665558642765973e-05,
	"learning_rate": 0.00010016,
	"loss": 0.0,
	"step": 2500
	},
	{
	"epoch": 2600.0,
	"grad_norm": 1.5646817701053806e-05,
	"learning_rate": 9.616e-05,
	"loss": 0.0,
	"step": 2600
	},
	{
	"epoch": 2700.0,
	"grad_norm": 1.2876950677309651e-05,
	"learning_rate": 9.216e-05,
	"loss": 0.0,
	"step": 2700
	},
	{
	"epoch": 2800.0,
	"grad_norm": 1.2121616236981936e-05,
	"learning_rate": 8.816000000000001e-05,
	"loss": 0.0,
	"step": 2800
	},
	{
	"epoch": 2900.0,
	"grad_norm": 1.4524578546115663e-05,
	"learning_rate": 8.416000000000001e-05,
	"loss": 0.0,
	"step": 2900
	},
	{
	"epoch": 3000.0,
	"grad_norm": 1.1223896763112862e-05,
	"learning_rate": 8.016e-05,
	"loss": 0.0,
	"step": 3000
	},
	{
	"epoch": 3100.0,
	"grad_norm": 8.85269673744915e-06,
	"learning_rate": 7.616e-05,
	"loss": 0.0,
	"step": 3100
	},
	{
	"epoch": 3200.0,
	"grad_norm": 1.264509955944959e-05,
	"learning_rate": 7.216e-05,
	"loss": 0.0,
	"step": 3200
	},
	{
	"epoch": 3300.0,
	"grad_norm": 8.284540854219813e-06,
	"learning_rate": 6.816e-05,
	"loss": 0.0,
	"step": 3300
	},
	{
	"epoch": 3400.0,
	"grad_norm": 8.871616046235431e-06,
	"learning_rate": 6.416e-05,
	"loss": 0.0,
	"step": 3400
	},
	{
	"epoch": 3500.0,
	"grad_norm": 9.966872312361374e-06,
	"learning_rate": 6.016000000000001e-05,
	"loss": 0.0,
	"step": 3500
	},
	{
	"epoch": 3600.0,
	"grad_norm": 2.9739601814071648e-05,
	"learning_rate": 5.6160000000000004e-05,
	"loss": 0.0,
	"step": 3600
	},
	{
	"epoch": 3700.0,
	"grad_norm": 7.714033927186392e-06,
	"learning_rate": 5.2159999999999995e-05,
	"loss": 0.0,
	"step": 3700
	},
	{
	"epoch": 3800.0,
	"grad_norm": 1.497406901762588e-05,
	"learning_rate": 4.816e-05,
	"loss": 0.0,
	"step": 3800
	},
	{
	"epoch": 3900.0,
	"grad_norm": 7.307490250241244e-06,
	"learning_rate": 4.4160000000000004e-05,
	"loss": 0.0,
	"step": 3900
	},
	{
	"epoch": 4000.0,
	"grad_norm": 6.682894763798686e-06,
	"learning_rate": 4.016e-05,
	"loss": 0.0,
	"step": 4000
	},
	{
	"epoch": 4100.0,
	"grad_norm": 7.749928954581264e-06,
	"learning_rate": 3.616e-05,
	"loss": 0.0,
	"step": 4100
	},
	{
	"epoch": 4200.0,
	"grad_norm": 1.01770574474358e-05,
	"learning_rate": 3.2160000000000004e-05,
	"loss": 0.0,
	"step": 4200
	},
	{
	"epoch": 4300.0,
	"grad_norm": 6.606936040043365e-06,
	"learning_rate": 2.816e-05,
	"loss": 0.0,
	"step": 4300
	},
	{
	"epoch": 4400.0,
	"grad_norm": 6.749212843715213e-06,
	"learning_rate": 2.4160000000000002e-05,
	"loss": 0.0,
	"step": 4400
	},
	{
	"epoch": 4500.0,
	"grad_norm": 8.575744686822873e-06,
	"learning_rate": 2.016e-05,
	"loss": 0.0,
	"step": 4500
	},
	{
	"epoch": 4600.0,
	"grad_norm": 6.673930329270661e-06,
	"learning_rate": 1.616e-05,
	"loss": 0.0,
	"step": 4600
	},
	{
	"epoch": 4700.0,
	"grad_norm": 6.32612272966071e-06,
	"learning_rate": 1.216e-05,
	"loss": 0.0,
	"step": 4700
	},
	{
	"epoch": 4800.0,
	"grad_norm": 6.985771960899001e-06,
	"learning_rate": 8.160000000000001e-06,
	"loss": 0.0,
	"step": 4800
	},
	{
	"epoch": 4900.0,
	"grad_norm": 5.245818101684563e-06,
	"learning_rate": 4.16e-06,
	"loss": 0.0,
	"step": 4900
	},
	{
	"epoch": 5000.0,
	"grad_norm": 5.854470146005042e-06,
	"learning_rate": 1.6e-07,
	"loss": 0.0,
	"step": 5000
	}
	],
	"logging_steps": 100,
	"max_steps": 5000,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 5000,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 6755965747200000.0,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}