test / trainer_state.json
jjinkoo's picture
Upload 12 files
25b3e5d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5000.0,
"eval_steps": 500,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 100.0,
"grad_norm": 0.0051079667173326015,
"learning_rate": 0.00019616000000000002,
"loss": 0.1768,
"step": 100
},
{
"epoch": 200.0,
"grad_norm": 0.0013037772150710225,
"learning_rate": 0.00019216,
"loss": 0.0,
"step": 200
},
{
"epoch": 300.0,
"grad_norm": 0.00030190523830242455,
"learning_rate": 0.00018816000000000001,
"loss": 0.0,
"step": 300
},
{
"epoch": 400.0,
"grad_norm": 0.00017417919298168272,
"learning_rate": 0.00018416,
"loss": 0.0,
"step": 400
},
{
"epoch": 500.0,
"grad_norm": 0.00014137968537397683,
"learning_rate": 0.00018016,
"loss": 0.0,
"step": 500
},
{
"epoch": 600.0,
"grad_norm": 0.0001240275305463001,
"learning_rate": 0.00017616000000000002,
"loss": 0.0,
"step": 600
},
{
"epoch": 700.0,
"grad_norm": 9.807997412281111e-05,
"learning_rate": 0.00017216,
"loss": 0.0,
"step": 700
},
{
"epoch": 800.0,
"grad_norm": 6.768624734831974e-05,
"learning_rate": 0.00016816000000000002,
"loss": 0.0,
"step": 800
},
{
"epoch": 900.0,
"grad_norm": 5.961491842754185e-05,
"learning_rate": 0.00016416,
"loss": 0.0,
"step": 900
},
{
"epoch": 1000.0,
"grad_norm": 5.017322473577224e-05,
"learning_rate": 0.00016016,
"loss": 0.0,
"step": 1000
},
{
"epoch": 1100.0,
"grad_norm": 5.257365410216153e-05,
"learning_rate": 0.00015616000000000002,
"loss": 0.0,
"step": 1100
},
{
"epoch": 1200.0,
"grad_norm": 5.0212354835821316e-05,
"learning_rate": 0.00015216,
"loss": 0.0,
"step": 1200
},
{
"epoch": 1300.0,
"grad_norm": 0.00011130324128316715,
"learning_rate": 0.00014816000000000002,
"loss": 0.0,
"step": 1300
},
{
"epoch": 1400.0,
"grad_norm": 3.4537704777903855e-05,
"learning_rate": 0.00014416,
"loss": 0.0,
"step": 1400
},
{
"epoch": 1500.0,
"grad_norm": 2.7689882699633017e-05,
"learning_rate": 0.00014016,
"loss": 0.0,
"step": 1500
},
{
"epoch": 1600.0,
"grad_norm": 2.726606180658564e-05,
"learning_rate": 0.00013616,
"loss": 0.0,
"step": 1600
},
{
"epoch": 1700.0,
"grad_norm": 2.1775686036562547e-05,
"learning_rate": 0.00013216,
"loss": 0.0,
"step": 1700
},
{
"epoch": 1800.0,
"grad_norm": 2.3525770302512683e-05,
"learning_rate": 0.00012816000000000002,
"loss": 0.0,
"step": 1800
},
{
"epoch": 1900.0,
"grad_norm": 1.902567055367399e-05,
"learning_rate": 0.00012416,
"loss": 0.0,
"step": 1900
},
{
"epoch": 2000.0,
"grad_norm": 2.1888447008677758e-05,
"learning_rate": 0.00012016,
"loss": 0.0,
"step": 2000
},
{
"epoch": 2100.0,
"grad_norm": 1.896571302495431e-05,
"learning_rate": 0.00011616,
"loss": 0.0,
"step": 2100
},
{
"epoch": 2200.0,
"grad_norm": 1.5480936781386845e-05,
"learning_rate": 0.00011216,
"loss": 0.0,
"step": 2200
},
{
"epoch": 2300.0,
"grad_norm": 1.3961292097519618e-05,
"learning_rate": 0.00010816,
"loss": 0.0,
"step": 2300
},
{
"epoch": 2400.0,
"grad_norm": 1.4109475159784779e-05,
"learning_rate": 0.00010416000000000002,
"loss": 0.0,
"step": 2400
},
{
"epoch": 2500.0,
"grad_norm": 1.2665558642765973e-05,
"learning_rate": 0.00010016,
"loss": 0.0,
"step": 2500
},
{
"epoch": 2600.0,
"grad_norm": 1.5646817701053806e-05,
"learning_rate": 9.616e-05,
"loss": 0.0,
"step": 2600
},
{
"epoch": 2700.0,
"grad_norm": 1.2876950677309651e-05,
"learning_rate": 9.216e-05,
"loss": 0.0,
"step": 2700
},
{
"epoch": 2800.0,
"grad_norm": 1.2121616236981936e-05,
"learning_rate": 8.816000000000001e-05,
"loss": 0.0,
"step": 2800
},
{
"epoch": 2900.0,
"grad_norm": 1.4524578546115663e-05,
"learning_rate": 8.416000000000001e-05,
"loss": 0.0,
"step": 2900
},
{
"epoch": 3000.0,
"grad_norm": 1.1223896763112862e-05,
"learning_rate": 8.016e-05,
"loss": 0.0,
"step": 3000
},
{
"epoch": 3100.0,
"grad_norm": 8.85269673744915e-06,
"learning_rate": 7.616e-05,
"loss": 0.0,
"step": 3100
},
{
"epoch": 3200.0,
"grad_norm": 1.264509955944959e-05,
"learning_rate": 7.216e-05,
"loss": 0.0,
"step": 3200
},
{
"epoch": 3300.0,
"grad_norm": 8.284540854219813e-06,
"learning_rate": 6.816e-05,
"loss": 0.0,
"step": 3300
},
{
"epoch": 3400.0,
"grad_norm": 8.871616046235431e-06,
"learning_rate": 6.416e-05,
"loss": 0.0,
"step": 3400
},
{
"epoch": 3500.0,
"grad_norm": 9.966872312361374e-06,
"learning_rate": 6.016000000000001e-05,
"loss": 0.0,
"step": 3500
},
{
"epoch": 3600.0,
"grad_norm": 2.9739601814071648e-05,
"learning_rate": 5.6160000000000004e-05,
"loss": 0.0,
"step": 3600
},
{
"epoch": 3700.0,
"grad_norm": 7.714033927186392e-06,
"learning_rate": 5.2159999999999995e-05,
"loss": 0.0,
"step": 3700
},
{
"epoch": 3800.0,
"grad_norm": 1.497406901762588e-05,
"learning_rate": 4.816e-05,
"loss": 0.0,
"step": 3800
},
{
"epoch": 3900.0,
"grad_norm": 7.307490250241244e-06,
"learning_rate": 4.4160000000000004e-05,
"loss": 0.0,
"step": 3900
},
{
"epoch": 4000.0,
"grad_norm": 6.682894763798686e-06,
"learning_rate": 4.016e-05,
"loss": 0.0,
"step": 4000
},
{
"epoch": 4100.0,
"grad_norm": 7.749928954581264e-06,
"learning_rate": 3.616e-05,
"loss": 0.0,
"step": 4100
},
{
"epoch": 4200.0,
"grad_norm": 1.01770574474358e-05,
"learning_rate": 3.2160000000000004e-05,
"loss": 0.0,
"step": 4200
},
{
"epoch": 4300.0,
"grad_norm": 6.606936040043365e-06,
"learning_rate": 2.816e-05,
"loss": 0.0,
"step": 4300
},
{
"epoch": 4400.0,
"grad_norm": 6.749212843715213e-06,
"learning_rate": 2.4160000000000002e-05,
"loss": 0.0,
"step": 4400
},
{
"epoch": 4500.0,
"grad_norm": 8.575744686822873e-06,
"learning_rate": 2.016e-05,
"loss": 0.0,
"step": 4500
},
{
"epoch": 4600.0,
"grad_norm": 6.673930329270661e-06,
"learning_rate": 1.616e-05,
"loss": 0.0,
"step": 4600
},
{
"epoch": 4700.0,
"grad_norm": 6.32612272966071e-06,
"learning_rate": 1.216e-05,
"loss": 0.0,
"step": 4700
},
{
"epoch": 4800.0,
"grad_norm": 6.985771960899001e-06,
"learning_rate": 8.160000000000001e-06,
"loss": 0.0,
"step": 4800
},
{
"epoch": 4900.0,
"grad_norm": 5.245818101684563e-06,
"learning_rate": 4.16e-06,
"loss": 0.0,
"step": 4900
},
{
"epoch": 5000.0,
"grad_norm": 5.854470146005042e-06,
"learning_rate": 1.6e-07,
"loss": 0.0,
"step": 5000
}
],
"logging_steps": 100,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 5000,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6755965747200000.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}