dimasik1987's picture
Training in progress, step 25, checkpoint
48ed5cf verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.01633186346562143,
"eval_steps": 3,
"global_step": 25,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006532745386248571,
"grad_norm": 0.5963796377182007,
"learning_rate": 5e-05,
"loss": 1.1508,
"step": 1
},
{
"epoch": 0.0006532745386248571,
"eval_loss": 1.2967846393585205,
"eval_runtime": 400.7493,
"eval_samples_per_second": 6.433,
"eval_steps_per_second": 1.609,
"step": 1
},
{
"epoch": 0.0013065490772497142,
"grad_norm": 0.6392633318901062,
"learning_rate": 0.0001,
"loss": 1.2868,
"step": 2
},
{
"epoch": 0.0019598236158745713,
"grad_norm": 0.6073038578033447,
"learning_rate": 9.953429730181653e-05,
"loss": 1.1726,
"step": 3
},
{
"epoch": 0.0019598236158745713,
"eval_loss": 1.2707507610321045,
"eval_runtime": 402.7641,
"eval_samples_per_second": 6.401,
"eval_steps_per_second": 1.601,
"step": 3
},
{
"epoch": 0.0026130981544994283,
"grad_norm": 0.6297516822814941,
"learning_rate": 9.814586436738998e-05,
"loss": 1.2031,
"step": 4
},
{
"epoch": 0.0032663726931242854,
"grad_norm": 0.5256909728050232,
"learning_rate": 9.586056507527266e-05,
"loss": 1.1517,
"step": 5
},
{
"epoch": 0.0039196472317491425,
"grad_norm": 0.5755878686904907,
"learning_rate": 9.272097022732443e-05,
"loss": 1.328,
"step": 6
},
{
"epoch": 0.0039196472317491425,
"eval_loss": 1.2350541353225708,
"eval_runtime": 402.0728,
"eval_samples_per_second": 6.412,
"eval_steps_per_second": 1.604,
"step": 6
},
{
"epoch": 0.004572921770374,
"grad_norm": 0.5528045892715454,
"learning_rate": 8.8785564535221e-05,
"loss": 1.2442,
"step": 7
},
{
"epoch": 0.005226196308998857,
"grad_norm": 0.5592995882034302,
"learning_rate": 8.412765716093272e-05,
"loss": 1.1386,
"step": 8
},
{
"epoch": 0.005879470847623714,
"grad_norm": 0.626818060874939,
"learning_rate": 7.883401610574336e-05,
"loss": 1.3114,
"step": 9
},
{
"epoch": 0.005879470847623714,
"eval_loss": 1.2249033451080322,
"eval_runtime": 402.0303,
"eval_samples_per_second": 6.412,
"eval_steps_per_second": 1.604,
"step": 9
},
{
"epoch": 0.006532745386248571,
"grad_norm": 0.5507422089576721,
"learning_rate": 7.300325188655761e-05,
"loss": 1.1543,
"step": 10
},
{
"epoch": 0.007186019924873428,
"grad_norm": 0.6158396005630493,
"learning_rate": 6.674398060854931e-05,
"loss": 1.2007,
"step": 11
},
{
"epoch": 0.007839294463498285,
"grad_norm": 0.582711935043335,
"learning_rate": 6.01728006526317e-05,
"loss": 1.2875,
"step": 12
},
{
"epoch": 0.007839294463498285,
"eval_loss": 1.2190592288970947,
"eval_runtime": 401.9323,
"eval_samples_per_second": 6.414,
"eval_steps_per_second": 1.605,
"step": 12
},
{
"epoch": 0.008492569002123142,
"grad_norm": 0.7021968960762024,
"learning_rate": 5.341212066823355e-05,
"loss": 1.3059,
"step": 13
},
{
"epoch": 0.009145843540748,
"grad_norm": 0.5167236328125,
"learning_rate": 4.658787933176646e-05,
"loss": 1.1782,
"step": 14
},
{
"epoch": 0.009799118079372856,
"grad_norm": 0.5842359662055969,
"learning_rate": 3.982719934736832e-05,
"loss": 1.2259,
"step": 15
},
{
"epoch": 0.009799118079372856,
"eval_loss": 1.2140125036239624,
"eval_runtime": 402.6801,
"eval_samples_per_second": 6.402,
"eval_steps_per_second": 1.602,
"step": 15
},
{
"epoch": 0.010452392617997713,
"grad_norm": 0.6493592858314514,
"learning_rate": 3.325601939145069e-05,
"loss": 1.1797,
"step": 16
},
{
"epoch": 0.01110566715662257,
"grad_norm": 0.5524556636810303,
"learning_rate": 2.6996748113442394e-05,
"loss": 1.2013,
"step": 17
},
{
"epoch": 0.011758941695247428,
"grad_norm": 0.6769681572914124,
"learning_rate": 2.1165983894256647e-05,
"loss": 1.2394,
"step": 18
},
{
"epoch": 0.011758941695247428,
"eval_loss": 1.2114737033843994,
"eval_runtime": 403.2749,
"eval_samples_per_second": 6.393,
"eval_steps_per_second": 1.599,
"step": 18
},
{
"epoch": 0.012412216233872285,
"grad_norm": 0.5104557871818542,
"learning_rate": 1.5872342839067306e-05,
"loss": 1.2101,
"step": 19
},
{
"epoch": 0.013065490772497142,
"grad_norm": 0.4540850818157196,
"learning_rate": 1.1214435464779006e-05,
"loss": 1.171,
"step": 20
},
{
"epoch": 0.013718765311121999,
"grad_norm": 0.483037531375885,
"learning_rate": 7.2790297726755716e-06,
"loss": 1.1566,
"step": 21
},
{
"epoch": 0.013718765311121999,
"eval_loss": 1.2103384733200073,
"eval_runtime": 402.9848,
"eval_samples_per_second": 6.397,
"eval_steps_per_second": 1.601,
"step": 21
},
{
"epoch": 0.014372039849746856,
"grad_norm": 0.5860415697097778,
"learning_rate": 4.139434924727359e-06,
"loss": 1.2214,
"step": 22
},
{
"epoch": 0.015025314388371713,
"grad_norm": 0.5485131144523621,
"learning_rate": 1.8541356326100433e-06,
"loss": 1.1605,
"step": 23
},
{
"epoch": 0.01567858892699657,
"grad_norm": 0.45531585812568665,
"learning_rate": 4.6570269818346224e-07,
"loss": 1.1747,
"step": 24
},
{
"epoch": 0.01567858892699657,
"eval_loss": 1.2099705934524536,
"eval_runtime": 402.0889,
"eval_samples_per_second": 6.412,
"eval_steps_per_second": 1.604,
"step": 24
},
{
"epoch": 0.01633186346562143,
"grad_norm": 0.5713936686515808,
"learning_rate": 0.0,
"loss": 1.2352,
"step": 25
}
],
"logging_steps": 1,
"max_steps": 25,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.13314785247232e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}