robiual-awal's picture
Training in progress, step 200, checkpoint
ebbeec2 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.00823799569564725,
"eval_steps": 50,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 4.118997847823624e-05,
"eval_loss": 1.3292065858840942,
"eval_runtime": 269.7834,
"eval_samples_per_second": 37.893,
"eval_steps_per_second": 18.949,
"step": 1
},
{
"epoch": 0.00041189978478236243,
"grad_norm": 1.1009160280227661,
"learning_rate": 0.0002,
"loss": 1.2284,
"step": 10
},
{
"epoch": 0.0008237995695647249,
"grad_norm": 0.706173300743103,
"learning_rate": 0.0002,
"loss": 1.0498,
"step": 20
},
{
"epoch": 0.0012356993543470873,
"grad_norm": 0.7616959810256958,
"learning_rate": 0.0002,
"loss": 1.0403,
"step": 30
},
{
"epoch": 0.0016475991391294497,
"grad_norm": 0.9027200937271118,
"learning_rate": 0.0002,
"loss": 1.0141,
"step": 40
},
{
"epoch": 0.0020594989239118124,
"grad_norm": 0.8844203352928162,
"learning_rate": 0.0002,
"loss": 0.978,
"step": 50
},
{
"epoch": 0.0020594989239118124,
"eval_loss": 0.989996612071991,
"eval_runtime": 269.6524,
"eval_samples_per_second": 37.912,
"eval_steps_per_second": 18.958,
"step": 50
},
{
"epoch": 0.0024713987086941746,
"grad_norm": 0.9188791513442993,
"learning_rate": 0.0002,
"loss": 0.9934,
"step": 60
},
{
"epoch": 0.0028832984934765373,
"grad_norm": 0.7914915680885315,
"learning_rate": 0.0002,
"loss": 0.9846,
"step": 70
},
{
"epoch": 0.0032951982782588995,
"grad_norm": 0.796777069568634,
"learning_rate": 0.0002,
"loss": 0.9583,
"step": 80
},
{
"epoch": 0.003707098063041262,
"grad_norm": 0.7085840702056885,
"learning_rate": 0.0002,
"loss": 0.8841,
"step": 90
},
{
"epoch": 0.004118997847823625,
"grad_norm": 0.766033947467804,
"learning_rate": 0.0002,
"loss": 0.938,
"step": 100
},
{
"epoch": 0.004118997847823625,
"eval_loss": 0.940765917301178,
"eval_runtime": 270.1464,
"eval_samples_per_second": 37.842,
"eval_steps_per_second": 18.923,
"step": 100
},
{
"epoch": 0.004530897632605987,
"grad_norm": 0.8813052177429199,
"learning_rate": 0.0002,
"loss": 0.9171,
"step": 110
},
{
"epoch": 0.004942797417388349,
"grad_norm": 0.8220875263214111,
"learning_rate": 0.0002,
"loss": 0.9411,
"step": 120
},
{
"epoch": 0.005354697202170712,
"grad_norm": 0.7601115107536316,
"learning_rate": 0.0002,
"loss": 0.9324,
"step": 130
},
{
"epoch": 0.0057665969869530745,
"grad_norm": 0.7757460474967957,
"learning_rate": 0.0002,
"loss": 0.9055,
"step": 140
},
{
"epoch": 0.006178496771735437,
"grad_norm": 0.7860616445541382,
"learning_rate": 0.0002,
"loss": 0.8872,
"step": 150
},
{
"epoch": 0.006178496771735437,
"eval_loss": 0.9218949675559998,
"eval_runtime": 270.9263,
"eval_samples_per_second": 37.734,
"eval_steps_per_second": 18.869,
"step": 150
},
{
"epoch": 0.006590396556517799,
"grad_norm": 0.7156699895858765,
"learning_rate": 0.0002,
"loss": 0.8475,
"step": 160
},
{
"epoch": 0.007002296341300162,
"grad_norm": 0.7343199849128723,
"learning_rate": 0.0002,
"loss": 0.9222,
"step": 170
},
{
"epoch": 0.007414196126082524,
"grad_norm": 1.0209944248199463,
"learning_rate": 0.0002,
"loss": 0.8851,
"step": 180
},
{
"epoch": 0.007826095910864887,
"grad_norm": 0.903324544429779,
"learning_rate": 0.0002,
"loss": 0.9177,
"step": 190
},
{
"epoch": 0.00823799569564725,
"grad_norm": 0.8378692269325256,
"learning_rate": 0.0002,
"loss": 0.9269,
"step": 200
},
{
"epoch": 0.00823799569564725,
"eval_loss": 0.9066545367240906,
"eval_runtime": 270.1918,
"eval_samples_per_second": 37.836,
"eval_steps_per_second": 18.92,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6400973299712e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}