|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.00823799569564725, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.118997847823624e-05, |
|
"eval_loss": 1.3292065858840942, |
|
"eval_runtime": 269.7834, |
|
"eval_samples_per_second": 37.893, |
|
"eval_steps_per_second": 18.949, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00041189978478236243, |
|
"grad_norm": 1.1009160280227661, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2284, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0008237995695647249, |
|
"grad_norm": 0.706173300743103, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0498, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0012356993543470873, |
|
"grad_norm": 0.7616959810256958, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0403, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0016475991391294497, |
|
"grad_norm": 0.9027200937271118, |
|
"learning_rate": 0.0002, |
|
"loss": 1.0141, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0020594989239118124, |
|
"grad_norm": 0.8844203352928162, |
|
"learning_rate": 0.0002, |
|
"loss": 0.978, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0020594989239118124, |
|
"eval_loss": 0.989996612071991, |
|
"eval_runtime": 269.6524, |
|
"eval_samples_per_second": 37.912, |
|
"eval_steps_per_second": 18.958, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0024713987086941746, |
|
"grad_norm": 0.9188791513442993, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9934, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0028832984934765373, |
|
"grad_norm": 0.7914915680885315, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9846, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0032951982782588995, |
|
"grad_norm": 0.796777069568634, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9583, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.003707098063041262, |
|
"grad_norm": 0.7085840702056885, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8841, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.004118997847823625, |
|
"grad_norm": 0.766033947467804, |
|
"learning_rate": 0.0002, |
|
"loss": 0.938, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004118997847823625, |
|
"eval_loss": 0.940765917301178, |
|
"eval_runtime": 270.1464, |
|
"eval_samples_per_second": 37.842, |
|
"eval_steps_per_second": 18.923, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004530897632605987, |
|
"grad_norm": 0.8813052177429199, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9171, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.004942797417388349, |
|
"grad_norm": 0.8220875263214111, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9411, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.005354697202170712, |
|
"grad_norm": 0.7601115107536316, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9324, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0057665969869530745, |
|
"grad_norm": 0.7757460474967957, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9055, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.006178496771735437, |
|
"grad_norm": 0.7860616445541382, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8872, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.006178496771735437, |
|
"eval_loss": 0.9218949675559998, |
|
"eval_runtime": 270.9263, |
|
"eval_samples_per_second": 37.734, |
|
"eval_steps_per_second": 18.869, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.006590396556517799, |
|
"grad_norm": 0.7156699895858765, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8475, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.007002296341300162, |
|
"grad_norm": 0.7343199849128723, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9222, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.007414196126082524, |
|
"grad_norm": 1.0209944248199463, |
|
"learning_rate": 0.0002, |
|
"loss": 0.8851, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.007826095910864887, |
|
"grad_norm": 0.903324544429779, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9177, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.00823799569564725, |
|
"grad_norm": 0.8378692269325256, |
|
"learning_rate": 0.0002, |
|
"loss": 0.9269, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.00823799569564725, |
|
"eval_loss": 0.9066545367240906, |
|
"eval_runtime": 270.1918, |
|
"eval_samples_per_second": 37.836, |
|
"eval_steps_per_second": 18.92, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.6400973299712e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|