|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 59.67567567567568, |
|
"eval_steps": 276, |
|
"global_step": 2760, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.967567567567568, |
|
"grad_norm": 7.085669040679932, |
|
"learning_rate": 9.007246376811595e-06, |
|
"loss": 1.1802, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 5.967567567567568, |
|
"eval_accuracy": 0.09781729991915926, |
|
"eval_loss": 2.998732089996338, |
|
"eval_runtime": 4.6867, |
|
"eval_samples_per_second": 263.941, |
|
"eval_steps_per_second": 13.229, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 11.935135135135136, |
|
"grad_norm": 12.396671295166016, |
|
"learning_rate": 8.007246376811595e-06, |
|
"loss": 0.8645, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 11.935135135135136, |
|
"eval_accuracy": 0.09822150363783347, |
|
"eval_loss": 3.489548683166504, |
|
"eval_runtime": 4.7384, |
|
"eval_samples_per_second": 261.059, |
|
"eval_steps_per_second": 13.085, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 17.9027027027027, |
|
"grad_norm": 13.397128105163574, |
|
"learning_rate": 7.007246376811595e-06, |
|
"loss": 0.6579, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 17.9027027027027, |
|
"eval_accuracy": 0.10158986796011857, |
|
"eval_loss": 3.6031882762908936, |
|
"eval_runtime": 4.7432, |
|
"eval_samples_per_second": 260.795, |
|
"eval_steps_per_second": 13.071, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 23.87027027027027, |
|
"grad_norm": 8.087945938110352, |
|
"learning_rate": 6.007246376811595e-06, |
|
"loss": 0.595, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 23.87027027027027, |
|
"eval_accuracy": 0.09822150363783347, |
|
"eval_loss": 3.6242969036102295, |
|
"eval_runtime": 4.9069, |
|
"eval_samples_per_second": 252.096, |
|
"eval_steps_per_second": 12.635, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 29.83783783783784, |
|
"grad_norm": 4.8504486083984375, |
|
"learning_rate": 5.0072463768115946e-06, |
|
"loss": 0.5518, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 29.83783783783784, |
|
"eval_accuracy": 0.09749393694421989, |
|
"eval_loss": 3.6076433658599854, |
|
"eval_runtime": 4.7024, |
|
"eval_samples_per_second": 263.059, |
|
"eval_steps_per_second": 13.185, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 35.8054054054054, |
|
"grad_norm": 7.603194713592529, |
|
"learning_rate": 4.007246376811594e-06, |
|
"loss": 0.5298, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 35.8054054054054, |
|
"eval_accuracy": 0.09714362705470224, |
|
"eval_loss": 3.6512610912323, |
|
"eval_runtime": 4.7172, |
|
"eval_samples_per_second": 262.229, |
|
"eval_steps_per_second": 13.143, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 41.77297297297297, |
|
"grad_norm": 5.10509729385376, |
|
"learning_rate": 3.0072463768115946e-06, |
|
"loss": 0.5155, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 41.77297297297297, |
|
"eval_accuracy": 0.09700889248181083, |
|
"eval_loss": 3.5870466232299805, |
|
"eval_runtime": 4.6922, |
|
"eval_samples_per_second": 263.627, |
|
"eval_steps_per_second": 13.213, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 47.74054054054054, |
|
"grad_norm": 2.8337128162384033, |
|
"learning_rate": 2.0072463768115945e-06, |
|
"loss": 0.5058, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 47.74054054054054, |
|
"eval_accuracy": 0.09660468876313662, |
|
"eval_loss": 3.6554582118988037, |
|
"eval_runtime": 4.7001, |
|
"eval_samples_per_second": 263.184, |
|
"eval_steps_per_second": 13.191, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 53.70810810810811, |
|
"grad_norm": 4.16039514541626, |
|
"learning_rate": 1.0108695652173914e-06, |
|
"loss": 0.4935, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 53.70810810810811, |
|
"eval_accuracy": 0.09602083894727387, |
|
"eval_loss": 3.673586130142212, |
|
"eval_runtime": 4.7276, |
|
"eval_samples_per_second": 261.655, |
|
"eval_steps_per_second": 13.114, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 59.67567567567568, |
|
"grad_norm": 2.723360061645508, |
|
"learning_rate": 1.0869565217391305e-08, |
|
"loss": 0.4928, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 59.67567567567568, |
|
"eval_accuracy": 0.09571544058205335, |
|
"eval_loss": 3.731440544128418, |
|
"eval_runtime": 4.6934, |
|
"eval_samples_per_second": 263.563, |
|
"eval_steps_per_second": 13.21, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 59.67567567567568, |
|
"step": 2760, |
|
"total_flos": 2.1787658809800192e+17, |
|
"train_loss": 0.6386864206065302, |
|
"train_runtime": 4685.4185, |
|
"train_samples_per_second": 118.222, |
|
"train_steps_per_second": 0.589 |
|
} |
|
], |
|
"logging_steps": 276, |
|
"max_steps": 2760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.1787658809800192e+17, |
|
"train_batch_size": 25, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|