{
  "best_metric": 2.364795446395874,
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
  "epoch": 0.4132231404958678,
  "eval_steps": 25,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01652892561983471,
      "grad_norm": 8.88481330871582,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 4.7285,
      "step": 1
    },
    {
      "epoch": 0.01652892561983471,
      "eval_loss": 5.146731376647949,
      "eval_runtime": 4.3989,
      "eval_samples_per_second": 5.911,
      "eval_steps_per_second": 2.955,
      "step": 1
    },
    {
      "epoch": 0.03305785123966942,
      "grad_norm": 11.944966316223145,
      "learning_rate": 6.666666666666667e-05,
      "loss": 5.53,
      "step": 2
    },
    {
      "epoch": 0.049586776859504134,
      "grad_norm": 8.875107765197754,
      "learning_rate": 0.0001,
      "loss": 4.5313,
      "step": 3
    },
    {
      "epoch": 0.06611570247933884,
      "grad_norm": 9.8264741897583,
      "learning_rate": 9.992667069255619e-05,
      "loss": 4.5137,
      "step": 4
    },
    {
      "epoch": 0.08264462809917356,
      "grad_norm": 10.575345039367676,
      "learning_rate": 9.970689785771798e-05,
      "loss": 4.0278,
      "step": 5
    },
    {
      "epoch": 0.09917355371900827,
      "grad_norm": 12.0147123336792,
      "learning_rate": 9.934132612707632e-05,
      "loss": 3.6279,
      "step": 6
    },
    {
      "epoch": 0.11570247933884298,
      "grad_norm": 9.258858680725098,
      "learning_rate": 9.883102778550434e-05,
      "loss": 3.2625,
      "step": 7
    },
    {
      "epoch": 0.1322314049586777,
      "grad_norm": 8.09635066986084,
      "learning_rate": 9.817749962596115e-05,
      "loss": 2.8438,
      "step": 8
    },
    {
      "epoch": 0.1487603305785124,
      "grad_norm": 9.932831764221191,
      "learning_rate": 9.738265855914013e-05,
      "loss": 2.8447,
      "step": 9
    },
    {
      "epoch": 0.1652892561983471,
      "grad_norm": 10.23928451538086,
      "learning_rate": 9.644883599083958e-05,
      "loss": 2.7394,
      "step": 10
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 9.848809242248535,
      "learning_rate": 9.537877098354786e-05,
      "loss": 2.5748,
      "step": 11
    },
    {
      "epoch": 0.19834710743801653,
      "grad_norm": 8.457206726074219,
      "learning_rate": 9.417560222230115e-05,
      "loss": 3.1279,
      "step": 12
    },
    {
      "epoch": 0.21487603305785125,
      "grad_norm": 9.018540382385254,
      "learning_rate": 9.284285880837946e-05,
      "loss": 2.6837,
      "step": 13
    },
    {
      "epoch": 0.23140495867768596,
      "grad_norm": 9.167037010192871,
      "learning_rate": 9.138444990784453e-05,
      "loss": 2.5647,
      "step": 14
    },
    {
      "epoch": 0.24793388429752067,
      "grad_norm": 12.68354606628418,
      "learning_rate": 8.980465328528219e-05,
      "loss": 2.9151,
      "step": 15
    },
    {
      "epoch": 0.2644628099173554,
      "grad_norm": 8.708942413330078,
      "learning_rate": 8.810810275638183e-05,
      "loss": 3.2871,
      "step": 16
    },
    {
      "epoch": 0.2809917355371901,
      "grad_norm": 7.528436660766602,
      "learning_rate": 8.629977459615655e-05,
      "loss": 3.1734,
      "step": 17
    },
    {
      "epoch": 0.2975206611570248,
      "grad_norm": 8.341012001037598,
      "learning_rate": 8.438497294267117e-05,
      "loss": 2.3823,
      "step": 18
    },
    {
      "epoch": 0.3140495867768595,
      "grad_norm": 8.260071754455566,
      "learning_rate": 8.236931423909138e-05,
      "loss": 2.3359,
      "step": 19
    },
    {
      "epoch": 0.3305785123966942,
      "grad_norm": 9.664773941040039,
      "learning_rate": 8.025871075968828e-05,
      "loss": 2.2876,
      "step": 20
    },
    {
      "epoch": 0.34710743801652894,
      "grad_norm": 9.267929077148438,
      "learning_rate": 7.805935326811912e-05,
      "loss": 2.4783,
      "step": 21
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 9.94819450378418,
      "learning_rate": 7.577769285885109e-05,
      "loss": 3.06,
      "step": 22
    },
    {
      "epoch": 0.38016528925619836,
      "grad_norm": 6.440147876739502,
      "learning_rate": 7.342042203498951e-05,
      "loss": 2.4391,
      "step": 23
    },
    {
      "epoch": 0.39669421487603307,
      "grad_norm": 11.975261688232422,
      "learning_rate": 7.099445507801323e-05,
      "loss": 3.1622,
      "step": 24
    },
    {
      "epoch": 0.4132231404958678,
      "grad_norm": 12.1334867477417,
      "learning_rate": 6.850690776699573e-05,
      "loss": 2.4117,
      "step": 25
    },
    {
      "epoch": 0.4132231404958678,
      "eval_loss": 2.364795446395874,
      "eval_runtime": 4.4813,
      "eval_samples_per_second": 5.802,
      "eval_steps_per_second": 2.901,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 61,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.91723590385664e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}