{
  "best_metric": 0.7776047587394714,
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
  "epoch": 2.051282051282051,
  "eval_steps": 25,
  "global_step": 35,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05860805860805861,
      "grad_norm": 41.95743179321289,
      "learning_rate": 5e-05,
      "loss": 11.1238,
      "step": 1
    },
    {
      "epoch": 0.05860805860805861,
      "eval_loss": 11.64078426361084,
      "eval_runtime": 11.7698,
      "eval_samples_per_second": 19.542,
      "eval_steps_per_second": 1.274,
      "step": 1
    },
    {
      "epoch": 0.11721611721611722,
      "grad_norm": 42.654151916503906,
      "learning_rate": 0.0001,
      "loss": 11.5029,
      "step": 2
    },
    {
      "epoch": 0.17582417582417584,
      "grad_norm": 30.422691345214844,
      "learning_rate": 9.977359612865423e-05,
      "loss": 8.9994,
      "step": 3
    },
    {
      "epoch": 0.23443223443223443,
      "grad_norm": 17.667789459228516,
      "learning_rate": 9.909643486313533e-05,
      "loss": 4.2615,
      "step": 4
    },
    {
      "epoch": 0.29304029304029305,
      "grad_norm": 9.984042167663574,
      "learning_rate": 9.797464868072488e-05,
      "loss": 1.9114,
      "step": 5
    },
    {
      "epoch": 0.3516483516483517,
      "grad_norm": 3.775686740875244,
      "learning_rate": 9.641839665080363e-05,
      "loss": 1.1071,
      "step": 6
    },
    {
      "epoch": 0.41025641025641024,
      "grad_norm": 2.311100721359253,
      "learning_rate": 9.444177243274618e-05,
      "loss": 0.9249,
      "step": 7
    },
    {
      "epoch": 0.46886446886446886,
      "grad_norm": 2.155744791030884,
      "learning_rate": 9.206267664155907e-05,
      "loss": 0.9448,
      "step": 8
    },
    {
      "epoch": 0.5274725274725275,
      "grad_norm": 1.0787104368209839,
      "learning_rate": 8.930265473713938e-05,
      "loss": 0.8014,
      "step": 9
    },
    {
      "epoch": 0.5860805860805861,
      "grad_norm": 0.8048316836357117,
      "learning_rate": 8.618670190525352e-05,
      "loss": 0.7563,
      "step": 10
    },
    {
      "epoch": 0.6446886446886447,
      "grad_norm": 1.2824442386627197,
      "learning_rate": 8.274303669726426e-05,
      "loss": 0.8288,
      "step": 11
    },
    {
      "epoch": 0.7032967032967034,
      "grad_norm": 0.678689181804657,
      "learning_rate": 7.900284547855991e-05,
      "loss": 0.8168,
      "step": 12
    },
    {
      "epoch": 0.7619047619047619,
      "grad_norm": 0.908683717250824,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.8063,
      "step": 13
    },
    {
      "epoch": 0.8205128205128205,
      "grad_norm": 0.9696321487426758,
      "learning_rate": 7.077075065009433e-05,
      "loss": 0.728,
      "step": 14
    },
    {
      "epoch": 0.8791208791208791,
      "grad_norm": 0.7471110224723816,
      "learning_rate": 6.635339816587109e-05,
      "loss": 0.7831,
      "step": 15
    },
    {
      "epoch": 0.9377289377289377,
      "grad_norm": 1.1943230628967285,
      "learning_rate": 6.178794677547137e-05,
      "loss": 0.828,
      "step": 16
    },
    {
      "epoch": 0.9963369963369964,
      "grad_norm": 1.1461800336837769,
      "learning_rate": 5.7115741913664264e-05,
      "loss": 0.8597,
      "step": 17
    },
    {
      "epoch": 1.054945054945055,
      "grad_norm": 1.492602825164795,
      "learning_rate": 5.2379095791187124e-05,
      "loss": 1.5238,
      "step": 18
    },
    {
      "epoch": 1.1135531135531136,
      "grad_norm": 1.4935457706451416,
      "learning_rate": 4.762090420881289e-05,
      "loss": 0.7688,
      "step": 19
    },
    {
      "epoch": 1.1721611721611722,
      "grad_norm": 1.063358187675476,
      "learning_rate": 4.288425808633575e-05,
      "loss": 0.8187,
      "step": 20
    },
    {
      "epoch": 1.2307692307692308,
      "grad_norm": 0.6111706495285034,
      "learning_rate": 3.821205322452863e-05,
      "loss": 0.8065,
      "step": 21
    },
    {
      "epoch": 1.2893772893772895,
      "grad_norm": 0.592510998249054,
      "learning_rate": 3.364660183412892e-05,
      "loss": 0.7727,
      "step": 22
    },
    {
      "epoch": 1.347985347985348,
      "grad_norm": 0.515056312084198,
      "learning_rate": 2.9229249349905684e-05,
      "loss": 0.7482,
      "step": 23
    },
    {
      "epoch": 1.4065934065934065,
      "grad_norm": 0.6317391395568848,
      "learning_rate": 2.500000000000001e-05,
      "loss": 0.7803,
      "step": 24
    },
    {
      "epoch": 1.4652014652014653,
      "grad_norm": 1.648141860961914,
      "learning_rate": 2.09971545214401e-05,
      "loss": 0.8176,
      "step": 25
    },
    {
      "epoch": 1.4652014652014653,
      "eval_loss": 0.7776047587394714,
      "eval_runtime": 11.7656,
      "eval_samples_per_second": 19.549,
      "eval_steps_per_second": 1.275,
      "step": 25
    },
    {
      "epoch": 1.5238095238095237,
      "grad_norm": 0.7313464879989624,
      "learning_rate": 1.725696330273575e-05,
      "loss": 0.7806,
      "step": 26
    },
    {
      "epoch": 1.5824175824175826,
      "grad_norm": 0.6874339580535889,
      "learning_rate": 1.3813298094746491e-05,
      "loss": 0.744,
      "step": 27
    },
    {
      "epoch": 1.641025641025641,
      "grad_norm": 0.6659790277481079,
      "learning_rate": 1.0697345262860636e-05,
      "loss": 0.8011,
      "step": 28
    },
    {
      "epoch": 1.6996336996336996,
      "grad_norm": 0.8043178915977478,
      "learning_rate": 7.937323358440935e-06,
      "loss": 0.8361,
      "step": 29
    },
    {
      "epoch": 1.7582417582417582,
      "grad_norm": 0.5227417349815369,
      "learning_rate": 5.558227567253832e-06,
      "loss": 0.7979,
      "step": 30
    },
    {
      "epoch": 1.8168498168498168,
      "grad_norm": 0.5979397892951965,
      "learning_rate": 3.581603349196372e-06,
      "loss": 0.7131,
      "step": 31
    },
    {
      "epoch": 1.8754578754578755,
      "grad_norm": 0.43952739238739014,
      "learning_rate": 2.0253513192751373e-06,
      "loss": 0.7307,
      "step": 32
    },
    {
      "epoch": 1.934065934065934,
      "grad_norm": 0.8428590297698975,
      "learning_rate": 9.035651368646648e-07,
      "loss": 0.7661,
      "step": 33
    },
    {
      "epoch": 1.9926739926739927,
      "grad_norm": 0.6554544568061829,
      "learning_rate": 2.2640387134577058e-07,
      "loss": 0.8265,
      "step": 34
    },
    {
      "epoch": 2.051282051282051,
      "grad_norm": 2.641814708709717,
      "learning_rate": 0.0,
      "loss": 1.6606,
      "step": 35
    }
  ],
  "logging_steps": 1,
  "max_steps": 35,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.283934412917965e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}