|
{ |
|
"best_metric": NaN, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 2.905525846702317, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0570409982174688, |
|
"grad_norm": NaN, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0570409982174688, |
|
"eval_loss": NaN, |
|
"eval_runtime": 4.318, |
|
"eval_samples_per_second": 11.579, |
|
"eval_steps_per_second": 3.011, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1140819964349376, |
|
"grad_norm": NaN, |
|
"learning_rate": 0.0001, |
|
"loss": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.1711229946524064, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.991464979316699e-05, |
|
"loss": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.2281639928698752, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.965892293523712e-05, |
|
"loss": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.28520499108734404, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.923378948577559e-05, |
|
"loss": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.3422459893048128, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.864086212157544e-05, |
|
"loss": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.39928698752228164, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.788239001922206e-05, |
|
"loss": 0.0, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.4563279857397504, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.6961250323196e-05, |
|
"loss": 0.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.5133689839572193, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.588093723187857e-05, |
|
"loss": 0.0, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.5704099821746881, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.46455487428603e-05, |
|
"loss": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.6274509803921569, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.325977110783264e-05, |
|
"loss": 0.0, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.6844919786096256, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.172886105602998e-05, |
|
"loss": 0.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.7415329768270945, |
|
"grad_norm": NaN, |
|
"learning_rate": 9.005862585365517e-05, |
|
"loss": 0.0, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.7985739750445633, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.825540127492967e-05, |
|
"loss": 0.0, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.8556149732620321, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.632602756833172e-05, |
|
"loss": 0.0, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.9126559714795008, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.42778235091909e-05, |
|
"loss": 0.0, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.9696969696969697, |
|
"grad_norm": NaN, |
|
"learning_rate": 8.211855863706654e-05, |
|
"loss": 0.0, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.053475935828877, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.985642378322276e-05, |
|
"loss": 0.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.1105169340463459, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.75e-05, |
|
"loss": 0.0, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.1675579322638145, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.505822600994424e-05, |
|
"loss": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.2245989304812834, |
|
"grad_norm": NaN, |
|
"learning_rate": 7.254036429817058e-05, |
|
"loss": 0.0, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.2816399286987523, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.995596597658468e-05, |
|
"loss": 0.0, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.338680926916221, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.731483455324374e-05, |
|
"loss": 0.0, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.3957219251336899, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.462698874429239e-05, |
|
"loss": 0.0, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.4527629233511585, |
|
"grad_norm": NaN, |
|
"learning_rate": 6.190262446954085e-05, |
|
"loss": 0.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.4527629233511585, |
|
"eval_loss": NaN, |
|
"eval_runtime": 4.3166, |
|
"eval_samples_per_second": 11.583, |
|
"eval_steps_per_second": 3.012, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.5098039215686274, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.9152076175848594e-05, |
|
"loss": 0.0, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.5668449197860963, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.6385777635027684e-05, |
|
"loss": 0.0, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.6238859180035652, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.361422236497235e-05, |
|
"loss": 0.0, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.6809269162210339, |
|
"grad_norm": NaN, |
|
"learning_rate": 5.0847923824151424e-05, |
|
"loss": 0.0, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.7379679144385025, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.809737553045916e-05, |
|
"loss": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.7950089126559714, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.537301125570763e-05, |
|
"loss": 0.0, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.8520499108734403, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.268516544675628e-05, |
|
"loss": 0.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.9090909090909092, |
|
"grad_norm": NaN, |
|
"learning_rate": 4.004403402341532e-05, |
|
"loss": 0.0, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.966131907308378, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.7459635701829435e-05, |
|
"loss": 0.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.049910873440285, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.494177399005578e-05, |
|
"loss": 0.0, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.106951871657754, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.250000000000001e-05, |
|
"loss": 0.0, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.163992869875223, |
|
"grad_norm": NaN, |
|
"learning_rate": 3.014357621677724e-05, |
|
"loss": 0.0, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.2210338680926918, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.7881441362933468e-05, |
|
"loss": 0.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.2780748663101607, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.5722176490809118e-05, |
|
"loss": 0.0, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.335115864527629, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.3673972431668306e-05, |
|
"loss": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.392156862745098, |
|
"grad_norm": NaN, |
|
"learning_rate": 2.1744598725070347e-05, |
|
"loss": 0.0, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 2.449197860962567, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.994137414634483e-05, |
|
"loss": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.5062388591800357, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.827113894397003e-05, |
|
"loss": 0.0, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 2.5632798573975046, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.674022889216737e-05, |
|
"loss": 0.0, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 2.620320855614973, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.53544512571397e-05, |
|
"loss": 0.0, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.677361853832442, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.4119062768121433e-05, |
|
"loss": 0.0, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.734402852049911, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.3038749676803994e-05, |
|
"loss": 0.0, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 2.7914438502673797, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.2117609980777959e-05, |
|
"loss": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.8484848484848486, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.1359137878424578e-05, |
|
"loss": 0.0, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 2.905525846702317, |
|
"grad_norm": NaN, |
|
"learning_rate": 1.076621051422442e-05, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.905525846702317, |
|
"eval_loss": NaN, |
|
"eval_runtime": 4.3476, |
|
"eval_samples_per_second": 11.501, |
|
"eval_steps_per_second": 2.99, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 53, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.6711086619806925e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|