{
  "best_metric": 0.4162628650665283,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.7586533902323376,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.015173067804646752,
      "grad_norm": 17.388423919677734,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 30.457,
      "step": 1
    },
    {
      "epoch": 0.015173067804646752,
      "eval_loss": 0.8401981592178345,
      "eval_runtime": 4.1216,
      "eval_samples_per_second": 12.131,
      "eval_steps_per_second": 12.131,
      "step": 1
    },
    {
      "epoch": 0.030346135609293504,
      "grad_norm": 10.178067207336426,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 30.335,
      "step": 2
    },
    {
      "epoch": 0.04551920341394026,
      "grad_norm": 10.258736610412598,
      "learning_rate": 8.999999999999999e-05,
      "loss": 28.3123,
      "step": 3
    },
    {
      "epoch": 0.06069227121858701,
      "grad_norm": 10.296838760375977,
      "learning_rate": 0.00011999999999999999,
      "loss": 27.9287,
      "step": 4
    },
    {
      "epoch": 0.07586533902323377,
      "grad_norm": 12.867237091064453,
      "learning_rate": 0.00015,
      "loss": 25.3598,
      "step": 5
    },
    {
      "epoch": 0.09103840682788052,
      "grad_norm": 12.431685447692871,
      "learning_rate": 0.00017999999999999998,
      "loss": 22.2146,
      "step": 6
    },
    {
      "epoch": 0.10621147463252727,
      "grad_norm": 13.944931983947754,
      "learning_rate": 0.00020999999999999998,
      "loss": 19.7453,
      "step": 7
    },
    {
      "epoch": 0.12138454243717402,
      "grad_norm": 23.274085998535156,
      "learning_rate": 0.00023999999999999998,
      "loss": 19.6562,
      "step": 8
    },
    {
      "epoch": 0.13655761024182078,
      "grad_norm": 23.921234130859375,
      "learning_rate": 0.00027,
      "loss": 15.7789,
      "step": 9
    },
    {
      "epoch": 0.15173067804646753,
      "grad_norm": 31.0799560546875,
      "learning_rate": 0.0003,
      "loss": 18.6639,
      "step": 10
    },
    {
      "epoch": 0.16690374585111428,
      "grad_norm": 27.46465301513672,
      "learning_rate": 0.00029995027012714694,
      "loss": 15.0467,
      "step": 11
    },
    {
      "epoch": 0.18207681365576103,
      "grad_norm": 21.228845596313477,
      "learning_rate": 0.00029980111348272456,
      "loss": 16.0005,
      "step": 12
    },
    {
      "epoch": 0.19724988146040778,
      "grad_norm": 17.686410903930664,
      "learning_rate": 0.00029955262896727894,
      "loss": 15.2921,
      "step": 13
    },
    {
      "epoch": 0.21242294926505453,
      "grad_norm": 16.29988670349121,
      "learning_rate": 0.00029920498134218835,
      "loss": 14.4487,
      "step": 14
    },
    {
      "epoch": 0.22759601706970128,
      "grad_norm": 18.719757080078125,
      "learning_rate": 0.0002987584011204152,
      "loss": 12.782,
      "step": 15
    },
    {
      "epoch": 0.24276908487434803,
      "grad_norm": 19.07723617553711,
      "learning_rate": 0.0002982131844136615,
      "loss": 12.9955,
      "step": 16
    },
    {
      "epoch": 0.2579421526789948,
      "grad_norm": 45.31391143798828,
      "learning_rate": 0.0002975696927360274,
      "loss": 23.8883,
      "step": 17
    },
    {
      "epoch": 0.27311522048364156,
      "grad_norm": 27.076311111450195,
      "learning_rate": 0.0002968283527643036,
      "loss": 21.5994,
      "step": 18
    },
    {
      "epoch": 0.2882882882882883,
      "grad_norm": 15.616254806518555,
      "learning_rate": 0.00029598965605505737,
      "loss": 18.592,
      "step": 19
    },
    {
      "epoch": 0.30346135609293506,
      "grad_norm": 18.812135696411133,
      "learning_rate": 0.000295054158718698,
      "loss": 16.3996,
      "step": 20
    },
    {
      "epoch": 0.3186344238975818,
      "grad_norm": 14.7747163772583,
      "learning_rate": 0.0002940224810507402,
      "loss": 16.5481,
      "step": 21
    },
    {
      "epoch": 0.33380749170222856,
      "grad_norm": 13.129542350769043,
      "learning_rate": 0.00029289530712050735,
      "loss": 14.6397,
      "step": 22
    },
    {
      "epoch": 0.3489805595068753,
      "grad_norm": 13.189936637878418,
      "learning_rate": 0.0002916733843175492,
      "loss": 14.889,
      "step": 23
    },
    {
      "epoch": 0.36415362731152207,
      "grad_norm": 11.230732917785645,
      "learning_rate": 0.000290357522856074,
      "loss": 13.3495,
      "step": 24
    },
    {
      "epoch": 0.3793266951161688,
      "grad_norm": 10.45602798461914,
      "learning_rate": 0.0002889485952377242,
      "loss": 13.97,
      "step": 25
    },
    {
      "epoch": 0.3793266951161688,
      "eval_loss": 0.4615178406238556,
      "eval_runtime": 4.1945,
      "eval_samples_per_second": 11.92,
      "eval_steps_per_second": 11.92,
      "step": 25
    },
    {
      "epoch": 0.39449976292081557,
      "grad_norm": 9.400847434997559,
      "learning_rate": 0.0002874475356730507,
      "loss": 13.0953,
      "step": 26
    },
    {
      "epoch": 0.4096728307254623,
      "grad_norm": 10.215126037597656,
      "learning_rate": 0.0002858553394620707,
      "loss": 14.0127,
      "step": 27
    },
    {
      "epoch": 0.42484589853010907,
      "grad_norm": 8.064435958862305,
      "learning_rate": 0.0002841730623343193,
      "loss": 12.4928,
      "step": 28
    },
    {
      "epoch": 0.4400189663347558,
      "grad_norm": 10.052416801452637,
      "learning_rate": 0.00028240181974883207,
      "loss": 14.195,
      "step": 29
    },
    {
      "epoch": 0.45519203413940257,
      "grad_norm": 9.893036842346191,
      "learning_rate": 0.00028054278615452326,
      "loss": 12.9534,
      "step": 30
    },
    {
      "epoch": 0.4703651019440493,
      "grad_norm": 9.579009056091309,
      "learning_rate": 0.0002785971942114498,
      "loss": 11.9894,
      "step": 31
    },
    {
      "epoch": 0.48553816974869607,
      "grad_norm": 10.615213394165039,
      "learning_rate": 0.0002765663339734778,
      "loss": 11.5054,
      "step": 32
    },
    {
      "epoch": 0.5007112375533428,
      "grad_norm": 18.542469024658203,
      "learning_rate": 0.0002744515520328928,
      "loss": 17.3979,
      "step": 33
    },
    {
      "epoch": 0.5158843053579896,
      "grad_norm": 15.90185260772705,
      "learning_rate": 0.00027225425062752165,
      "loss": 16.0495,
      "step": 34
    },
    {
      "epoch": 0.5310573731626363,
      "grad_norm": 12.792567253112793,
      "learning_rate": 0.0002699758867109579,
      "loss": 15.2643,
      "step": 35
    },
    {
      "epoch": 0.5462304409672831,
      "grad_norm": 9.845219612121582,
      "learning_rate": 0.0002676179709865066,
      "loss": 13.5142,
      "step": 36
    },
    {
      "epoch": 0.5614035087719298,
      "grad_norm": 12.22781753540039,
      "learning_rate": 0.00026518206690549,
      "loss": 14.2178,
      "step": 37
    },
    {
      "epoch": 0.5765765765765766,
      "grad_norm": 10.745471954345703,
      "learning_rate": 0.0002626697896305779,
      "loss": 12.9431,
      "step": 38
    },
    {
      "epoch": 0.5917496443812233,
      "grad_norm": 10.67679500579834,
      "learning_rate": 0.00026008280496482984,
      "loss": 15.1994,
      "step": 39
    },
    {
      "epoch": 0.6069227121858701,
      "grad_norm": 8.882881164550781,
      "learning_rate": 0.000257422828247159,
      "loss": 14.0005,
      "step": 40
    },
    {
      "epoch": 0.6220957799905168,
      "grad_norm": 9.826411247253418,
      "learning_rate": 0.00025469162321495147,
      "loss": 12.5264,
      "step": 41
    },
    {
      "epoch": 0.6372688477951636,
      "grad_norm": 8.515768051147461,
      "learning_rate": 0.00025189100083459397,
      "loss": 11.0442,
      "step": 42
    },
    {
      "epoch": 0.6524419155998104,
      "grad_norm": 9.0559720993042,
      "learning_rate": 0.00024902281810068475,
      "loss": 11.7402,
      "step": 43
    },
    {
      "epoch": 0.6676149834044571,
      "grad_norm": 9.556861877441406,
      "learning_rate": 0.0002460889768047263,
      "loss": 12.4502,
      "step": 44
    },
    {
      "epoch": 0.6827880512091038,
      "grad_norm": 8.837124824523926,
      "learning_rate": 0.0002430914222741134,
      "loss": 12.4087,
      "step": 45
    },
    {
      "epoch": 0.6979611190137506,
      "grad_norm": 15.878302574157715,
      "learning_rate": 0.00024003214208225522,
      "loss": 11.0582,
      "step": 46
    },
    {
      "epoch": 0.7131341868183974,
      "grad_norm": 9.87769603729248,
      "learning_rate": 0.00023691316473068452,
      "loss": 11.3891,
      "step": 47
    },
    {
      "epoch": 0.7283072546230441,
      "grad_norm": 8.35490608215332,
      "learning_rate": 0.00023373655830402968,
      "loss": 9.9121,
      "step": 48
    },
    {
      "epoch": 0.7434803224276908,
      "grad_norm": 17.935434341430664,
      "learning_rate": 0.00023050442909874007,
      "loss": 16.9965,
      "step": 49
    },
    {
      "epoch": 0.7586533902323376,
      "grad_norm": 15.285179138183594,
      "learning_rate": 0.00022721892022647462,
      "loss": 16.4137,
      "step": 50
    },
    {
      "epoch": 0.7586533902323376,
      "eval_loss": 0.4162628650665283,
      "eval_runtime": 4.1979,
      "eval_samples_per_second": 11.911,
      "eval_steps_per_second": 11.911,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 132,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.95507593986048e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}