{
  "best_metric": 3.390733003616333,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 2.0373831775700935,
  "eval_steps": 25,
  "global_step": 54,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.037383177570093455,
      "grad_norm": 2.831024646759033,
      "learning_rate": 2.9999999999999997e-05,
      "loss": 7.8351,
      "step": 1
    },
    {
      "epoch": 0.037383177570093455,
      "eval_loss": 8.958544731140137,
      "eval_runtime": 4.6614,
      "eval_samples_per_second": 10.727,
      "eval_steps_per_second": 1.502,
      "step": 1
    },
    {
      "epoch": 0.07476635514018691,
      "grad_norm": 3.0343971252441406,
      "learning_rate": 5.9999999999999995e-05,
      "loss": 8.5717,
      "step": 2
    },
    {
      "epoch": 0.11214953271028037,
      "grad_norm": 3.345459461212158,
      "learning_rate": 8.999999999999999e-05,
      "loss": 8.8921,
      "step": 3
    },
    {
      "epoch": 0.14953271028037382,
      "grad_norm": 3.910405397415161,
      "learning_rate": 0.00011999999999999999,
      "loss": 9.2598,
      "step": 4
    },
    {
      "epoch": 0.18691588785046728,
      "grad_norm": 5.217698574066162,
      "learning_rate": 0.00015,
      "loss": 9.0714,
      "step": 5
    },
    {
      "epoch": 0.22429906542056074,
      "grad_norm": 10.640775680541992,
      "learning_rate": 0.00017999999999999998,
      "loss": 8.8201,
      "step": 6
    },
    {
      "epoch": 0.2616822429906542,
      "grad_norm": 4.532738208770752,
      "learning_rate": 0.00020999999999999998,
      "loss": 6.1276,
      "step": 7
    },
    {
      "epoch": 0.29906542056074764,
      "grad_norm": 6.563399791717529,
      "learning_rate": 0.00023999999999999998,
      "loss": 6.5805,
      "step": 8
    },
    {
      "epoch": 0.3364485981308411,
      "grad_norm": 5.796512603759766,
      "learning_rate": 0.00027,
      "loss": 5.9074,
      "step": 9
    },
    {
      "epoch": 0.37383177570093457,
      "grad_norm": 11.563618659973145,
      "learning_rate": 0.0003,
      "loss": 5.4115,
      "step": 10
    },
    {
      "epoch": 0.411214953271028,
      "grad_norm": 9.329169273376465,
      "learning_rate": 0.000299617817191538,
      "loss": 4.9419,
      "step": 11
    },
    {
      "epoch": 0.4485981308411215,
      "grad_norm": 8.141528129577637,
      "learning_rate": 0.0002984732162821399,
      "loss": 4.6769,
      "step": 12
    },
    {
      "epoch": 0.48598130841121495,
      "grad_norm": 4.846505641937256,
      "learning_rate": 0.00029657202989567393,
      "loss": 4.008,
      "step": 13
    },
    {
      "epoch": 0.5233644859813084,
      "grad_norm": 3.690450429916382,
      "learning_rate": 0.0002939239460421746,
      "loss": 4.4243,
      "step": 14
    },
    {
      "epoch": 0.5607476635514018,
      "grad_norm": 3.435105085372925,
      "learning_rate": 0.00029054245874996426,
      "loss": 3.9618,
      "step": 15
    },
    {
      "epoch": 0.5981308411214953,
      "grad_norm": 3.6001131534576416,
      "learning_rate": 0.00028644479930317775,
      "loss": 4.1266,
      "step": 16
    },
    {
      "epoch": 0.6355140186915887,
      "grad_norm": 3.0784387588500977,
      "learning_rate": 0.0002816518484350883,
      "loss": 3.7033,
      "step": 17
    },
    {
      "epoch": 0.6728971962616822,
      "grad_norm": 3.397775173187256,
      "learning_rate": 0.0002761880299246772,
      "loss": 3.7701,
      "step": 18
    },
    {
      "epoch": 0.7102803738317757,
      "grad_norm": 3.662536859512329,
      "learning_rate": 0.00027008118613865406,
      "loss": 3.6325,
      "step": 19
    },
    {
      "epoch": 0.7476635514018691,
      "grad_norm": 3.3628623485565186,
      "learning_rate": 0.00026336243615313873,
      "loss": 3.7407,
      "step": 20
    },
    {
      "epoch": 0.7850467289719626,
      "grad_norm": 2.746609687805176,
      "learning_rate": 0.00025606601717798207,
      "loss": 3.3609,
      "step": 21
    },
    {
      "epoch": 0.822429906542056,
      "grad_norm": 2.6766834259033203,
      "learning_rate": 0.00024822911009179276,
      "loss": 3.4941,
      "step": 22
    },
    {
      "epoch": 0.8598130841121495,
      "grad_norm": 3.1183571815490723,
      "learning_rate": 0.00023989164997670202,
      "loss": 3.4918,
      "step": 23
    },
    {
      "epoch": 0.897196261682243,
      "grad_norm": 3.998292922973633,
      "learning_rate": 0.00023109612261833963,
      "loss": 3.5491,
      "step": 24
    },
    {
      "epoch": 0.9345794392523364,
      "grad_norm": 2.7714507579803467,
      "learning_rate": 0.00022188734800800852,
      "loss": 3.1514,
      "step": 25
    },
    {
      "epoch": 0.9345794392523364,
      "eval_loss": 3.464606285095215,
      "eval_runtime": 4.1979,
      "eval_samples_per_second": 11.911,
      "eval_steps_per_second": 1.668,
      "step": 25
    },
    {
      "epoch": 0.9719626168224299,
      "grad_norm": 3.2895798683166504,
      "learning_rate": 0.00021231225195028297,
      "loss": 3.472,
      "step": 26
    },
    {
      "epoch": 1.0186915887850467,
      "grad_norm": 5.035872936248779,
      "learning_rate": 0.00020241962693986476,
      "loss": 5.0945,
      "step": 27
    },
    {
      "epoch": 1.0560747663551402,
      "grad_norm": 2.4314322471618652,
      "learning_rate": 0.00019225988352621445,
      "loss": 2.8699,
      "step": 28
    },
    {
      "epoch": 1.0934579439252337,
      "grad_norm": 3.1479110717773438,
      "learning_rate": 0.00018188479343294648,
      "loss": 3.0123,
      "step": 29
    },
    {
      "epoch": 1.1308411214953271,
      "grad_norm": 3.4593069553375244,
      "learning_rate": 0.00017134722574099276,
      "loss": 3.2986,
      "step": 30
    },
    {
      "epoch": 1.1682242990654206,
      "grad_norm": 2.846343517303467,
      "learning_rate": 0.00016070087747988482,
      "loss": 2.8362,
      "step": 31
    },
    {
      "epoch": 1.205607476635514,
      "grad_norm": 3.118037700653076,
      "learning_rate": 0.00015,
      "loss": 2.8713,
      "step": 32
    },
    {
      "epoch": 1.2429906542056075,
      "grad_norm": 2.8634281158447266,
      "learning_rate": 0.00013929912252011516,
      "loss": 2.7965,
      "step": 33
    },
    {
      "epoch": 1.280373831775701,
      "grad_norm": 2.5531721115112305,
      "learning_rate": 0.00012865277425900724,
      "loss": 2.4958,
      "step": 34
    },
    {
      "epoch": 1.3177570093457944,
      "grad_norm": 3.3842477798461914,
      "learning_rate": 0.00011811520656705348,
      "loss": 2.7512,
      "step": 35
    },
    {
      "epoch": 1.355140186915888,
      "grad_norm": 3.1077632904052734,
      "learning_rate": 0.00010774011647378553,
      "loss": 2.7183,
      "step": 36
    },
    {
      "epoch": 1.3925233644859814,
      "grad_norm": 3.4125425815582275,
      "learning_rate": 9.758037306013526e-05,
      "loss": 2.7866,
      "step": 37
    },
    {
      "epoch": 1.4299065420560748,
      "grad_norm": 3.162402629852295,
      "learning_rate": 8.768774804971705e-05,
      "loss": 2.5485,
      "step": 38
    },
    {
      "epoch": 1.4672897196261683,
      "grad_norm": 3.4466617107391357,
      "learning_rate": 7.811265199199152e-05,
      "loss": 2.8969,
      "step": 39
    },
    {
      "epoch": 1.5046728971962615,
      "grad_norm": 3.4401187896728516,
      "learning_rate": 6.890387738166041e-05,
      "loss": 2.5463,
      "step": 40
    },
    {
      "epoch": 1.542056074766355,
      "grad_norm": 3.6022398471832275,
      "learning_rate": 6.010835002329795e-05,
      "loss": 2.5524,
      "step": 41
    },
    {
      "epoch": 1.5794392523364484,
      "grad_norm": 3.837542772293091,
      "learning_rate": 5.1770889908207245e-05,
      "loss": 2.5875,
      "step": 42
    },
    {
      "epoch": 1.616822429906542,
      "grad_norm": 3.5455305576324463,
      "learning_rate": 4.3933982822017876e-05,
      "loss": 2.8521,
      "step": 43
    },
    {
      "epoch": 1.6542056074766354,
      "grad_norm": 3.434196710586548,
      "learning_rate": 3.663756384686127e-05,
      "loss": 2.5427,
      "step": 44
    },
    {
      "epoch": 1.6915887850467288,
      "grad_norm": 3.0461819171905518,
      "learning_rate": 2.9918813861345952e-05,
      "loss": 2.4531,
      "step": 45
    },
    {
      "epoch": 1.7289719626168223,
      "grad_norm": 3.3924200534820557,
      "learning_rate": 2.38119700753228e-05,
      "loss": 2.4146,
      "step": 46
    },
    {
      "epoch": 1.7663551401869158,
      "grad_norm": 3.772728681564331,
      "learning_rate": 1.834815156491165e-05,
      "loss": 2.5594,
      "step": 47
    },
    {
      "epoch": 1.8037383177570092,
      "grad_norm": 3.1197235584259033,
      "learning_rate": 1.3555200696822232e-05,
      "loss": 2.5783,
      "step": 48
    },
    {
      "epoch": 1.8411214953271027,
      "grad_norm": 3.5549519062042236,
      "learning_rate": 9.45754125003576e-06,
      "loss": 2.6462,
      "step": 49
    },
    {
      "epoch": 1.8785046728971961,
      "grad_norm": 4.010706901550293,
      "learning_rate": 6.076053957825411e-06,
      "loss": 2.9153,
      "step": 50
    },
    {
      "epoch": 1.8785046728971961,
      "eval_loss": 3.390733003616333,
      "eval_runtime": 4.1949,
      "eval_samples_per_second": 11.919,
      "eval_steps_per_second": 1.669,
      "step": 50
    },
    {
      "epoch": 1.9158878504672896,
      "grad_norm": 3.5007739067077637,
      "learning_rate": 3.4279701043260886e-06,
      "loss": 2.5603,
      "step": 51
    },
    {
      "epoch": 1.953271028037383,
      "grad_norm": 3.4901020526885986,
      "learning_rate": 1.5267837178600972e-06,
      "loss": 2.6862,
      "step": 52
    },
    {
      "epoch": 1.9906542056074765,
      "grad_norm": 5.5853047370910645,
      "learning_rate": 3.821828084619727e-07,
      "loss": 3.6647,
      "step": 53
    },
    {
      "epoch": 2.0373831775700935,
      "grad_norm": 2.95920729637146,
      "learning_rate": 0.0,
      "loss": 2.2325,
      "step": 54
    }
  ],
  "logging_steps": 1,
  "max_steps": 54,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.51315657885614e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}