{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 17799,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08427439743805831,
      "grad_norm": 2.7859835624694824,
      "learning_rate": 4.85954267093657e-05,
      "loss": 1.5305,
      "step": 500
    },
    {
      "epoch": 0.16854879487611663,
      "grad_norm": 2.8764379024505615,
      "learning_rate": 4.719085341873139e-05,
      "loss": 1.392,
      "step": 1000
    },
    {
      "epoch": 0.25282319231417494,
      "grad_norm": 2.9515156745910645,
      "learning_rate": 4.578628012809709e-05,
      "loss": 1.3312,
      "step": 1500
    },
    {
      "epoch": 0.33709758975223325,
      "grad_norm": 2.6709630489349365,
      "learning_rate": 4.438170683746278e-05,
      "loss": 1.294,
      "step": 2000
    },
    {
      "epoch": 0.42137198719029156,
      "grad_norm": 2.601968765258789,
      "learning_rate": 4.2977133546828475e-05,
      "loss": 1.2561,
      "step": 2500
    },
    {
      "epoch": 0.5056463846283499,
      "grad_norm": 2.6964409351348877,
      "learning_rate": 4.157256025619417e-05,
      "loss": 1.2364,
      "step": 3000
    },
    {
      "epoch": 0.5899207820664082,
      "grad_norm": 2.479663610458374,
      "learning_rate": 4.016798696555987e-05,
      "loss": 1.215,
      "step": 3500
    },
    {
      "epoch": 0.6741951795044665,
      "grad_norm": 2.6350433826446533,
      "learning_rate": 3.876341367492556e-05,
      "loss": 1.1925,
      "step": 4000
    },
    {
      "epoch": 0.7584695769425248,
      "grad_norm": 2.7347450256347656,
      "learning_rate": 3.7358840384291254e-05,
      "loss": 1.1831,
      "step": 4500
    },
    {
      "epoch": 0.8427439743805831,
      "grad_norm": 2.7404720783233643,
      "learning_rate": 3.5954267093656953e-05,
      "loss": 1.1657,
      "step": 5000
    },
    {
      "epoch": 0.9270183718186416,
      "grad_norm": 2.71294903755188,
      "learning_rate": 3.454969380302264e-05,
      "loss": 1.1629,
      "step": 5500
    },
    {
      "epoch": 1.0112927692566998,
      "grad_norm": 2.6705377101898193,
      "learning_rate": 3.314512051238834e-05,
      "loss": 1.1435,
      "step": 6000
    },
    {
      "epoch": 1.0955671666947582,
      "grad_norm": 2.524048089981079,
      "learning_rate": 3.174054722175403e-05,
      "loss": 1.1304,
      "step": 6500
    },
    {
      "epoch": 1.1798415641328164,
      "grad_norm": 2.6250593662261963,
      "learning_rate": 3.0335973931119726e-05,
      "loss": 1.1167,
      "step": 7000
    },
    {
      "epoch": 1.2641159615708748,
      "grad_norm": 2.3546817302703857,
      "learning_rate": 2.8931400640485422e-05,
      "loss": 1.1121,
      "step": 7500
    },
    {
      "epoch": 1.348390359008933,
      "grad_norm": 2.7167067527770996,
      "learning_rate": 2.7526827349851115e-05,
      "loss": 1.1065,
      "step": 8000
    },
    {
      "epoch": 1.4326647564469914,
      "grad_norm": 2.4628195762634277,
      "learning_rate": 2.6122254059216812e-05,
      "loss": 1.0953,
      "step": 8500
    },
    {
      "epoch": 1.5169391538850499,
      "grad_norm": 2.47514009475708,
      "learning_rate": 2.4717680768582505e-05,
      "loss": 1.0898,
      "step": 9000
    },
    {
      "epoch": 1.601213551323108,
      "grad_norm": 2.240252733230591,
      "learning_rate": 2.33131074779482e-05,
      "loss": 1.0734,
      "step": 9500
    },
    {
      "epoch": 1.6854879487611663,
      "grad_norm": 2.6827552318573,
      "learning_rate": 2.1908534187313894e-05,
      "loss": 1.0771,
      "step": 10000
    },
    {
      "epoch": 1.7697623461992247,
      "grad_norm": 2.446568012237549,
      "learning_rate": 2.0503960896679587e-05,
      "loss": 1.0727,
      "step": 10500
    },
    {
      "epoch": 1.854036743637283,
      "grad_norm": 2.437731981277466,
      "learning_rate": 1.9099387606045284e-05,
      "loss": 1.0573,
      "step": 11000
    },
    {
      "epoch": 1.9383111410753413,
      "grad_norm": 2.6201038360595703,
      "learning_rate": 1.769481431541098e-05,
      "loss": 1.0561,
      "step": 11500
    },
    {
      "epoch": 2.0225855385133995,
      "grad_norm": 2.4613335132598877,
      "learning_rate": 1.6290241024776673e-05,
      "loss": 1.0531,
      "step": 12000
    },
    {
      "epoch": 2.106859935951458,
      "grad_norm": 2.459319591522217,
      "learning_rate": 1.488566773414237e-05,
      "loss": 1.0409,
      "step": 12500
    },
    {
      "epoch": 2.1911343333895164,
      "grad_norm": 2.536198616027832,
      "learning_rate": 1.3481094443508063e-05,
      "loss": 1.0388,
      "step": 13000
    },
    {
      "epoch": 2.2754087308275746,
      "grad_norm": 2.503535270690918,
      "learning_rate": 1.2076521152873758e-05,
      "loss": 1.0326,
      "step": 13500
    },
    {
      "epoch": 2.3596831282656328,
      "grad_norm": 2.47710919380188,
      "learning_rate": 1.0671947862239452e-05,
      "loss": 1.0246,
      "step": 14000
    },
    {
      "epoch": 2.4439575257036914,
      "grad_norm": 2.4275453090667725,
      "learning_rate": 9.267374571605147e-06,
      "loss": 1.0228,
      "step": 14500
    },
    {
      "epoch": 2.5282319231417496,
      "grad_norm": 2.4777779579162598,
      "learning_rate": 7.862801280970842e-06,
      "loss": 1.029,
      "step": 15000
    },
    {
      "epoch": 2.612506320579808,
      "grad_norm": 2.574960470199585,
      "learning_rate": 6.458227990336537e-06,
      "loss": 1.0215,
      "step": 15500
    },
    {
      "epoch": 2.696780718017866,
      "grad_norm": 2.5446507930755615,
      "learning_rate": 5.053654699702231e-06,
      "loss": 1.0206,
      "step": 16000
    },
    {
      "epoch": 2.7810551154559247,
      "grad_norm": 2.7360856533050537,
      "learning_rate": 3.6490814090679254e-06,
      "loss": 1.0215,
      "step": 16500
    },
    {
      "epoch": 2.865329512893983,
      "grad_norm": 2.4522464275360107,
      "learning_rate": 2.2445081184336198e-06,
      "loss": 1.017,
      "step": 17000
    },
    {
      "epoch": 2.949603910332041,
      "grad_norm": 2.5468955039978027,
      "learning_rate": 8.399348277993145e-07,
      "loss": 1.0105,
      "step": 17500
    }
  ],
  "logging_steps": 500,
  "max_steps": 17799,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4989245699264307e+17,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}