|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.06521030322791001, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0006521030322791001, |
|
"eval_loss": 1.4430840015411377, |
|
"eval_runtime": 35.4265, |
|
"eval_samples_per_second": 72.911, |
|
"eval_steps_per_second": 9.117, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0019563090968373, |
|
"grad_norm": 0.13245901465415955, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.3922, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0039126181936746, |
|
"grad_norm": 0.13980551064014435, |
|
"learning_rate": 3e-05, |
|
"loss": 1.35, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0058689272905119005, |
|
"grad_norm": 0.21757693588733673, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.4342, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0058689272905119005, |
|
"eval_loss": 1.4398877620697021, |
|
"eval_runtime": 35.4144, |
|
"eval_samples_per_second": 72.936, |
|
"eval_steps_per_second": 9.121, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0078252363873492, |
|
"grad_norm": 0.16242149472236633, |
|
"learning_rate": 4.993910125649561e-05, |
|
"loss": 1.3931, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0097815454841865, |
|
"grad_norm": 0.15830300748348236, |
|
"learning_rate": 4.962019382530521e-05, |
|
"loss": 1.367, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.011737854581023801, |
|
"grad_norm": 0.16229018568992615, |
|
"learning_rate": 4.9031542398457974e-05, |
|
"loss": 1.4666, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.011737854581023801, |
|
"eval_loss": 1.4245867729187012, |
|
"eval_runtime": 35.2817, |
|
"eval_samples_per_second": 73.211, |
|
"eval_steps_per_second": 9.155, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.013694163677861103, |
|
"grad_norm": 0.14549140632152557, |
|
"learning_rate": 4.817959636416969e-05, |
|
"loss": 1.4435, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0156504727746984, |
|
"grad_norm": 0.12280628830194473, |
|
"learning_rate": 4.707368982147318e-05, |
|
"loss": 1.4289, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.017606781871535703, |
|
"grad_norm": 0.1361985057592392, |
|
"learning_rate": 4.572593931387604e-05, |
|
"loss": 1.4174, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.017606781871535703, |
|
"eval_loss": 1.4081462621688843, |
|
"eval_runtime": 35.2327, |
|
"eval_samples_per_second": 73.313, |
|
"eval_steps_per_second": 9.168, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.019563090968373, |
|
"grad_norm": 0.1305856853723526, |
|
"learning_rate": 4.415111107797445e-05, |
|
"loss": 1.3956, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.021519400065210303, |
|
"grad_norm": 0.13506704568862915, |
|
"learning_rate": 4.2366459261474933e-05, |
|
"loss": 1.4642, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.023475709162047602, |
|
"grad_norm": 0.1571059674024582, |
|
"learning_rate": 4.039153688314145e-05, |
|
"loss": 1.4487, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.023475709162047602, |
|
"eval_loss": 1.3942054510116577, |
|
"eval_runtime": 35.4024, |
|
"eval_samples_per_second": 72.961, |
|
"eval_steps_per_second": 9.124, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.025432018258884904, |
|
"grad_norm": 0.12497388571500778, |
|
"learning_rate": 3.824798160583012e-05, |
|
"loss": 1.345, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.027388327355722206, |
|
"grad_norm": 0.1353573054075241, |
|
"learning_rate": 3.5959278669726935e-05, |
|
"loss": 1.3613, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.029344636452559504, |
|
"grad_norm": 0.11737760901451111, |
|
"learning_rate": 3.355050358314172e-05, |
|
"loss": 1.3878, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.029344636452559504, |
|
"eval_loss": 1.383650302886963, |
|
"eval_runtime": 35.4483, |
|
"eval_samples_per_second": 72.867, |
|
"eval_steps_per_second": 9.112, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0313009455493968, |
|
"grad_norm": 0.11926010251045227, |
|
"learning_rate": 3.104804738999169e-05, |
|
"loss": 1.4205, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03325725464623411, |
|
"grad_norm": 0.1115192323923111, |
|
"learning_rate": 2.8479327524001636e-05, |
|
"loss": 1.3493, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.035213563743071406, |
|
"grad_norm": 0.11480339616537094, |
|
"learning_rate": 2.587248741756253e-05, |
|
"loss": 1.3904, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.035213563743071406, |
|
"eval_loss": 1.376178503036499, |
|
"eval_runtime": 35.4933, |
|
"eval_samples_per_second": 72.774, |
|
"eval_steps_per_second": 9.1, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.037169872839908705, |
|
"grad_norm": 0.11822472512722015, |
|
"learning_rate": 2.3256088156396868e-05, |
|
"loss": 1.3313, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.039126181936746, |
|
"grad_norm": 0.1461370587348938, |
|
"learning_rate": 2.0658795558326743e-05, |
|
"loss": 1.3558, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04108249103358331, |
|
"grad_norm": 0.13045533001422882, |
|
"learning_rate": 1.8109066104575023e-05, |
|
"loss": 1.2992, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.04108249103358331, |
|
"eval_loss": 1.3712804317474365, |
|
"eval_runtime": 35.3891, |
|
"eval_samples_per_second": 72.989, |
|
"eval_steps_per_second": 9.127, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.04303880013042061, |
|
"grad_norm": 0.13522818684577942, |
|
"learning_rate": 1.56348351646022e-05, |
|
"loss": 1.244, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.044995109227257905, |
|
"grad_norm": 0.1401805877685547, |
|
"learning_rate": 1.3263210930352737e-05, |
|
"loss": 1.3158, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.046951418324095204, |
|
"grad_norm": 0.16507139801979065, |
|
"learning_rate": 1.1020177413231334e-05, |
|
"loss": 1.3923, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.046951418324095204, |
|
"eval_loss": 1.368349313735962, |
|
"eval_runtime": 35.4333, |
|
"eval_samples_per_second": 72.898, |
|
"eval_steps_per_second": 9.116, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.04890772742093251, |
|
"grad_norm": 0.1617535650730133, |
|
"learning_rate": 8.930309757836517e-06, |
|
"loss": 1.4659, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05086403651776981, |
|
"grad_norm": 0.15123188495635986, |
|
"learning_rate": 7.016504991533726e-06, |
|
"loss": 1.4194, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.052820345614607106, |
|
"grad_norm": 0.1255597174167633, |
|
"learning_rate": 5.299731159831953e-06, |
|
"loss": 1.3503, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.052820345614607106, |
|
"eval_loss": 1.3665692806243896, |
|
"eval_runtime": 35.4134, |
|
"eval_samples_per_second": 72.938, |
|
"eval_steps_per_second": 9.121, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.05477665471144441, |
|
"grad_norm": 0.1431252807378769, |
|
"learning_rate": 3.798797596089351e-06, |
|
"loss": 1.3153, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.05673296380828171, |
|
"grad_norm": 0.1532163769006729, |
|
"learning_rate": 2.5301488425208296e-06, |
|
"loss": 1.3877, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.05868927290511901, |
|
"grad_norm": 0.15569448471069336, |
|
"learning_rate": 1.5076844803522922e-06, |
|
"loss": 1.4249, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05868927290511901, |
|
"eval_loss": 1.3660001754760742, |
|
"eval_runtime": 35.493, |
|
"eval_samples_per_second": 72.775, |
|
"eval_steps_per_second": 9.1, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06064558200195631, |
|
"grad_norm": 0.1278517246246338, |
|
"learning_rate": 7.426068431000882e-07, |
|
"loss": 1.4409, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0626018910987936, |
|
"grad_norm": 0.10912717878818512, |
|
"learning_rate": 2.4329828146074095e-07, |
|
"loss": 1.331, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.06455820019563091, |
|
"grad_norm": 0.143110990524292, |
|
"learning_rate": 1.522932452260595e-08, |
|
"loss": 1.3777, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.06455820019563091, |
|
"eval_loss": 1.3658305406570435, |
|
"eval_runtime": 35.5981, |
|
"eval_samples_per_second": 72.56, |
|
"eval_steps_per_second": 9.074, |
|
"step": 99 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3352559257387008.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|