|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.009048545446319504,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 9.048545446319505e-05,
      "eval_loss": 2.0614755153656006,
      "eval_runtime": 1431.9492,
      "eval_samples_per_second": 12.998,
      "eval_steps_per_second": 1.625,
      "step": 1
    },
    {
      "epoch": 0.00027145636338958513,
      "grad_norm": 3.0326344966888428,
      "learning_rate": 1.5e-05,
      "loss": 1.1455,
      "step": 3
    },
    {
      "epoch": 0.0005429127267791703,
      "grad_norm": 2.6346302032470703,
      "learning_rate": 3e-05,
      "loss": 1.2541,
      "step": 6
    },
    {
      "epoch": 0.0008143690901687554,
      "grad_norm": 2.24157977104187,
      "learning_rate": 4.5e-05,
      "loss": 1.1746,
      "step": 9
    },
    {
      "epoch": 0.0008143690901687554,
      "eval_loss": 1.376136302947998,
      "eval_runtime": 1439.157,
      "eval_samples_per_second": 12.933,
      "eval_steps_per_second": 1.617,
      "step": 9
    },
    {
      "epoch": 0.0010858254535583405,
      "grad_norm": 2.4520862102508545,
      "learning_rate": 4.993910125649561e-05,
      "loss": 0.9936,
      "step": 12
    },
    {
      "epoch": 0.0013572818169479257,
      "grad_norm": 2.1917343139648438,
      "learning_rate": 4.962019382530521e-05,
      "loss": 0.7332,
      "step": 15
    },
    {
      "epoch": 0.0016287381803375108,
      "grad_norm": 2.0286247730255127,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 0.6707,
      "step": 18
    },
    {
      "epoch": 0.0016287381803375108,
      "eval_loss": 0.708565354347229,
      "eval_runtime": 1438.9184,
      "eval_samples_per_second": 12.935,
      "eval_steps_per_second": 1.617,
      "step": 18
    },
    {
      "epoch": 0.001900194543727096,
      "grad_norm": 2.2299928665161133,
      "learning_rate": 4.817959636416969e-05,
      "loss": 0.5779,
      "step": 21
    },
    {
      "epoch": 0.002171650907116681,
      "grad_norm": 3.2380049228668213,
      "learning_rate": 4.707368982147318e-05,
      "loss": 0.6919,
      "step": 24
    },
    {
      "epoch": 0.002443107270506266,
      "grad_norm": 2.030574321746826,
      "learning_rate": 4.572593931387604e-05,
      "loss": 0.4923,
      "step": 27
    },
    {
      "epoch": 0.002443107270506266,
      "eval_loss": 0.5967397689819336,
      "eval_runtime": 1439.4951,
      "eval_samples_per_second": 12.93,
      "eval_steps_per_second": 1.617,
      "step": 27
    },
    {
      "epoch": 0.0027145636338958513,
      "grad_norm": 3.188636541366577,
      "learning_rate": 4.415111107797445e-05,
      "loss": 0.4312,
      "step": 30
    },
    {
      "epoch": 0.0029860199972854364,
      "grad_norm": 2.3065080642700195,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 0.6207,
      "step": 33
    },
    {
      "epoch": 0.0032574763606750216,
      "grad_norm": 1.9325222969055176,
      "learning_rate": 4.039153688314145e-05,
      "loss": 0.5588,
      "step": 36
    },
    {
      "epoch": 0.0032574763606750216,
      "eval_loss": 0.5455918908119202,
      "eval_runtime": 1439.2701,
      "eval_samples_per_second": 12.932,
      "eval_steps_per_second": 1.617,
      "step": 36
    },
    {
      "epoch": 0.0035289327240646067,
      "grad_norm": 2.0116512775421143,
      "learning_rate": 3.824798160583012e-05,
      "loss": 0.5004,
      "step": 39
    },
    {
      "epoch": 0.003800389087454192,
      "grad_norm": 2.0821337699890137,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 0.4352,
      "step": 42
    },
    {
      "epoch": 0.0040718454508437765,
      "grad_norm": 1.7970950603485107,
      "learning_rate": 3.355050358314172e-05,
      "loss": 0.533,
      "step": 45
    },
    {
      "epoch": 0.0040718454508437765,
      "eval_loss": 0.5220184326171875,
      "eval_runtime": 1438.7352,
      "eval_samples_per_second": 12.937,
      "eval_steps_per_second": 1.617,
      "step": 45
    },
    {
      "epoch": 0.004343301814233362,
      "grad_norm": 2.69805645942688,
      "learning_rate": 3.104804738999169e-05,
      "loss": 0.4621,
      "step": 48
    },
    {
      "epoch": 0.004614758177622947,
      "grad_norm": 1.981372594833374,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 0.6349,
      "step": 51
    },
    {
      "epoch": 0.004886214541012532,
      "grad_norm": 2.258195161819458,
      "learning_rate": 2.587248741756253e-05,
      "loss": 0.5486,
      "step": 54
    },
    {
      "epoch": 0.004886214541012532,
      "eval_loss": 0.5081658363342285,
      "eval_runtime": 1439.3002,
      "eval_samples_per_second": 12.932,
      "eval_steps_per_second": 1.617,
      "step": 54
    },
    {
      "epoch": 0.005157670904402117,
      "grad_norm": 1.517822504043579,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 0.5922,
      "step": 57
    },
    {
      "epoch": 0.005429127267791703,
      "grad_norm": 1.7860591411590576,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 0.437,
      "step": 60
    },
    {
      "epoch": 0.005700583631181287,
      "grad_norm": 4.32674503326416,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 0.5061,
      "step": 63
    },
    {
      "epoch": 0.005700583631181287,
      "eval_loss": 0.4976564645767212,
      "eval_runtime": 1439.5584,
      "eval_samples_per_second": 12.93,
      "eval_steps_per_second": 1.616,
      "step": 63
    },
    {
      "epoch": 0.005972039994570873,
      "grad_norm": 1.9437718391418457,
      "learning_rate": 1.56348351646022e-05,
      "loss": 0.5445,
      "step": 66
    },
    {
      "epoch": 0.006243496357960458,
      "grad_norm": 1.3337701559066772,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 0.4558,
      "step": 69
    },
    {
      "epoch": 0.006514952721350043,
      "grad_norm": 2.283390998840332,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 0.5451,
      "step": 72
    },
    {
      "epoch": 0.006514952721350043,
      "eval_loss": 0.4924093782901764,
      "eval_runtime": 1439.0551,
      "eval_samples_per_second": 12.934,
      "eval_steps_per_second": 1.617,
      "step": 72
    },
    {
      "epoch": 0.006786409084739628,
      "grad_norm": 2.103365421295166,
      "learning_rate": 8.930309757836517e-06,
      "loss": 0.5615,
      "step": 75
    },
    {
      "epoch": 0.007057865448129213,
      "grad_norm": 1.8247758150100708,
      "learning_rate": 7.016504991533726e-06,
      "loss": 0.4278,
      "step": 78
    },
    {
      "epoch": 0.007329321811518798,
      "grad_norm": 2.0524744987487793,
      "learning_rate": 5.299731159831953e-06,
      "loss": 0.5058,
      "step": 81
    },
    {
      "epoch": 0.007329321811518798,
      "eval_loss": 0.48786625266075134,
      "eval_runtime": 1439.1036,
      "eval_samples_per_second": 12.934,
      "eval_steps_per_second": 1.617,
      "step": 81
    },
    {
      "epoch": 0.007600778174908384,
      "grad_norm": 2.635622501373291,
      "learning_rate": 3.798797596089351e-06,
      "loss": 0.5012,
      "step": 84
    },
    {
      "epoch": 0.00787223453829797,
      "grad_norm": 1.4843945503234863,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 0.4902,
      "step": 87
    },
    {
      "epoch": 0.008143690901687553,
      "grad_norm": 1.9264837503433228,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 0.4289,
      "step": 90
    },
    {
      "epoch": 0.008143690901687553,
      "eval_loss": 0.48586252331733704,
      "eval_runtime": 1438.3431,
      "eval_samples_per_second": 12.941,
      "eval_steps_per_second": 1.618,
      "step": 90
    },
    {
      "epoch": 0.008415147265077139,
      "grad_norm": 2.084927797317505,
      "learning_rate": 7.426068431000882e-07,
      "loss": 0.5326,
      "step": 93
    },
    {
      "epoch": 0.008686603628466724,
      "grad_norm": 1.7900018692016602,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 0.4558,
      "step": 96
    },
    {
      "epoch": 0.00895805999185631,
      "grad_norm": 1.933836817741394,
      "learning_rate": 1.522932452260595e-08,
      "loss": 0.5505,
      "step": 99
    },
    {
      "epoch": 0.00895805999185631,
      "eval_loss": 0.4853900074958801,
      "eval_runtime": 1438.1766,
      "eval_samples_per_second": 12.942,
      "eval_steps_per_second": 1.618,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.44418812198912e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}