|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0040718454508437765, |
|
"eval_steps": 9, |
|
"global_step": 45, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 9.048545446319505e-05, |
|
"eval_loss": 2.0614755153656006, |
|
"eval_runtime": 1431.9492, |
|
"eval_samples_per_second": 12.998, |
|
"eval_steps_per_second": 1.625, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00027145636338958513, |
|
"grad_norm": 3.0326344966888428, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.1455, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0005429127267791703, |
|
"grad_norm": 2.6346302032470703, |
|
"learning_rate": 3e-05, |
|
"loss": 1.2541, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0008143690901687554, |
|
"grad_norm": 2.24157977104187, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.1746, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008143690901687554, |
|
"eval_loss": 1.376136302947998, |
|
"eval_runtime": 1439.157, |
|
"eval_samples_per_second": 12.933, |
|
"eval_steps_per_second": 1.617, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0010858254535583405, |
|
"grad_norm": 2.4520862102508545, |
|
"learning_rate": 4.993910125649561e-05, |
|
"loss": 0.9936, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0013572818169479257, |
|
"grad_norm": 2.1917343139648438, |
|
"learning_rate": 4.962019382530521e-05, |
|
"loss": 0.7332, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0016287381803375108, |
|
"grad_norm": 2.0286247730255127, |
|
"learning_rate": 4.9031542398457974e-05, |
|
"loss": 0.6707, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0016287381803375108, |
|
"eval_loss": 0.708565354347229, |
|
"eval_runtime": 1438.9184, |
|
"eval_samples_per_second": 12.935, |
|
"eval_steps_per_second": 1.617, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.001900194543727096, |
|
"grad_norm": 2.2299928665161133, |
|
"learning_rate": 4.817959636416969e-05, |
|
"loss": 0.5779, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.002171650907116681, |
|
"grad_norm": 3.2380049228668213, |
|
"learning_rate": 4.707368982147318e-05, |
|
"loss": 0.6919, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.002443107270506266, |
|
"grad_norm": 2.030574321746826, |
|
"learning_rate": 4.572593931387604e-05, |
|
"loss": 0.4923, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.002443107270506266, |
|
"eval_loss": 0.5967397689819336, |
|
"eval_runtime": 1439.4951, |
|
"eval_samples_per_second": 12.93, |
|
"eval_steps_per_second": 1.617, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0027145636338958513, |
|
"grad_norm": 3.188636541366577, |
|
"learning_rate": 4.415111107797445e-05, |
|
"loss": 0.4312, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0029860199972854364, |
|
"grad_norm": 2.3065080642700195, |
|
"learning_rate": 4.2366459261474933e-05, |
|
"loss": 0.6207, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0032574763606750216, |
|
"grad_norm": 1.9325222969055176, |
|
"learning_rate": 4.039153688314145e-05, |
|
"loss": 0.5588, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0032574763606750216, |
|
"eval_loss": 0.5455918908119202, |
|
"eval_runtime": 1439.2701, |
|
"eval_samples_per_second": 12.932, |
|
"eval_steps_per_second": 1.617, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0035289327240646067, |
|
"grad_norm": 2.0116512775421143, |
|
"learning_rate": 3.824798160583012e-05, |
|
"loss": 0.5004, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.003800389087454192, |
|
"grad_norm": 2.0821337699890137, |
|
"learning_rate": 3.5959278669726935e-05, |
|
"loss": 0.4352, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0040718454508437765, |
|
"grad_norm": 1.7970950603485107, |
|
"learning_rate": 3.355050358314172e-05, |
|
"loss": 0.533, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0040718454508437765, |
|
"eval_loss": 0.5220184326171875, |
|
"eval_runtime": 1438.7352, |
|
"eval_samples_per_second": 12.937, |
|
"eval_steps_per_second": 1.617, |
|
"step": 45 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.49884654895104e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|