|
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 5.917808219178082,
|
|
"eval_steps": 9,
|
|
"global_step": 108,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.4931506849315068,
|
|
"grad_norm": 0.10725488513708115,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.419,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.4931506849315068,
|
|
"eval_runtime": 28.5255,
|
|
"eval_samples_per_second": 2.209,
|
|
"eval_steps_per_second": 1.122,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.9863013698630136,
|
|
"grad_norm": 0.15689760446548462,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.344,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.9863013698630136,
|
|
"eval_runtime": 28.4389,
|
|
"eval_samples_per_second": 2.215,
|
|
"eval_steps_per_second": 1.125,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 1.4794520547945205,
|
|
"grad_norm": 0.19748298823833466,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.3077,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 1.4794520547945205,
|
|
"eval_runtime": 28.4449,
|
|
"eval_samples_per_second": 2.215,
|
|
"eval_steps_per_second": 1.125,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 1.9726027397260273,
|
|
"grad_norm": 0.2421897053718567,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.1656,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 1.9726027397260273,
|
|
"eval_runtime": 28.4768,
|
|
"eval_samples_per_second": 2.212,
|
|
"eval_steps_per_second": 1.124,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 2.4657534246575343,
|
|
"grad_norm": 0.30667445063591003,
|
|
"learning_rate": 4e-05,
|
|
"loss": 1.0709,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 2.4657534246575343,
|
|
"eval_runtime": 28.4429,
|
|
"eval_samples_per_second": 2.215,
|
|
"eval_steps_per_second": 1.125,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 2.958904109589041,
|
|
"grad_norm": 0.43574613332748413,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.9364,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 2.958904109589041,
|
|
"eval_runtime": 28.42,
|
|
"eval_samples_per_second": 2.217,
|
|
"eval_steps_per_second": 1.126,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 3.452054794520548,
|
|
"grad_norm": 0.5465545058250427,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.7984,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 3.452054794520548,
|
|
"eval_runtime": 28.395,
|
|
"eval_samples_per_second": 2.219,
|
|
"eval_steps_per_second": 1.127,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 3.9452054794520546,
|
|
"grad_norm": 0.3935684561729431,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.6656,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 3.9452054794520546,
|
|
"eval_runtime": 28.3262,
|
|
"eval_samples_per_second": 2.224,
|
|
"eval_steps_per_second": 1.13,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 4.438356164383562,
|
|
"grad_norm": 0.2613831162452698,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.6008,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 4.438356164383562,
|
|
"eval_runtime": 28.3252,
|
|
"eval_samples_per_second": 2.224,
|
|
"eval_steps_per_second": 1.13,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 4.931506849315069,
|
|
"grad_norm": 0.3327048718929291,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.5703,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 4.931506849315069,
|
|
"eval_runtime": 28.3611,
|
|
"eval_samples_per_second": 2.221,
|
|
"eval_steps_per_second": 1.128,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 5.424657534246576,
|
|
"grad_norm": 0.26965251564979553,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.5243,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 5.424657534246576,
|
|
"eval_runtime": 28.3252,
|
|
"eval_samples_per_second": 2.224,
|
|
"eval_steps_per_second": 1.13,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 5.917808219178082,
|
|
"grad_norm": 0.2745685577392578,
|
|
"learning_rate": 4e-05,
|
|
"loss": 0.5232,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 5.917808219178082,
|
|
"eval_runtime": 28.3761,
|
|
"eval_samples_per_second": 2.22,
|
|
"eval_steps_per_second": 1.128,
|
|
"step": 108
|
|
}
|
|
],
|
|
"logging_steps": 9,
|
|
"max_steps": 108,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 6,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 8.763528609792e+16,
|
|
"train_batch_size": 2,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|