albertoq's picture
Upload folder using huggingface_hub
0ea9146 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.917808219178082,
"eval_steps": 9,
"global_step": 108,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4931506849315068,
"grad_norm": 0.10725488513708115,
"learning_rate": 4e-05,
"loss": 1.419,
"step": 9
},
{
"epoch": 0.4931506849315068,
"eval_runtime": 28.5255,
"eval_samples_per_second": 2.209,
"eval_steps_per_second": 1.122,
"step": 9
},
{
"epoch": 0.9863013698630136,
"grad_norm": 0.15689760446548462,
"learning_rate": 4e-05,
"loss": 1.344,
"step": 18
},
{
"epoch": 0.9863013698630136,
"eval_runtime": 28.4389,
"eval_samples_per_second": 2.215,
"eval_steps_per_second": 1.125,
"step": 18
},
{
"epoch": 1.4794520547945205,
"grad_norm": 0.19748298823833466,
"learning_rate": 4e-05,
"loss": 1.3077,
"step": 27
},
{
"epoch": 1.4794520547945205,
"eval_runtime": 28.4449,
"eval_samples_per_second": 2.215,
"eval_steps_per_second": 1.125,
"step": 27
},
{
"epoch": 1.9726027397260273,
"grad_norm": 0.2421897053718567,
"learning_rate": 4e-05,
"loss": 1.1656,
"step": 36
},
{
"epoch": 1.9726027397260273,
"eval_runtime": 28.4768,
"eval_samples_per_second": 2.212,
"eval_steps_per_second": 1.124,
"step": 36
},
{
"epoch": 2.4657534246575343,
"grad_norm": 0.30667445063591003,
"learning_rate": 4e-05,
"loss": 1.0709,
"step": 45
},
{
"epoch": 2.4657534246575343,
"eval_runtime": 28.4429,
"eval_samples_per_second": 2.215,
"eval_steps_per_second": 1.125,
"step": 45
},
{
"epoch": 2.958904109589041,
"grad_norm": 0.43574613332748413,
"learning_rate": 4e-05,
"loss": 0.9364,
"step": 54
},
{
"epoch": 2.958904109589041,
"eval_runtime": 28.42,
"eval_samples_per_second": 2.217,
"eval_steps_per_second": 1.126,
"step": 54
},
{
"epoch": 3.452054794520548,
"grad_norm": 0.5465545058250427,
"learning_rate": 4e-05,
"loss": 0.7984,
"step": 63
},
{
"epoch": 3.452054794520548,
"eval_runtime": 28.395,
"eval_samples_per_second": 2.219,
"eval_steps_per_second": 1.127,
"step": 63
},
{
"epoch": 3.9452054794520546,
"grad_norm": 0.3935684561729431,
"learning_rate": 4e-05,
"loss": 0.6656,
"step": 72
},
{
"epoch": 3.9452054794520546,
"eval_runtime": 28.3262,
"eval_samples_per_second": 2.224,
"eval_steps_per_second": 1.13,
"step": 72
},
{
"epoch": 4.438356164383562,
"grad_norm": 0.2613831162452698,
"learning_rate": 4e-05,
"loss": 0.6008,
"step": 81
},
{
"epoch": 4.438356164383562,
"eval_runtime": 28.3252,
"eval_samples_per_second": 2.224,
"eval_steps_per_second": 1.13,
"step": 81
},
{
"epoch": 4.931506849315069,
"grad_norm": 0.3327048718929291,
"learning_rate": 4e-05,
"loss": 0.5703,
"step": 90
},
{
"epoch": 4.931506849315069,
"eval_runtime": 28.3611,
"eval_samples_per_second": 2.221,
"eval_steps_per_second": 1.128,
"step": 90
},
{
"epoch": 5.424657534246576,
"grad_norm": 0.26965251564979553,
"learning_rate": 4e-05,
"loss": 0.5243,
"step": 99
},
{
"epoch": 5.424657534246576,
"eval_runtime": 28.3252,
"eval_samples_per_second": 2.224,
"eval_steps_per_second": 1.13,
"step": 99
},
{
"epoch": 5.917808219178082,
"grad_norm": 0.2745685577392578,
"learning_rate": 4e-05,
"loss": 0.5232,
"step": 108
},
{
"epoch": 5.917808219178082,
"eval_runtime": 28.3761,
"eval_samples_per_second": 2.22,
"eval_steps_per_second": 1.128,
"step": 108
}
],
"logging_steps": 9,
"max_steps": 108,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.763528609792e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}