{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9959579628132579,
"eval_steps": 500,
"global_step": 154,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019401778496362168,
"grad_norm": 139.02731323242188,
"learning_rate": 1.9610389610389612e-05,
"loss": 1.8718,
"step": 3
},
{
"epoch": 0.038803556992724336,
"grad_norm": 31.607357025146484,
"learning_rate": 1.9220779220779222e-05,
"loss": 1.6642,
"step": 6
},
{
"epoch": 0.0582053354890865,
"grad_norm": 118.545166015625,
"learning_rate": 1.8831168831168833e-05,
"loss": 1.5334,
"step": 9
},
{
"epoch": 0.07760711398544867,
"grad_norm": 180.7499542236328,
"learning_rate": 1.8441558441558443e-05,
"loss": 1.396,
"step": 12
},
{
"epoch": 0.09700889248181083,
"grad_norm": 199.1714324951172,
"learning_rate": 1.8051948051948053e-05,
"loss": 1.3652,
"step": 15
},
{
"epoch": 0.116410670978173,
"grad_norm": 136.60414123535156,
"learning_rate": 1.7662337662337664e-05,
"loss": 1.3432,
"step": 18
},
{
"epoch": 0.13581244947453516,
"grad_norm": 82.5650634765625,
"learning_rate": 1.7272727272727274e-05,
"loss": 1.2614,
"step": 21
},
{
"epoch": 0.15521422797089734,
"grad_norm": 18.96686553955078,
"learning_rate": 1.6883116883116884e-05,
"loss": 1.2303,
"step": 24
},
{
"epoch": 0.1746160064672595,
"grad_norm": 7.933801174163818,
"learning_rate": 1.6493506493506495e-05,
"loss": 1.1984,
"step": 27
},
{
"epoch": 0.19401778496362165,
"grad_norm": 2.686699390411377,
"learning_rate": 1.6103896103896105e-05,
"loss": 1.1016,
"step": 30
},
{
"epoch": 0.21341956345998384,
"grad_norm": 1.455581545829773,
"learning_rate": 1.5714285714285715e-05,
"loss": 1.0671,
"step": 33
},
{
"epoch": 0.232821341956346,
"grad_norm": 0.5924062132835388,
"learning_rate": 1.5324675324675326e-05,
"loss": 1.012,
"step": 36
},
{
"epoch": 0.25222312045270817,
"grad_norm": 0.3087107837200165,
"learning_rate": 1.4935064935064936e-05,
"loss": 0.9758,
"step": 39
},
{
"epoch": 0.2716248989490703,
"grad_norm": 0.2992459535598755,
"learning_rate": 1.4545454545454546e-05,
"loss": 0.9262,
"step": 42
},
{
"epoch": 0.2910266774454325,
"grad_norm": 0.2895904779434204,
"learning_rate": 1.4155844155844157e-05,
"loss": 0.8271,
"step": 45
},
{
"epoch": 0.3104284559417947,
"grad_norm": 0.2948096692562103,
"learning_rate": 1.3766233766233767e-05,
"loss": 0.7895,
"step": 48
},
{
"epoch": 0.32983023443815684,
"grad_norm": 0.31464704871177673,
"learning_rate": 1.3376623376623377e-05,
"loss": 0.7299,
"step": 51
},
{
"epoch": 0.349232012934519,
"grad_norm": 0.3038002550601959,
"learning_rate": 1.2987012987012988e-05,
"loss": 0.6857,
"step": 54
},
{
"epoch": 0.36863379143088115,
"grad_norm": 0.33729803562164307,
"learning_rate": 1.25974025974026e-05,
"loss": 0.5946,
"step": 57
},
{
"epoch": 0.3880355699272433,
"grad_norm": 0.39213827252388,
"learning_rate": 1.2207792207792208e-05,
"loss": 0.5636,
"step": 60
},
{
"epoch": 0.4074373484236055,
"grad_norm": 0.3482286334037781,
"learning_rate": 1.181818181818182e-05,
"loss": 0.5094,
"step": 63
},
{
"epoch": 0.42683912691996767,
"grad_norm": 0.3112964630126953,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.4541,
"step": 66
},
{
"epoch": 0.4462409054163298,
"grad_norm": 0.26819908618927,
"learning_rate": 1.1038961038961041e-05,
"loss": 0.4181,
"step": 69
},
{
"epoch": 0.465642683912692,
"grad_norm": 0.28413137793540955,
"learning_rate": 1.064935064935065e-05,
"loss": 0.4095,
"step": 72
},
{
"epoch": 0.4850444624090542,
"grad_norm": 0.3022381365299225,
"learning_rate": 1.025974025974026e-05,
"loss": 0.3623,
"step": 75
},
{
"epoch": 0.5044462409054163,
"grad_norm": 0.29346349835395813,
"learning_rate": 9.87012987012987e-06,
"loss": 0.3334,
"step": 78
},
{
"epoch": 0.5238480194017785,
"grad_norm": 0.2659854292869568,
"learning_rate": 9.48051948051948e-06,
"loss": 0.3115,
"step": 81
},
{
"epoch": 0.5432497978981407,
"grad_norm": 0.23122940957546234,
"learning_rate": 9.090909090909091e-06,
"loss": 0.2817,
"step": 84
},
{
"epoch": 0.5626515763945028,
"grad_norm": 0.2369256317615509,
"learning_rate": 8.701298701298701e-06,
"loss": 0.2809,
"step": 87
},
{
"epoch": 0.582053354890865,
"grad_norm": 0.2082873433828354,
"learning_rate": 8.311688311688313e-06,
"loss": 0.2455,
"step": 90
},
{
"epoch": 0.6014551333872271,
"grad_norm": 0.21645894646644592,
"learning_rate": 7.922077922077924e-06,
"loss": 0.2503,
"step": 93
},
{
"epoch": 0.6208569118835894,
"grad_norm": 0.19337739050388336,
"learning_rate": 7.532467532467533e-06,
"loss": 0.2286,
"step": 96
},
{
"epoch": 0.6402586903799515,
"grad_norm": 0.1808944046497345,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.2401,
"step": 99
},
{
"epoch": 0.6596604688763137,
"grad_norm": 0.1630856841802597,
"learning_rate": 6.753246753246754e-06,
"loss": 0.2251,
"step": 102
},
{
"epoch": 0.6790622473726758,
"grad_norm": 0.16326990723609924,
"learning_rate": 6.363636363636364e-06,
"loss": 0.2291,
"step": 105
},
{
"epoch": 0.698464025869038,
"grad_norm": 0.16061735153198242,
"learning_rate": 5.9740259740259746e-06,
"loss": 0.2331,
"step": 108
},
{
"epoch": 0.7178658043654002,
"grad_norm": 0.17352429032325745,
"learning_rate": 5.584415584415585e-06,
"loss": 0.2149,
"step": 111
},
{
"epoch": 0.7372675828617623,
"grad_norm": 0.17043530941009521,
"learning_rate": 5.194805194805194e-06,
"loss": 0.2187,
"step": 114
},
{
"epoch": 0.7566693613581245,
"grad_norm": 0.16479559242725372,
"learning_rate": 4.805194805194806e-06,
"loss": 0.2218,
"step": 117
},
{
"epoch": 0.7760711398544866,
"grad_norm": 0.17882439494132996,
"learning_rate": 4.415584415584416e-06,
"loss": 0.205,
"step": 120
},
{
"epoch": 0.7954729183508489,
"grad_norm": 0.1911778748035431,
"learning_rate": 4.025974025974026e-06,
"loss": 0.2172,
"step": 123
},
{
"epoch": 0.814874696847211,
"grad_norm": 0.17751498520374298,
"learning_rate": 3.6363636363636366e-06,
"loss": 0.2096,
"step": 126
},
{
"epoch": 0.8342764753435732,
"grad_norm": 0.1702156662940979,
"learning_rate": 3.246753246753247e-06,
"loss": 0.1911,
"step": 129
},
{
"epoch": 0.8536782538399353,
"grad_norm": 0.1764981597661972,
"learning_rate": 2.8571428571428573e-06,
"loss": 0.2103,
"step": 132
},
{
"epoch": 0.8730800323362975,
"grad_norm": 0.1592799872159958,
"learning_rate": 2.4675324675324676e-06,
"loss": 0.2053,
"step": 135
},
{
"epoch": 0.8924818108326596,
"grad_norm": 0.21512138843536377,
"learning_rate": 2.0779220779220784e-06,
"loss": 0.2197,
"step": 138
},
{
"epoch": 0.9118835893290218,
"grad_norm": 0.17707495391368866,
"learning_rate": 1.6883116883116885e-06,
"loss": 0.2051,
"step": 141
},
{
"epoch": 0.931285367825384,
"grad_norm": 0.1585138887166977,
"learning_rate": 1.2987012987012986e-06,
"loss": 0.1984,
"step": 144
},
{
"epoch": 0.9506871463217461,
"grad_norm": 0.15231232345104218,
"learning_rate": 9.090909090909091e-07,
"loss": 0.1774,
"step": 147
},
{
"epoch": 0.9700889248181084,
"grad_norm": 0.15338800847530365,
"learning_rate": 5.194805194805196e-07,
"loss": 0.2046,
"step": 150
},
{
"epoch": 0.9894907033144705,
"grad_norm": 0.16579587757587433,
"learning_rate": 1.298701298701299e-07,
"loss": 0.1871,
"step": 153
}
],
"logging_steps": 3,
"max_steps": 154,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.5705942959542764e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}