|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9959579628132579, |
|
"eval_steps": 500, |
|
"global_step": 154, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.019401778496362168, |
|
"grad_norm": 139.02731323242188, |
|
"learning_rate": 1.9610389610389612e-05, |
|
"loss": 1.8718, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.038803556992724336, |
|
"grad_norm": 31.607357025146484, |
|
"learning_rate": 1.9220779220779222e-05, |
|
"loss": 1.6642, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0582053354890865, |
|
"grad_norm": 118.545166015625, |
|
"learning_rate": 1.8831168831168833e-05, |
|
"loss": 1.5334, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.07760711398544867, |
|
"grad_norm": 180.7499542236328, |
|
"learning_rate": 1.8441558441558443e-05, |
|
"loss": 1.396, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09700889248181083, |
|
"grad_norm": 199.1714324951172, |
|
"learning_rate": 1.8051948051948053e-05, |
|
"loss": 1.3652, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.116410670978173, |
|
"grad_norm": 136.60414123535156, |
|
"learning_rate": 1.7662337662337664e-05, |
|
"loss": 1.3432, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.13581244947453516, |
|
"grad_norm": 82.5650634765625, |
|
"learning_rate": 1.7272727272727274e-05, |
|
"loss": 1.2614, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.15521422797089734, |
|
"grad_norm": 18.96686553955078, |
|
"learning_rate": 1.6883116883116884e-05, |
|
"loss": 1.2303, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1746160064672595, |
|
"grad_norm": 7.933801174163818, |
|
"learning_rate": 1.6493506493506495e-05, |
|
"loss": 1.1984, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.19401778496362165, |
|
"grad_norm": 2.686699390411377, |
|
"learning_rate": 1.6103896103896105e-05, |
|
"loss": 1.1016, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.21341956345998384, |
|
"grad_norm": 1.455581545829773, |
|
"learning_rate": 1.5714285714285715e-05, |
|
"loss": 1.0671, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.232821341956346, |
|
"grad_norm": 0.5924062132835388, |
|
"learning_rate": 1.5324675324675326e-05, |
|
"loss": 1.012, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.25222312045270817, |
|
"grad_norm": 0.3087107837200165, |
|
"learning_rate": 1.4935064935064936e-05, |
|
"loss": 0.9758, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.2716248989490703, |
|
"grad_norm": 0.2992459535598755, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 0.9262, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2910266774454325, |
|
"grad_norm": 0.2895904779434204, |
|
"learning_rate": 1.4155844155844157e-05, |
|
"loss": 0.8271, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.3104284559417947, |
|
"grad_norm": 0.2948096692562103, |
|
"learning_rate": 1.3766233766233767e-05, |
|
"loss": 0.7895, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.32983023443815684, |
|
"grad_norm": 0.31464704871177673, |
|
"learning_rate": 1.3376623376623377e-05, |
|
"loss": 0.7299, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.349232012934519, |
|
"grad_norm": 0.3038002550601959, |
|
"learning_rate": 1.2987012987012988e-05, |
|
"loss": 0.6857, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.36863379143088115, |
|
"grad_norm": 0.33729803562164307, |
|
"learning_rate": 1.25974025974026e-05, |
|
"loss": 0.5946, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3880355699272433, |
|
"grad_norm": 0.39213827252388, |
|
"learning_rate": 1.2207792207792208e-05, |
|
"loss": 0.5636, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4074373484236055, |
|
"grad_norm": 0.3482286334037781, |
|
"learning_rate": 1.181818181818182e-05, |
|
"loss": 0.5094, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.42683912691996767, |
|
"grad_norm": 0.3112964630126953, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.4541, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4462409054163298, |
|
"grad_norm": 0.26819908618927, |
|
"learning_rate": 1.1038961038961041e-05, |
|
"loss": 0.4181, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.465642683912692, |
|
"grad_norm": 0.28413137793540955, |
|
"learning_rate": 1.064935064935065e-05, |
|
"loss": 0.4095, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4850444624090542, |
|
"grad_norm": 0.3022381365299225, |
|
"learning_rate": 1.025974025974026e-05, |
|
"loss": 0.3623, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.5044462409054163, |
|
"grad_norm": 0.29346349835395813, |
|
"learning_rate": 9.87012987012987e-06, |
|
"loss": 0.3334, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5238480194017785, |
|
"grad_norm": 0.2659854292869568, |
|
"learning_rate": 9.48051948051948e-06, |
|
"loss": 0.3115, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.5432497978981407, |
|
"grad_norm": 0.23122940957546234, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.2817, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5626515763945028, |
|
"grad_norm": 0.2369256317615509, |
|
"learning_rate": 8.701298701298701e-06, |
|
"loss": 0.2809, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.582053354890865, |
|
"grad_norm": 0.2082873433828354, |
|
"learning_rate": 8.311688311688313e-06, |
|
"loss": 0.2455, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6014551333872271, |
|
"grad_norm": 0.21645894646644592, |
|
"learning_rate": 7.922077922077924e-06, |
|
"loss": 0.2503, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.6208569118835894, |
|
"grad_norm": 0.19337739050388336, |
|
"learning_rate": 7.532467532467533e-06, |
|
"loss": 0.2286, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6402586903799515, |
|
"grad_norm": 0.1808944046497345, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.2401, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.6596604688763137, |
|
"grad_norm": 0.1630856841802597, |
|
"learning_rate": 6.753246753246754e-06, |
|
"loss": 0.2251, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6790622473726758, |
|
"grad_norm": 0.16326990723609924, |
|
"learning_rate": 6.363636363636364e-06, |
|
"loss": 0.2291, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.698464025869038, |
|
"grad_norm": 0.16061735153198242, |
|
"learning_rate": 5.9740259740259746e-06, |
|
"loss": 0.2331, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7178658043654002, |
|
"grad_norm": 0.17352429032325745, |
|
"learning_rate": 5.584415584415585e-06, |
|
"loss": 0.2149, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.7372675828617623, |
|
"grad_norm": 0.17043530941009521, |
|
"learning_rate": 5.194805194805194e-06, |
|
"loss": 0.2187, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.7566693613581245, |
|
"grad_norm": 0.16479559242725372, |
|
"learning_rate": 4.805194805194806e-06, |
|
"loss": 0.2218, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.7760711398544866, |
|
"grad_norm": 0.17882439494132996, |
|
"learning_rate": 4.415584415584416e-06, |
|
"loss": 0.205, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7954729183508489, |
|
"grad_norm": 0.1911778748035431, |
|
"learning_rate": 4.025974025974026e-06, |
|
"loss": 0.2172, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.814874696847211, |
|
"grad_norm": 0.17751498520374298, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 0.2096, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.8342764753435732, |
|
"grad_norm": 0.1702156662940979, |
|
"learning_rate": 3.246753246753247e-06, |
|
"loss": 0.1911, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.8536782538399353, |
|
"grad_norm": 0.1764981597661972, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.2103, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8730800323362975, |
|
"grad_norm": 0.1592799872159958, |
|
"learning_rate": 2.4675324675324676e-06, |
|
"loss": 0.2053, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.8924818108326596, |
|
"grad_norm": 0.21512138843536377, |
|
"learning_rate": 2.0779220779220784e-06, |
|
"loss": 0.2197, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9118835893290218, |
|
"grad_norm": 0.17707495391368866, |
|
"learning_rate": 1.6883116883116885e-06, |
|
"loss": 0.2051, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.931285367825384, |
|
"grad_norm": 0.1585138887166977, |
|
"learning_rate": 1.2987012987012986e-06, |
|
"loss": 0.1984, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9506871463217461, |
|
"grad_norm": 0.15231232345104218, |
|
"learning_rate": 9.090909090909091e-07, |
|
"loss": 0.1774, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.9700889248181084, |
|
"grad_norm": 0.15338800847530365, |
|
"learning_rate": 5.194805194805196e-07, |
|
"loss": 0.2046, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9894907033144705, |
|
"grad_norm": 0.16579587757587433, |
|
"learning_rate": 1.298701298701299e-07, |
|
"loss": 0.1871, |
|
"step": 153 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 154, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.5705942959542764e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|