{ "best_metric": 3.141986846923828, "best_model_checkpoint": "contract1/checkpoint-1455", "epoch": 5.0, "eval_steps": 500, "global_step": 1455, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0859106529209622, "grad_norm": 32.766380310058594, "learning_rate": 6.849315068493151e-06, "loss": 7.5498, "step": 25 }, { "epoch": 0.1718213058419244, "grad_norm": 55.16777420043945, "learning_rate": 1.5068493150684931e-05, "loss": 7.7409, "step": 50 }, { "epoch": 0.25773195876288657, "grad_norm": 19.554790496826172, "learning_rate": 2.363013698630137e-05, "loss": 6.8118, "step": 75 }, { "epoch": 0.3436426116838488, "grad_norm": 11.124310493469238, "learning_rate": 3.219178082191781e-05, "loss": 5.94, "step": 100 }, { "epoch": 0.42955326460481097, "grad_norm": 9.250848770141602, "learning_rate": 4.075342465753425e-05, "loss": 5.2277, "step": 125 }, { "epoch": 0.5154639175257731, "grad_norm": 3.131469964981079, "learning_rate": 4.9315068493150684e-05, "loss": 4.6817, "step": 150 }, { "epoch": 0.6013745704467354, "grad_norm": 4.209794998168945, "learning_rate": 4.912146676852559e-05, "loss": 4.3776, "step": 175 }, { "epoch": 0.6872852233676976, "grad_norm": 3.0055902004241943, "learning_rate": 4.816653934300993e-05, "loss": 4.3974, "step": 200 }, { "epoch": 0.7731958762886598, "grad_norm": 2.923142433166504, "learning_rate": 4.7211611917494275e-05, "loss": 4.2811, "step": 225 }, { "epoch": 0.8591065292096219, "grad_norm": 3.110403060913086, "learning_rate": 4.625668449197861e-05, "loss": 4.1323, "step": 250 }, { "epoch": 0.9450171821305842, "grad_norm": 2.941375970840454, "learning_rate": 4.530175706646295e-05, "loss": 4.1902, "step": 275 }, { "epoch": 1.0, "eval_gen_len": 15.1115, "eval_loss": 3.686694622039795, "eval_rouge1": 17.9543, "eval_rouge2": 4.0352, "eval_rougeL": 16.3506, "eval_rougeLsum": 16.4818, "eval_runtime": 11.3626, "eval_samples_per_second": 25.61, "eval_steps_per_second": 3.256, "step": 291 }, { "epoch": 1.0309278350515463, "grad_norm": 3.0971412658691406, "learning_rate": 4.434682964094729e-05, "loss": 4.0055, "step": 300 }, { "epoch": 1.1168384879725086, "grad_norm": 3.2513363361358643, "learning_rate": 4.339190221543163e-05, "loss": 3.9592, "step": 325 }, { "epoch": 1.2027491408934707, "grad_norm": 4.395771026611328, "learning_rate": 4.2436974789915967e-05, "loss": 3.8709, "step": 350 }, { "epoch": 1.2886597938144329, "grad_norm": 2.7217512130737305, "learning_rate": 4.1482047364400305e-05, "loss": 3.7554, "step": 375 }, { "epoch": 1.3745704467353952, "grad_norm": 3.703568696975708, "learning_rate": 4.052711993888464e-05, "loss": 3.7343, "step": 400 }, { "epoch": 1.4604810996563573, "grad_norm": 2.7263598442077637, "learning_rate": 3.957219251336899e-05, "loss": 3.7497, "step": 425 }, { "epoch": 1.5463917525773194, "grad_norm": 2.4919683933258057, "learning_rate": 3.861726508785333e-05, "loss": 3.7073, "step": 450 }, { "epoch": 1.6323024054982818, "grad_norm": 2.5988521575927734, "learning_rate": 3.7662337662337665e-05, "loss": 3.6325, "step": 475 }, { "epoch": 1.718213058419244, "grad_norm": 3.717288017272949, "learning_rate": 3.6707410236822004e-05, "loss": 3.687, "step": 500 }, { "epoch": 1.8041237113402062, "grad_norm": 3.393786668777466, "learning_rate": 3.575248281130634e-05, "loss": 3.7349, "step": 525 }, { "epoch": 1.8900343642611683, "grad_norm": 2.5332796573638916, "learning_rate": 3.479755538579068e-05, "loss": 3.7308, "step": 550 }, { "epoch": 1.9759450171821307, "grad_norm": 3.4967894554138184, "learning_rate": 3.384262796027502e-05, "loss": 3.6033, "step": 575 }, { "epoch": 2.0, "eval_gen_len": 14.7061, "eval_loss": 3.3814778327941895, "eval_rouge1": 20.6781, "eval_rouge2": 5.109, "eval_rougeL": 17.5025, "eval_rougeLsum": 17.5956, "eval_runtime": 11.6963, "eval_samples_per_second": 24.88, "eval_steps_per_second": 3.163, "step": 582 }, { "epoch": 2.0618556701030926, "grad_norm": 3.7303099632263184, "learning_rate": 3.288770053475936e-05, "loss": 3.4857, "step": 600 }, { "epoch": 2.147766323024055, "grad_norm": 2.58085036277771, "learning_rate": 3.1932773109243696e-05, "loss": 3.7377, "step": 625 }, { "epoch": 2.2336769759450172, "grad_norm": 2.9038166999816895, "learning_rate": 3.097784568372804e-05, "loss": 3.4969, "step": 650 }, { "epoch": 2.319587628865979, "grad_norm": 1.8798184394836426, "learning_rate": 3.002291825821238e-05, "loss": 3.3667, "step": 675 }, { "epoch": 2.4054982817869415, "grad_norm": 2.5839955806732178, "learning_rate": 2.9067990832696718e-05, "loss": 3.5371, "step": 700 }, { "epoch": 2.491408934707904, "grad_norm": 14.803485870361328, "learning_rate": 2.8113063407181056e-05, "loss": 3.4758, "step": 725 }, { "epoch": 2.5773195876288657, "grad_norm": 2.901104688644409, "learning_rate": 2.7158135981665394e-05, "loss": 3.4274, "step": 750 }, { "epoch": 2.663230240549828, "grad_norm": 3.5598862171173096, "learning_rate": 2.6203208556149733e-05, "loss": 3.5939, "step": 775 }, { "epoch": 2.7491408934707904, "grad_norm": 2.656578540802002, "learning_rate": 2.524828113063407e-05, "loss": 3.5227, "step": 800 }, { "epoch": 2.8350515463917527, "grad_norm": 2.2073974609375, "learning_rate": 2.4293353705118413e-05, "loss": 3.5447, "step": 825 }, { "epoch": 2.9209621993127146, "grad_norm": 3.0660665035247803, "learning_rate": 2.333842627960275e-05, "loss": 3.4734, "step": 850 }, { "epoch": 3.0, "eval_gen_len": 16.5439, "eval_loss": 3.232574462890625, "eval_rouge1": 20.2411, "eval_rouge2": 5.2598, "eval_rougeL": 17.2676, "eval_rougeLsum": 17.4831, "eval_runtime": 12.7924, "eval_samples_per_second": 22.748, "eval_steps_per_second": 2.892, "step": 873 }, { "epoch": 3.006872852233677, "grad_norm": 2.2971296310424805, "learning_rate": 2.238349885408709e-05, "loss": 3.4626, "step": 875 }, { "epoch": 3.0927835051546393, "grad_norm": 5.520618438720703, "learning_rate": 2.1428571428571428e-05, "loss": 3.4557, "step": 900 }, { "epoch": 3.178694158075601, "grad_norm": 2.2981772422790527, "learning_rate": 2.047364400305577e-05, "loss": 3.2812, "step": 925 }, { "epoch": 3.2646048109965635, "grad_norm": 6.0153069496154785, "learning_rate": 1.951871657754011e-05, "loss": 3.4321, "step": 950 }, { "epoch": 3.350515463917526, "grad_norm": 2.2888569831848145, "learning_rate": 1.8563789152024447e-05, "loss": 3.392, "step": 975 }, { "epoch": 3.436426116838488, "grad_norm": 5.259116172790527, "learning_rate": 1.7608861726508785e-05, "loss": 3.4009, "step": 1000 }, { "epoch": 3.52233676975945, "grad_norm": 2.115800380706787, "learning_rate": 1.6653934300993127e-05, "loss": 3.3249, "step": 1025 }, { "epoch": 3.6082474226804124, "grad_norm": 2.3146419525146484, "learning_rate": 1.5699006875477465e-05, "loss": 3.2829, "step": 1050 }, { "epoch": 3.6941580756013748, "grad_norm": 2.9118130207061768, "learning_rate": 1.4744079449961804e-05, "loss": 3.4347, "step": 1075 }, { "epoch": 3.7800687285223367, "grad_norm": 2.7317888736724854, "learning_rate": 1.3789152024446142e-05, "loss": 3.2167, "step": 1100 }, { "epoch": 3.865979381443299, "grad_norm": 4.284421920776367, "learning_rate": 1.2834224598930484e-05, "loss": 3.431, "step": 1125 }, { "epoch": 3.9518900343642613, "grad_norm": 3.761094808578491, "learning_rate": 1.1879297173414822e-05, "loss": 3.4635, "step": 1150 }, { "epoch": 4.0, "eval_gen_len": 15.6284, "eval_loss": 3.164484739303589, "eval_rouge1": 20.158, "eval_rouge2": 4.9421, "eval_rougeL": 17.0338, "eval_rougeLsum": 17.2585, "eval_runtime": 11.6665, "eval_samples_per_second": 24.943, "eval_steps_per_second": 3.171, "step": 1164 }, { "epoch": 4.037800687285223, "grad_norm": 2.3253726959228516, "learning_rate": 1.092436974789916e-05, "loss": 3.3823, "step": 1175 }, { "epoch": 4.123711340206185, "grad_norm": 5.085910797119141, "learning_rate": 9.969442322383499e-06, "loss": 3.2498, "step": 1200 }, { "epoch": 4.209621993127148, "grad_norm": 2.912647008895874, "learning_rate": 9.014514896867839e-06, "loss": 3.3191, "step": 1225 }, { "epoch": 4.29553264604811, "grad_norm": 5.910384178161621, "learning_rate": 8.059587471352178e-06, "loss": 3.4222, "step": 1250 }, { "epoch": 4.381443298969073, "grad_norm": 10.643930435180664, "learning_rate": 7.104660045836517e-06, "loss": 3.4691, "step": 1275 }, { "epoch": 4.4673539518900345, "grad_norm": 2.9152700901031494, "learning_rate": 6.149732620320856e-06, "loss": 3.2257, "step": 1300 }, { "epoch": 4.553264604810996, "grad_norm": 2.8727643489837646, "learning_rate": 5.194805194805195e-06, "loss": 3.3841, "step": 1325 }, { "epoch": 4.639175257731958, "grad_norm": 8.290576934814453, "learning_rate": 4.239877769289534e-06, "loss": 3.1381, "step": 1350 }, { "epoch": 4.725085910652921, "grad_norm": 2.3321030139923096, "learning_rate": 3.2849503437738733e-06, "loss": 3.3243, "step": 1375 }, { "epoch": 4.810996563573883, "grad_norm": 3.101409912109375, "learning_rate": 2.3300229182582125e-06, "loss": 3.1536, "step": 1400 }, { "epoch": 4.896907216494846, "grad_norm": 4.4823174476623535, "learning_rate": 1.3750954927425516e-06, "loss": 3.3531, "step": 1425 }, { "epoch": 4.982817869415808, "grad_norm": 2.517242193222046, "learning_rate": 4.2016806722689076e-07, "loss": 3.4086, "step": 1450 }, { "epoch": 5.0, "eval_gen_len": 15.5, "eval_loss": 3.141986846923828, "eval_rouge1": 19.8864, "eval_rouge2": 4.9499, "eval_rougeL": 16.8946, "eval_rougeLsum": 17.1002, "eval_runtime": 12.3635, "eval_samples_per_second": 23.537, "eval_steps_per_second": 2.993, "step": 1455 } ], "logging_steps": 25, "max_steps": 1455, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9422115569664.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }