{ "best_metric": 0.8791124713083397, "best_model_checkpoint": "distilbert-base-multilingual-cased-aoe-hyper/run-5/checkpoint-654", "epoch": 2.0, "eval_steps": 500, "global_step": 654, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03058103975535168, "grad_norm": 1.6833314895629883, "learning_rate": 1.1436264471337766e-05, "loss": 0.6513, "step": 10 }, { "epoch": 0.06116207951070336, "grad_norm": 1.33408784866333, "learning_rate": 1.1258682724888423e-05, "loss": 0.5141, "step": 20 }, { "epoch": 0.09174311926605505, "grad_norm": 2.2130067348480225, "learning_rate": 1.1081100978439079e-05, "loss": 0.4869, "step": 30 }, { "epoch": 0.12232415902140673, "grad_norm": 2.5281882286071777, "learning_rate": 1.0903519231989732e-05, "loss": 0.4489, "step": 40 }, { "epoch": 0.1529051987767584, "grad_norm": 1.898231029510498, "learning_rate": 1.072593748554039e-05, "loss": 0.4336, "step": 50 }, { "epoch": 0.1834862385321101, "grad_norm": 3.3090434074401855, "learning_rate": 1.0548355739091045e-05, "loss": 0.4298, "step": 60 }, { "epoch": 0.21406727828746178, "grad_norm": 2.3698971271514893, "learning_rate": 1.03707739926417e-05, "loss": 0.4439, "step": 70 }, { "epoch": 0.24464831804281345, "grad_norm": 4.222820281982422, "learning_rate": 1.0193192246192358e-05, "loss": 0.4223, "step": 80 }, { "epoch": 0.27522935779816515, "grad_norm": 1.8396034240722656, "learning_rate": 1.0015610499743013e-05, "loss": 0.3714, "step": 90 }, { "epoch": 0.3058103975535168, "grad_norm": 8.237236976623535, "learning_rate": 9.838028753293667e-06, "loss": 0.4295, "step": 100 }, { "epoch": 0.3363914373088685, "grad_norm": 5.705390453338623, "learning_rate": 9.660447006844324e-06, "loss": 0.3417, "step": 110 }, { "epoch": 0.3669724770642202, "grad_norm": 6.750524520874023, "learning_rate": 9.48286526039498e-06, "loss": 0.3364, "step": 120 }, { "epoch": 0.39755351681957185, "grad_norm": 5.750469207763672, "learning_rate": 9.305283513945636e-06, "loss": 0.2918, "step": 130 }, { "epoch": 0.42813455657492355, "grad_norm": 3.8485758304595947, "learning_rate": 9.127701767496293e-06, "loss": 0.3611, "step": 140 }, { "epoch": 0.45871559633027525, "grad_norm": 5.908198356628418, "learning_rate": 8.950120021046948e-06, "loss": 0.3353, "step": 150 }, { "epoch": 0.4892966360856269, "grad_norm": 7.6231184005737305, "learning_rate": 8.772538274597602e-06, "loss": 0.3719, "step": 160 }, { "epoch": 0.5198776758409785, "grad_norm": 6.425923824310303, "learning_rate": 8.59495652814826e-06, "loss": 0.3461, "step": 170 }, { "epoch": 0.5504587155963303, "grad_norm": 4.749773025512695, "learning_rate": 8.417374781698915e-06, "loss": 0.3298, "step": 180 }, { "epoch": 0.581039755351682, "grad_norm": 3.624875783920288, "learning_rate": 8.23979303524957e-06, "loss": 0.3178, "step": 190 }, { "epoch": 0.6116207951070336, "grad_norm": 2.93977952003479, "learning_rate": 8.062211288800228e-06, "loss": 0.337, "step": 200 }, { "epoch": 0.6422018348623854, "grad_norm": 9.312886238098145, "learning_rate": 7.884629542350883e-06, "loss": 0.3139, "step": 210 }, { "epoch": 0.672782874617737, "grad_norm": 4.528453350067139, "learning_rate": 7.707047795901539e-06, "loss": 0.2572, "step": 220 }, { "epoch": 0.7033639143730887, "grad_norm": 12.262564659118652, "learning_rate": 7.529466049452193e-06, "loss": 0.2924, "step": 230 }, { "epoch": 0.7339449541284404, "grad_norm": 4.399960517883301, "learning_rate": 7.35188430300285e-06, "loss": 0.285, "step": 240 }, { "epoch": 0.764525993883792, "grad_norm": 2.8317487239837646, "learning_rate": 7.174302556553505e-06, "loss": 0.2616, "step": 250 }, { "epoch": 0.7951070336391437, "grad_norm": 14.594043731689453, "learning_rate": 6.9967208101041615e-06, "loss": 0.3415, "step": 260 }, { "epoch": 0.8256880733944955, "grad_norm": 3.254194736480713, "learning_rate": 6.819139063654818e-06, "loss": 0.2571, "step": 270 }, { "epoch": 0.8562691131498471, "grad_norm": 3.7313687801361084, "learning_rate": 6.641557317205473e-06, "loss": 0.2529, "step": 280 }, { "epoch": 0.8868501529051988, "grad_norm": 5.240476608276367, "learning_rate": 6.463975570756128e-06, "loss": 0.3422, "step": 290 }, { "epoch": 0.9174311926605505, "grad_norm": 4.861540794372559, "learning_rate": 6.2863938243067844e-06, "loss": 0.3243, "step": 300 }, { "epoch": 0.9480122324159022, "grad_norm": 8.03921890258789, "learning_rate": 6.10881207785744e-06, "loss": 0.3244, "step": 310 }, { "epoch": 0.9785932721712538, "grad_norm": 3.031550645828247, "learning_rate": 5.931230331408096e-06, "loss": 0.2426, "step": 320 }, { "epoch": 1.0, "eval_accuracy": 0.8768171384850804, "eval_f1": 0.7480438184663537, "eval_loss": 0.29354020953178406, "eval_precision": 0.7308868501529052, "eval_recall": 0.7660256410256411, "eval_runtime": 4.9072, "eval_samples_per_second": 266.342, "eval_steps_per_second": 16.71, "step": 327 }, { "epoch": 1.0091743119266054, "grad_norm": 12.193892478942871, "learning_rate": 5.753648584958752e-06, "loss": 0.3332, "step": 330 }, { "epoch": 1.039755351681957, "grad_norm": 5.682682037353516, "learning_rate": 5.576066838509407e-06, "loss": 0.2385, "step": 340 }, { "epoch": 1.070336391437309, "grad_norm": 18.050596237182617, "learning_rate": 5.398485092060064e-06, "loss": 0.2123, "step": 350 }, { "epoch": 1.1009174311926606, "grad_norm": 5.985245704650879, "learning_rate": 5.220903345610719e-06, "loss": 0.1935, "step": 360 }, { "epoch": 1.1314984709480123, "grad_norm": 3.5328218936920166, "learning_rate": 5.043321599161375e-06, "loss": 0.268, "step": 370 }, { "epoch": 1.162079510703364, "grad_norm": 4.972208023071289, "learning_rate": 4.865739852712031e-06, "loss": 0.1793, "step": 380 }, { "epoch": 1.1926605504587156, "grad_norm": 6.088878154754639, "learning_rate": 4.688158106262687e-06, "loss": 0.1993, "step": 390 }, { "epoch": 1.2232415902140672, "grad_norm": 4.503061294555664, "learning_rate": 4.510576359813342e-06, "loss": 0.2092, "step": 400 }, { "epoch": 1.2538226299694188, "grad_norm": 4.567380428314209, "learning_rate": 4.3329946133639985e-06, "loss": 0.2834, "step": 410 }, { "epoch": 1.2844036697247707, "grad_norm": 6.611799716949463, "learning_rate": 4.155412866914654e-06, "loss": 0.2832, "step": 420 }, { "epoch": 1.3149847094801224, "grad_norm": 8.199454307556152, "learning_rate": 3.97783112046531e-06, "loss": 0.3619, "step": 430 }, { "epoch": 1.345565749235474, "grad_norm": 10.987822532653809, "learning_rate": 3.800249374015966e-06, "loss": 0.3282, "step": 440 }, { "epoch": 1.3761467889908257, "grad_norm": 9.002391815185547, "learning_rate": 3.622667627566622e-06, "loss": 0.1932, "step": 450 }, { "epoch": 1.4067278287461773, "grad_norm": 9.500117301940918, "learning_rate": 3.445085881117277e-06, "loss": 0.3071, "step": 460 }, { "epoch": 1.4373088685015292, "grad_norm": 3.1432082653045654, "learning_rate": 3.2675041346679334e-06, "loss": 0.2714, "step": 470 }, { "epoch": 1.4678899082568808, "grad_norm": 5.107339859008789, "learning_rate": 3.0899223882185893e-06, "loss": 0.2005, "step": 480 }, { "epoch": 1.4984709480122325, "grad_norm": 5.789766311645508, "learning_rate": 2.9123406417692444e-06, "loss": 0.2696, "step": 490 }, { "epoch": 1.529051987767584, "grad_norm": 6.676187515258789, "learning_rate": 2.7347588953199003e-06, "loss": 0.2581, "step": 500 }, { "epoch": 1.5596330275229358, "grad_norm": 9.344066619873047, "learning_rate": 2.5571771488705567e-06, "loss": 0.2441, "step": 510 }, { "epoch": 1.5902140672782874, "grad_norm": 5.086206912994385, "learning_rate": 2.3795954024212122e-06, "loss": 0.2883, "step": 520 }, { "epoch": 1.620795107033639, "grad_norm": 4.854761123657227, "learning_rate": 2.2020136559718677e-06, "loss": 0.3288, "step": 530 }, { "epoch": 1.6513761467889907, "grad_norm": 5.0419158935546875, "learning_rate": 2.024431909522524e-06, "loss": 0.1916, "step": 540 }, { "epoch": 1.6819571865443423, "grad_norm": 3.6190755367279053, "learning_rate": 1.8468501630731796e-06, "loss": 0.2405, "step": 550 }, { "epoch": 1.7125382262996942, "grad_norm": 2.2586376667022705, "learning_rate": 1.6692684166238354e-06, "loss": 0.2336, "step": 560 }, { "epoch": 1.7431192660550459, "grad_norm": 0.9094193577766418, "learning_rate": 1.4916866701744913e-06, "loss": 0.3062, "step": 570 }, { "epoch": 1.7737003058103975, "grad_norm": 5.636134147644043, "learning_rate": 1.314104923725147e-06, "loss": 0.2616, "step": 580 }, { "epoch": 1.8042813455657494, "grad_norm": 8.59340763092041, "learning_rate": 1.1365231772758028e-06, "loss": 0.2395, "step": 590 }, { "epoch": 1.834862385321101, "grad_norm": 6.957756042480469, "learning_rate": 9.589414308264587e-07, "loss": 0.2656, "step": 600 }, { "epoch": 1.8654434250764527, "grad_norm": 6.419522285461426, "learning_rate": 7.813596843771144e-07, "loss": 0.3393, "step": 610 }, { "epoch": 1.8960244648318043, "grad_norm": 2.6836931705474854, "learning_rate": 6.037779379277703e-07, "loss": 0.2229, "step": 620 }, { "epoch": 1.926605504587156, "grad_norm": 5.273196697235107, "learning_rate": 4.261961914784261e-07, "loss": 0.2025, "step": 630 }, { "epoch": 1.9571865443425076, "grad_norm": 5.824622631072998, "learning_rate": 2.4861444502908185e-07, "loss": 0.2995, "step": 640 }, { "epoch": 1.9877675840978593, "grad_norm": 18.051441192626953, "learning_rate": 7.103269857973767e-08, "loss": 0.303, "step": 650 }, { "epoch": 2.0, "eval_accuracy": 0.8791124713083397, "eval_f1": 0.7554179566563467, "eval_loss": 0.2987494468688965, "eval_precision": 0.7305389221556886, "eval_recall": 0.782051282051282, "eval_runtime": 4.9837, "eval_samples_per_second": 262.254, "eval_steps_per_second": 16.454, "step": 654 } ], "logging_steps": 10, "max_steps": 654, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1377131076427776.0, "train_batch_size": 16, "trial_name": null, "trial_params": { "learning_rate": 1.161384621778711e-05, "num_train_epochs": 2, "per_device_train_batch_size": 16, "seed": 28 } }