|
{ |
|
"best_metric": 0.8791124713083397, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-aoe-hyper/run-5/checkpoint-654", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 654, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03058103975535168, |
|
"grad_norm": 1.6833314895629883, |
|
"learning_rate": 1.1436264471337766e-05, |
|
"loss": 0.6513, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06116207951070336, |
|
"grad_norm": 1.33408784866333, |
|
"learning_rate": 1.1258682724888423e-05, |
|
"loss": 0.5141, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09174311926605505, |
|
"grad_norm": 2.2130067348480225, |
|
"learning_rate": 1.1081100978439079e-05, |
|
"loss": 0.4869, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12232415902140673, |
|
"grad_norm": 2.5281882286071777, |
|
"learning_rate": 1.0903519231989732e-05, |
|
"loss": 0.4489, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1529051987767584, |
|
"grad_norm": 1.898231029510498, |
|
"learning_rate": 1.072593748554039e-05, |
|
"loss": 0.4336, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1834862385321101, |
|
"grad_norm": 3.3090434074401855, |
|
"learning_rate": 1.0548355739091045e-05, |
|
"loss": 0.4298, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.21406727828746178, |
|
"grad_norm": 2.3698971271514893, |
|
"learning_rate": 1.03707739926417e-05, |
|
"loss": 0.4439, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.24464831804281345, |
|
"grad_norm": 4.222820281982422, |
|
"learning_rate": 1.0193192246192358e-05, |
|
"loss": 0.4223, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.27522935779816515, |
|
"grad_norm": 1.8396034240722656, |
|
"learning_rate": 1.0015610499743013e-05, |
|
"loss": 0.3714, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3058103975535168, |
|
"grad_norm": 8.237236976623535, |
|
"learning_rate": 9.838028753293667e-06, |
|
"loss": 0.4295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3363914373088685, |
|
"grad_norm": 5.705390453338623, |
|
"learning_rate": 9.660447006844324e-06, |
|
"loss": 0.3417, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 6.750524520874023, |
|
"learning_rate": 9.48286526039498e-06, |
|
"loss": 0.3364, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.39755351681957185, |
|
"grad_norm": 5.750469207763672, |
|
"learning_rate": 9.305283513945636e-06, |
|
"loss": 0.2918, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.42813455657492355, |
|
"grad_norm": 3.8485758304595947, |
|
"learning_rate": 9.127701767496293e-06, |
|
"loss": 0.3611, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"grad_norm": 5.908198356628418, |
|
"learning_rate": 8.950120021046948e-06, |
|
"loss": 0.3353, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4892966360856269, |
|
"grad_norm": 7.6231184005737305, |
|
"learning_rate": 8.772538274597602e-06, |
|
"loss": 0.3719, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5198776758409785, |
|
"grad_norm": 6.425923824310303, |
|
"learning_rate": 8.59495652814826e-06, |
|
"loss": 0.3461, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5504587155963303, |
|
"grad_norm": 4.749773025512695, |
|
"learning_rate": 8.417374781698915e-06, |
|
"loss": 0.3298, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.581039755351682, |
|
"grad_norm": 3.624875783920288, |
|
"learning_rate": 8.23979303524957e-06, |
|
"loss": 0.3178, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.6116207951070336, |
|
"grad_norm": 2.93977952003479, |
|
"learning_rate": 8.062211288800228e-06, |
|
"loss": 0.337, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6422018348623854, |
|
"grad_norm": 9.312886238098145, |
|
"learning_rate": 7.884629542350883e-06, |
|
"loss": 0.3139, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.672782874617737, |
|
"grad_norm": 4.528453350067139, |
|
"learning_rate": 7.707047795901539e-06, |
|
"loss": 0.2572, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.7033639143730887, |
|
"grad_norm": 12.262564659118652, |
|
"learning_rate": 7.529466049452193e-06, |
|
"loss": 0.2924, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 4.399960517883301, |
|
"learning_rate": 7.35188430300285e-06, |
|
"loss": 0.285, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.764525993883792, |
|
"grad_norm": 2.8317487239837646, |
|
"learning_rate": 7.174302556553505e-06, |
|
"loss": 0.2616, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7951070336391437, |
|
"grad_norm": 14.594043731689453, |
|
"learning_rate": 6.9967208101041615e-06, |
|
"loss": 0.3415, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.8256880733944955, |
|
"grad_norm": 3.254194736480713, |
|
"learning_rate": 6.819139063654818e-06, |
|
"loss": 0.2571, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8562691131498471, |
|
"grad_norm": 3.7313687801361084, |
|
"learning_rate": 6.641557317205473e-06, |
|
"loss": 0.2529, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8868501529051988, |
|
"grad_norm": 5.240476608276367, |
|
"learning_rate": 6.463975570756128e-06, |
|
"loss": 0.3422, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 4.861540794372559, |
|
"learning_rate": 6.2863938243067844e-06, |
|
"loss": 0.3243, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9480122324159022, |
|
"grad_norm": 8.03921890258789, |
|
"learning_rate": 6.10881207785744e-06, |
|
"loss": 0.3244, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9785932721712538, |
|
"grad_norm": 3.031550645828247, |
|
"learning_rate": 5.931230331408096e-06, |
|
"loss": 0.2426, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8768171384850804, |
|
"eval_f1": 0.7480438184663537, |
|
"eval_loss": 0.29354020953178406, |
|
"eval_precision": 0.7308868501529052, |
|
"eval_recall": 0.7660256410256411, |
|
"eval_runtime": 4.9072, |
|
"eval_samples_per_second": 266.342, |
|
"eval_steps_per_second": 16.71, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.0091743119266054, |
|
"grad_norm": 12.193892478942871, |
|
"learning_rate": 5.753648584958752e-06, |
|
"loss": 0.3332, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.039755351681957, |
|
"grad_norm": 5.682682037353516, |
|
"learning_rate": 5.576066838509407e-06, |
|
"loss": 0.2385, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.070336391437309, |
|
"grad_norm": 18.050596237182617, |
|
"learning_rate": 5.398485092060064e-06, |
|
"loss": 0.2123, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.1009174311926606, |
|
"grad_norm": 5.985245704650879, |
|
"learning_rate": 5.220903345610719e-06, |
|
"loss": 0.1935, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.1314984709480123, |
|
"grad_norm": 3.5328218936920166, |
|
"learning_rate": 5.043321599161375e-06, |
|
"loss": 0.268, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.162079510703364, |
|
"grad_norm": 4.972208023071289, |
|
"learning_rate": 4.865739852712031e-06, |
|
"loss": 0.1793, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1926605504587156, |
|
"grad_norm": 6.088878154754639, |
|
"learning_rate": 4.688158106262687e-06, |
|
"loss": 0.1993, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.2232415902140672, |
|
"grad_norm": 4.503061294555664, |
|
"learning_rate": 4.510576359813342e-06, |
|
"loss": 0.2092, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2538226299694188, |
|
"grad_norm": 4.567380428314209, |
|
"learning_rate": 4.3329946133639985e-06, |
|
"loss": 0.2834, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.2844036697247707, |
|
"grad_norm": 6.611799716949463, |
|
"learning_rate": 4.155412866914654e-06, |
|
"loss": 0.2832, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.3149847094801224, |
|
"grad_norm": 8.199454307556152, |
|
"learning_rate": 3.97783112046531e-06, |
|
"loss": 0.3619, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.345565749235474, |
|
"grad_norm": 10.987822532653809, |
|
"learning_rate": 3.800249374015966e-06, |
|
"loss": 0.3282, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.3761467889908257, |
|
"grad_norm": 9.002391815185547, |
|
"learning_rate": 3.622667627566622e-06, |
|
"loss": 0.1932, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.4067278287461773, |
|
"grad_norm": 9.500117301940918, |
|
"learning_rate": 3.445085881117277e-06, |
|
"loss": 0.3071, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.4373088685015292, |
|
"grad_norm": 3.1432082653045654, |
|
"learning_rate": 3.2675041346679334e-06, |
|
"loss": 0.2714, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.4678899082568808, |
|
"grad_norm": 5.107339859008789, |
|
"learning_rate": 3.0899223882185893e-06, |
|
"loss": 0.2005, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.4984709480122325, |
|
"grad_norm": 5.789766311645508, |
|
"learning_rate": 2.9123406417692444e-06, |
|
"loss": 0.2696, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.529051987767584, |
|
"grad_norm": 6.676187515258789, |
|
"learning_rate": 2.7347588953199003e-06, |
|
"loss": 0.2581, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.5596330275229358, |
|
"grad_norm": 9.344066619873047, |
|
"learning_rate": 2.5571771488705567e-06, |
|
"loss": 0.2441, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.5902140672782874, |
|
"grad_norm": 5.086206912994385, |
|
"learning_rate": 2.3795954024212122e-06, |
|
"loss": 0.2883, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.620795107033639, |
|
"grad_norm": 4.854761123657227, |
|
"learning_rate": 2.2020136559718677e-06, |
|
"loss": 0.3288, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.6513761467889907, |
|
"grad_norm": 5.0419158935546875, |
|
"learning_rate": 2.024431909522524e-06, |
|
"loss": 0.1916, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.6819571865443423, |
|
"grad_norm": 3.6190755367279053, |
|
"learning_rate": 1.8468501630731796e-06, |
|
"loss": 0.2405, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.7125382262996942, |
|
"grad_norm": 2.2586376667022705, |
|
"learning_rate": 1.6692684166238354e-06, |
|
"loss": 0.2336, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.7431192660550459, |
|
"grad_norm": 0.9094193577766418, |
|
"learning_rate": 1.4916866701744913e-06, |
|
"loss": 0.3062, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.7737003058103975, |
|
"grad_norm": 5.636134147644043, |
|
"learning_rate": 1.314104923725147e-06, |
|
"loss": 0.2616, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.8042813455657494, |
|
"grad_norm": 8.59340763092041, |
|
"learning_rate": 1.1365231772758028e-06, |
|
"loss": 0.2395, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 6.957756042480469, |
|
"learning_rate": 9.589414308264587e-07, |
|
"loss": 0.2656, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.8654434250764527, |
|
"grad_norm": 6.419522285461426, |
|
"learning_rate": 7.813596843771144e-07, |
|
"loss": 0.3393, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.8960244648318043, |
|
"grad_norm": 2.6836931705474854, |
|
"learning_rate": 6.037779379277703e-07, |
|
"loss": 0.2229, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.926605504587156, |
|
"grad_norm": 5.273196697235107, |
|
"learning_rate": 4.261961914784261e-07, |
|
"loss": 0.2025, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.9571865443425076, |
|
"grad_norm": 5.824622631072998, |
|
"learning_rate": 2.4861444502908185e-07, |
|
"loss": 0.2995, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.9877675840978593, |
|
"grad_norm": 18.051441192626953, |
|
"learning_rate": 7.103269857973767e-08, |
|
"loss": 0.303, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8791124713083397, |
|
"eval_f1": 0.7554179566563467, |
|
"eval_loss": 0.2987494468688965, |
|
"eval_precision": 0.7305389221556886, |
|
"eval_recall": 0.782051282051282, |
|
"eval_runtime": 4.9837, |
|
"eval_samples_per_second": 262.254, |
|
"eval_steps_per_second": 16.454, |
|
"step": 654 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 654, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1377131076427776.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 1.161384621778711e-05, |
|
"num_train_epochs": 2, |
|
"per_device_train_batch_size": 16, |
|
"seed": 28 |
|
} |
|
} |
|
|