{
"best_metric": 0.8791124713083397,
"best_model_checkpoint": "distilbert-base-multilingual-cased-aoe-hyper/run-5/checkpoint-654",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 654,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03058103975535168,
"grad_norm": 1.6833314895629883,
"learning_rate": 1.1436264471337766e-05,
"loss": 0.6513,
"step": 10
},
{
"epoch": 0.06116207951070336,
"grad_norm": 1.33408784866333,
"learning_rate": 1.1258682724888423e-05,
"loss": 0.5141,
"step": 20
},
{
"epoch": 0.09174311926605505,
"grad_norm": 2.2130067348480225,
"learning_rate": 1.1081100978439079e-05,
"loss": 0.4869,
"step": 30
},
{
"epoch": 0.12232415902140673,
"grad_norm": 2.5281882286071777,
"learning_rate": 1.0903519231989732e-05,
"loss": 0.4489,
"step": 40
},
{
"epoch": 0.1529051987767584,
"grad_norm": 1.898231029510498,
"learning_rate": 1.072593748554039e-05,
"loss": 0.4336,
"step": 50
},
{
"epoch": 0.1834862385321101,
"grad_norm": 3.3090434074401855,
"learning_rate": 1.0548355739091045e-05,
"loss": 0.4298,
"step": 60
},
{
"epoch": 0.21406727828746178,
"grad_norm": 2.3698971271514893,
"learning_rate": 1.03707739926417e-05,
"loss": 0.4439,
"step": 70
},
{
"epoch": 0.24464831804281345,
"grad_norm": 4.222820281982422,
"learning_rate": 1.0193192246192358e-05,
"loss": 0.4223,
"step": 80
},
{
"epoch": 0.27522935779816515,
"grad_norm": 1.8396034240722656,
"learning_rate": 1.0015610499743013e-05,
"loss": 0.3714,
"step": 90
},
{
"epoch": 0.3058103975535168,
"grad_norm": 8.237236976623535,
"learning_rate": 9.838028753293667e-06,
"loss": 0.4295,
"step": 100
},
{
"epoch": 0.3363914373088685,
"grad_norm": 5.705390453338623,
"learning_rate": 9.660447006844324e-06,
"loss": 0.3417,
"step": 110
},
{
"epoch": 0.3669724770642202,
"grad_norm": 6.750524520874023,
"learning_rate": 9.48286526039498e-06,
"loss": 0.3364,
"step": 120
},
{
"epoch": 0.39755351681957185,
"grad_norm": 5.750469207763672,
"learning_rate": 9.305283513945636e-06,
"loss": 0.2918,
"step": 130
},
{
"epoch": 0.42813455657492355,
"grad_norm": 3.8485758304595947,
"learning_rate": 9.127701767496293e-06,
"loss": 0.3611,
"step": 140
},
{
"epoch": 0.45871559633027525,
"grad_norm": 5.908198356628418,
"learning_rate": 8.950120021046948e-06,
"loss": 0.3353,
"step": 150
},
{
"epoch": 0.4892966360856269,
"grad_norm": 7.6231184005737305,
"learning_rate": 8.772538274597602e-06,
"loss": 0.3719,
"step": 160
},
{
"epoch": 0.5198776758409785,
"grad_norm": 6.425923824310303,
"learning_rate": 8.59495652814826e-06,
"loss": 0.3461,
"step": 170
},
{
"epoch": 0.5504587155963303,
"grad_norm": 4.749773025512695,
"learning_rate": 8.417374781698915e-06,
"loss": 0.3298,
"step": 180
},
{
"epoch": 0.581039755351682,
"grad_norm": 3.624875783920288,
"learning_rate": 8.23979303524957e-06,
"loss": 0.3178,
"step": 190
},
{
"epoch": 0.6116207951070336,
"grad_norm": 2.93977952003479,
"learning_rate": 8.062211288800228e-06,
"loss": 0.337,
"step": 200
},
{
"epoch": 0.6422018348623854,
"grad_norm": 9.312886238098145,
"learning_rate": 7.884629542350883e-06,
"loss": 0.3139,
"step": 210
},
{
"epoch": 0.672782874617737,
"grad_norm": 4.528453350067139,
"learning_rate": 7.707047795901539e-06,
"loss": 0.2572,
"step": 220
},
{
"epoch": 0.7033639143730887,
"grad_norm": 12.262564659118652,
"learning_rate": 7.529466049452193e-06,
"loss": 0.2924,
"step": 230
},
{
"epoch": 0.7339449541284404,
"grad_norm": 4.399960517883301,
"learning_rate": 7.35188430300285e-06,
"loss": 0.285,
"step": 240
},
{
"epoch": 0.764525993883792,
"grad_norm": 2.8317487239837646,
"learning_rate": 7.174302556553505e-06,
"loss": 0.2616,
"step": 250
},
{
"epoch": 0.7951070336391437,
"grad_norm": 14.594043731689453,
"learning_rate": 6.9967208101041615e-06,
"loss": 0.3415,
"step": 260
},
{
"epoch": 0.8256880733944955,
"grad_norm": 3.254194736480713,
"learning_rate": 6.819139063654818e-06,
"loss": 0.2571,
"step": 270
},
{
"epoch": 0.8562691131498471,
"grad_norm": 3.7313687801361084,
"learning_rate": 6.641557317205473e-06,
"loss": 0.2529,
"step": 280
},
{
"epoch": 0.8868501529051988,
"grad_norm": 5.240476608276367,
"learning_rate": 6.463975570756128e-06,
"loss": 0.3422,
"step": 290
},
{
"epoch": 0.9174311926605505,
"grad_norm": 4.861540794372559,
"learning_rate": 6.2863938243067844e-06,
"loss": 0.3243,
"step": 300
},
{
"epoch": 0.9480122324159022,
"grad_norm": 8.03921890258789,
"learning_rate": 6.10881207785744e-06,
"loss": 0.3244,
"step": 310
},
{
"epoch": 0.9785932721712538,
"grad_norm": 3.031550645828247,
"learning_rate": 5.931230331408096e-06,
"loss": 0.2426,
"step": 320
},
{
"epoch": 1.0,
"eval_accuracy": 0.8768171384850804,
"eval_f1": 0.7480438184663537,
"eval_loss": 0.29354020953178406,
"eval_precision": 0.7308868501529052,
"eval_recall": 0.7660256410256411,
"eval_runtime": 4.9072,
"eval_samples_per_second": 266.342,
"eval_steps_per_second": 16.71,
"step": 327
},
{
"epoch": 1.0091743119266054,
"grad_norm": 12.193892478942871,
"learning_rate": 5.753648584958752e-06,
"loss": 0.3332,
"step": 330
},
{
"epoch": 1.039755351681957,
"grad_norm": 5.682682037353516,
"learning_rate": 5.576066838509407e-06,
"loss": 0.2385,
"step": 340
},
{
"epoch": 1.070336391437309,
"grad_norm": 18.050596237182617,
"learning_rate": 5.398485092060064e-06,
"loss": 0.2123,
"step": 350
},
{
"epoch": 1.1009174311926606,
"grad_norm": 5.985245704650879,
"learning_rate": 5.220903345610719e-06,
"loss": 0.1935,
"step": 360
},
{
"epoch": 1.1314984709480123,
"grad_norm": 3.5328218936920166,
"learning_rate": 5.043321599161375e-06,
"loss": 0.268,
"step": 370
},
{
"epoch": 1.162079510703364,
"grad_norm": 4.972208023071289,
"learning_rate": 4.865739852712031e-06,
"loss": 0.1793,
"step": 380
},
{
"epoch": 1.1926605504587156,
"grad_norm": 6.088878154754639,
"learning_rate": 4.688158106262687e-06,
"loss": 0.1993,
"step": 390
},
{
"epoch": 1.2232415902140672,
"grad_norm": 4.503061294555664,
"learning_rate": 4.510576359813342e-06,
"loss": 0.2092,
"step": 400
},
{
"epoch": 1.2538226299694188,
"grad_norm": 4.567380428314209,
"learning_rate": 4.3329946133639985e-06,
"loss": 0.2834,
"step": 410
},
{
"epoch": 1.2844036697247707,
"grad_norm": 6.611799716949463,
"learning_rate": 4.155412866914654e-06,
"loss": 0.2832,
"step": 420
},
{
"epoch": 1.3149847094801224,
"grad_norm": 8.199454307556152,
"learning_rate": 3.97783112046531e-06,
"loss": 0.3619,
"step": 430
},
{
"epoch": 1.345565749235474,
"grad_norm": 10.987822532653809,
"learning_rate": 3.800249374015966e-06,
"loss": 0.3282,
"step": 440
},
{
"epoch": 1.3761467889908257,
"grad_norm": 9.002391815185547,
"learning_rate": 3.622667627566622e-06,
"loss": 0.1932,
"step": 450
},
{
"epoch": 1.4067278287461773,
"grad_norm": 9.500117301940918,
"learning_rate": 3.445085881117277e-06,
"loss": 0.3071,
"step": 460
},
{
"epoch": 1.4373088685015292,
"grad_norm": 3.1432082653045654,
"learning_rate": 3.2675041346679334e-06,
"loss": 0.2714,
"step": 470
},
{
"epoch": 1.4678899082568808,
"grad_norm": 5.107339859008789,
"learning_rate": 3.0899223882185893e-06,
"loss": 0.2005,
"step": 480
},
{
"epoch": 1.4984709480122325,
"grad_norm": 5.789766311645508,
"learning_rate": 2.9123406417692444e-06,
"loss": 0.2696,
"step": 490
},
{
"epoch": 1.529051987767584,
"grad_norm": 6.676187515258789,
"learning_rate": 2.7347588953199003e-06,
"loss": 0.2581,
"step": 500
},
{
"epoch": 1.5596330275229358,
"grad_norm": 9.344066619873047,
"learning_rate": 2.5571771488705567e-06,
"loss": 0.2441,
"step": 510
},
{
"epoch": 1.5902140672782874,
"grad_norm": 5.086206912994385,
"learning_rate": 2.3795954024212122e-06,
"loss": 0.2883,
"step": 520
},
{
"epoch": 1.620795107033639,
"grad_norm": 4.854761123657227,
"learning_rate": 2.2020136559718677e-06,
"loss": 0.3288,
"step": 530
},
{
"epoch": 1.6513761467889907,
"grad_norm": 5.0419158935546875,
"learning_rate": 2.024431909522524e-06,
"loss": 0.1916,
"step": 540
},
{
"epoch": 1.6819571865443423,
"grad_norm": 3.6190755367279053,
"learning_rate": 1.8468501630731796e-06,
"loss": 0.2405,
"step": 550
},
{
"epoch": 1.7125382262996942,
"grad_norm": 2.2586376667022705,
"learning_rate": 1.6692684166238354e-06,
"loss": 0.2336,
"step": 560
},
{
"epoch": 1.7431192660550459,
"grad_norm": 0.9094193577766418,
"learning_rate": 1.4916866701744913e-06,
"loss": 0.3062,
"step": 570
},
{
"epoch": 1.7737003058103975,
"grad_norm": 5.636134147644043,
"learning_rate": 1.314104923725147e-06,
"loss": 0.2616,
"step": 580
},
{
"epoch": 1.8042813455657494,
"grad_norm": 8.59340763092041,
"learning_rate": 1.1365231772758028e-06,
"loss": 0.2395,
"step": 590
},
{
"epoch": 1.834862385321101,
"grad_norm": 6.957756042480469,
"learning_rate": 9.589414308264587e-07,
"loss": 0.2656,
"step": 600
},
{
"epoch": 1.8654434250764527,
"grad_norm": 6.419522285461426,
"learning_rate": 7.813596843771144e-07,
"loss": 0.3393,
"step": 610
},
{
"epoch": 1.8960244648318043,
"grad_norm": 2.6836931705474854,
"learning_rate": 6.037779379277703e-07,
"loss": 0.2229,
"step": 620
},
{
"epoch": 1.926605504587156,
"grad_norm": 5.273196697235107,
"learning_rate": 4.261961914784261e-07,
"loss": 0.2025,
"step": 630
},
{
"epoch": 1.9571865443425076,
"grad_norm": 5.824622631072998,
"learning_rate": 2.4861444502908185e-07,
"loss": 0.2995,
"step": 640
},
{
"epoch": 1.9877675840978593,
"grad_norm": 18.051441192626953,
"learning_rate": 7.103269857973767e-08,
"loss": 0.303,
"step": 650
},
{
"epoch": 2.0,
"eval_accuracy": 0.8791124713083397,
"eval_f1": 0.7554179566563467,
"eval_loss": 0.2987494468688965,
"eval_precision": 0.7305389221556886,
"eval_recall": 0.782051282051282,
"eval_runtime": 4.9837,
"eval_samples_per_second": 262.254,
"eval_steps_per_second": 16.454,
"step": 654
}
],
"logging_steps": 10,
"max_steps": 654,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1377131076427776.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": {
"learning_rate": 1.161384621778711e-05,
"num_train_epochs": 2,
"per_device_train_batch_size": 16,
"seed": 28
}
}