{
  "best_metric": 0.7932489451476793,
  "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-jc9uav7l/checkpoint-1600",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 1600,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025,
      "grad_norm": 3.898715019226074,
      "learning_rate": 5.237000370669628e-05,
      "loss": 0.5804,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.486314058303833,
      "learning_rate": 5.2106837858923936e-05,
      "loss": 0.5382,
      "step": 20
    },
    {
      "epoch": 0.075,
      "grad_norm": 5.969634056091309,
      "learning_rate": 5.18436720111516e-05,
      "loss": 0.4842,
      "step": 30
    },
    {
      "epoch": 0.1,
      "grad_norm": 3.656458854675293,
      "learning_rate": 5.1580506163379254e-05,
      "loss": 0.4253,
      "step": 40
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.4058239161968231,
      "learning_rate": 5.131734031560691e-05,
      "loss": 0.4599,
      "step": 50
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.092224597930908,
      "learning_rate": 5.1054174467834564e-05,
      "loss": 0.4936,
      "step": 60
    },
    {
      "epoch": 0.175,
      "grad_norm": 7.18532657623291,
      "learning_rate": 5.079100862006222e-05,
      "loss": 0.529,
      "step": 70
    },
    {
      "epoch": 0.2,
      "grad_norm": 5.497727870941162,
      "learning_rate": 5.052784277228988e-05,
      "loss": 0.4603,
      "step": 80
    },
    {
      "epoch": 0.225,
      "grad_norm": 14.79340934753418,
      "learning_rate": 5.026467692451754e-05,
      "loss": 0.6846,
      "step": 90
    },
    {
      "epoch": 0.25,
      "grad_norm": 2.487825632095337,
      "learning_rate": 5.000151107674519e-05,
      "loss": 0.5422,
      "step": 100
    },
    {
      "epoch": 0.275,
      "grad_norm": 1.7481558322906494,
      "learning_rate": 4.973834522897285e-05,
      "loss": 0.4124,
      "step": 110
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.459429144859314,
      "learning_rate": 4.94751793812005e-05,
      "loss": 0.5866,
      "step": 120
    },
    {
      "epoch": 0.325,
      "grad_norm": 1.4248297214508057,
      "learning_rate": 4.9212013533428165e-05,
      "loss": 0.3253,
      "step": 130
    },
    {
      "epoch": 0.35,
      "grad_norm": 18.61319923400879,
      "learning_rate": 4.894884768565583e-05,
      "loss": 0.4766,
      "step": 140
    },
    {
      "epoch": 0.375,
      "grad_norm": 23.339385986328125,
      "learning_rate": 4.868568183788348e-05,
      "loss": 1.1408,
      "step": 150
    },
    {
      "epoch": 0.4,
      "grad_norm": 1.9814672470092773,
      "learning_rate": 4.842251599011114e-05,
      "loss": 0.6388,
      "step": 160
    },
    {
      "epoch": 0.425,
      "grad_norm": 10.203920364379883,
      "learning_rate": 4.815935014233879e-05,
      "loss": 0.4511,
      "step": 170
    },
    {
      "epoch": 0.45,
      "grad_norm": 4.456247806549072,
      "learning_rate": 4.789618429456645e-05,
      "loss": 0.3185,
      "step": 180
    },
    {
      "epoch": 0.475,
      "grad_norm": 0.4962649941444397,
      "learning_rate": 4.763301844679411e-05,
      "loss": 0.522,
      "step": 190
    },
    {
      "epoch": 0.5,
      "grad_norm": 6.5583815574646,
      "learning_rate": 4.7369852599021765e-05,
      "loss": 0.87,
      "step": 200
    },
    {
      "epoch": 0.525,
      "grad_norm": 5.371535301208496,
      "learning_rate": 4.710668675124942e-05,
      "loss": 0.478,
      "step": 210
    },
    {
      "epoch": 0.55,
      "grad_norm": 2.1848862171173096,
      "learning_rate": 4.6843520903477076e-05,
      "loss": 0.4016,
      "step": 220
    },
    {
      "epoch": 0.575,
      "grad_norm": 0.9754756093025208,
      "learning_rate": 4.658035505570473e-05,
      "loss": 0.2634,
      "step": 230
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.6011353731155396,
      "learning_rate": 4.631718920793239e-05,
      "loss": 0.6037,
      "step": 240
    },
    {
      "epoch": 0.625,
      "grad_norm": 14.307558059692383,
      "learning_rate": 4.605402336016005e-05,
      "loss": 0.5226,
      "step": 250
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.4895697832107544,
      "learning_rate": 4.5790857512387704e-05,
      "loss": 0.3069,
      "step": 260
    },
    {
      "epoch": 0.675,
      "grad_norm": 1.63743257522583,
      "learning_rate": 4.552769166461536e-05,
      "loss": 0.5554,
      "step": 270
    },
    {
      "epoch": 0.7,
      "grad_norm": 1.2908447980880737,
      "learning_rate": 4.5264525816843014e-05,
      "loss": 0.5409,
      "step": 280
    },
    {
      "epoch": 0.725,
      "grad_norm": 104.72332763671875,
      "learning_rate": 4.5001359969070676e-05,
      "loss": 0.4431,
      "step": 290
    },
    {
      "epoch": 0.75,
      "grad_norm": 38.51509475708008,
      "learning_rate": 4.473819412129833e-05,
      "loss": 0.8298,
      "step": 300
    },
    {
      "epoch": 0.775,
      "grad_norm": 0.7204974889755249,
      "learning_rate": 4.447502827352599e-05,
      "loss": 1.0198,
      "step": 310
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.1109700202941895,
      "learning_rate": 4.421186242575364e-05,
      "loss": 0.4,
      "step": 320
    },
    {
      "epoch": 0.825,
      "grad_norm": 1.3143742084503174,
      "learning_rate": 4.39486965779813e-05,
      "loss": 0.6479,
      "step": 330
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.47246071696281433,
      "learning_rate": 4.368553073020896e-05,
      "loss": 0.1606,
      "step": 340
    },
    {
      "epoch": 0.875,
      "grad_norm": 0.24373719096183777,
      "learning_rate": 4.3422364882436615e-05,
      "loss": 0.5316,
      "step": 350
    },
    {
      "epoch": 0.9,
      "grad_norm": 6.273545265197754,
      "learning_rate": 4.315919903466427e-05,
      "loss": 0.6365,
      "step": 360
    },
    {
      "epoch": 0.925,
      "grad_norm": 0.4661806523799896,
      "learning_rate": 4.2896033186891925e-05,
      "loss": 0.4455,
      "step": 370
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.7078198194503784,
      "learning_rate": 4.263286733911959e-05,
      "loss": 0.3436,
      "step": 380
    },
    {
      "epoch": 0.975,
      "grad_norm": 0.8958209753036499,
      "learning_rate": 4.236970149134725e-05,
      "loss": 0.6337,
      "step": 390
    },
    {
      "epoch": 1.0,
      "grad_norm": 15.921177864074707,
      "learning_rate": 4.2106535643574905e-05,
      "loss": 0.7286,
      "step": 400
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.825,
      "eval_f1": 0.6276595744680851,
      "eval_loss": 0.6047177314758301,
      "eval_precision": 0.9076923076923077,
      "eval_recall": 0.4796747967479675,
      "eval_runtime": 1.5265,
      "eval_samples_per_second": 262.045,
      "eval_steps_per_second": 16.378,
      "step": 400
    },
    {
      "epoch": 1.025,
      "grad_norm": 0.5335336327552795,
      "learning_rate": 4.184336979580256e-05,
      "loss": 0.4385,
      "step": 410
    },
    {
      "epoch": 1.05,
      "grad_norm": 82.54154968261719,
      "learning_rate": 4.1580203948030215e-05,
      "loss": 0.3295,
      "step": 420
    },
    {
      "epoch": 1.075,
      "grad_norm": 5.63857889175415,
      "learning_rate": 4.131703810025787e-05,
      "loss": 0.4648,
      "step": 430
    },
    {
      "epoch": 1.1,
      "grad_norm": 85.73626708984375,
      "learning_rate": 4.105387225248553e-05,
      "loss": 0.5061,
      "step": 440
    },
    {
      "epoch": 1.125,
      "grad_norm": 0.2682015597820282,
      "learning_rate": 4.079070640471319e-05,
      "loss": 0.3782,
      "step": 450
    },
    {
      "epoch": 1.15,
      "grad_norm": 24.346281051635742,
      "learning_rate": 4.052754055694084e-05,
      "loss": 0.1474,
      "step": 460
    },
    {
      "epoch": 1.175,
      "grad_norm": 0.15391361713409424,
      "learning_rate": 4.02643747091685e-05,
      "loss": 0.4207,
      "step": 470
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.11409182101488113,
      "learning_rate": 4.0001208861396154e-05,
      "loss": 0.4453,
      "step": 480
    },
    {
      "epoch": 1.225,
      "grad_norm": 0.14815831184387207,
      "learning_rate": 3.9738043013623816e-05,
      "loss": 0.4236,
      "step": 490
    },
    {
      "epoch": 1.25,
      "grad_norm": 5.3385539054870605,
      "learning_rate": 3.947487716585147e-05,
      "loss": 0.3188,
      "step": 500
    },
    {
      "epoch": 1.275,
      "grad_norm": 5.233155250549316,
      "learning_rate": 3.9211711318079126e-05,
      "loss": 0.7307,
      "step": 510
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.8003888130187988,
      "learning_rate": 3.894854547030678e-05,
      "loss": 0.3721,
      "step": 520
    },
    {
      "epoch": 1.325,
      "grad_norm": 0.12942475080490112,
      "learning_rate": 3.868537962253444e-05,
      "loss": 0.061,
      "step": 530
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.29207348823547363,
      "learning_rate": 3.84222137747621e-05,
      "loss": 0.2718,
      "step": 540
    },
    {
      "epoch": 1.375,
      "grad_norm": 0.2017810046672821,
      "learning_rate": 3.8159047926989754e-05,
      "loss": 0.4297,
      "step": 550
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.2950953245162964,
      "learning_rate": 3.789588207921741e-05,
      "loss": 0.0521,
      "step": 560
    },
    {
      "epoch": 1.425,
      "grad_norm": 6.70197057723999,
      "learning_rate": 3.7632716231445065e-05,
      "loss": 0.4555,
      "step": 570
    },
    {
      "epoch": 1.45,
      "grad_norm": 28.17333984375,
      "learning_rate": 3.736955038367272e-05,
      "loss": 0.4297,
      "step": 580
    },
    {
      "epoch": 1.475,
      "grad_norm": 0.12028508633375168,
      "learning_rate": 3.710638453590038e-05,
      "loss": 0.1992,
      "step": 590
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.2757216691970825,
      "learning_rate": 3.684321868812804e-05,
      "loss": 0.2196,
      "step": 600
    },
    {
      "epoch": 1.525,
      "grad_norm": 0.12552417814731598,
      "learning_rate": 3.658005284035569e-05,
      "loss": 0.5815,
      "step": 610
    },
    {
      "epoch": 1.55,
      "grad_norm": 4.250138282775879,
      "learning_rate": 3.631688699258335e-05,
      "loss": 0.5406,
      "step": 620
    },
    {
      "epoch": 1.575,
      "grad_norm": 42.90773010253906,
      "learning_rate": 3.605372114481101e-05,
      "loss": 0.2289,
      "step": 630
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.2683817446231842,
      "learning_rate": 3.5790555297038665e-05,
      "loss": 0.2187,
      "step": 640
    },
    {
      "epoch": 1.625,
      "grad_norm": 0.16949182748794556,
      "learning_rate": 3.552738944926633e-05,
      "loss": 0.3488,
      "step": 650
    },
    {
      "epoch": 1.65,
      "grad_norm": 28.463308334350586,
      "learning_rate": 3.526422360149398e-05,
      "loss": 0.3393,
      "step": 660
    },
    {
      "epoch": 1.675,
      "grad_norm": 8.720256805419922,
      "learning_rate": 3.500105775372164e-05,
      "loss": 0.4792,
      "step": 670
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.81484055519104,
      "learning_rate": 3.473789190594929e-05,
      "loss": 0.4159,
      "step": 680
    },
    {
      "epoch": 1.725,
      "grad_norm": 0.22710223495960236,
      "learning_rate": 3.447472605817695e-05,
      "loss": 0.1826,
      "step": 690
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.8144286274909973,
      "learning_rate": 3.421156021040461e-05,
      "loss": 0.3352,
      "step": 700
    },
    {
      "epoch": 1.775,
      "grad_norm": 0.3589684069156647,
      "learning_rate": 3.3948394362632266e-05,
      "loss": 0.7971,
      "step": 710
    },
    {
      "epoch": 1.8,
      "grad_norm": 20.449018478393555,
      "learning_rate": 3.368522851485992e-05,
      "loss": 0.5189,
      "step": 720
    },
    {
      "epoch": 1.825,
      "grad_norm": 0.6803117394447327,
      "learning_rate": 3.3422062667087576e-05,
      "loss": 0.2641,
      "step": 730
    },
    {
      "epoch": 1.85,
      "grad_norm": 5.5921502113342285,
      "learning_rate": 3.315889681931523e-05,
      "loss": 0.1776,
      "step": 740
    },
    {
      "epoch": 1.875,
      "grad_norm": 15.335833549499512,
      "learning_rate": 3.2895730971542894e-05,
      "loss": 0.6454,
      "step": 750
    },
    {
      "epoch": 1.9,
      "grad_norm": 24.82597541809082,
      "learning_rate": 3.263256512377055e-05,
      "loss": 0.3175,
      "step": 760
    },
    {
      "epoch": 1.925,
      "grad_norm": 0.3673095107078552,
      "learning_rate": 3.2369399275998204e-05,
      "loss": 0.1565,
      "step": 770
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.30511701107025146,
      "learning_rate": 3.210623342822586e-05,
      "loss": 0.3474,
      "step": 780
    },
    {
      "epoch": 1.975,
      "grad_norm": 38.377174377441406,
      "learning_rate": 3.1843067580453515e-05,
      "loss": 0.5005,
      "step": 790
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.5073786973953247,
      "learning_rate": 3.157990173268118e-05,
      "loss": 0.7247,
      "step": 800
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.865,
      "eval_f1": 0.773109243697479,
      "eval_loss": 0.45795485377311707,
      "eval_precision": 0.8,
      "eval_recall": 0.7479674796747967,
      "eval_runtime": 1.5384,
      "eval_samples_per_second": 260.012,
      "eval_steps_per_second": 16.251,
      "step": 800
    },
    {
      "epoch": 2.025,
      "grad_norm": 14.978399276733398,
      "learning_rate": 3.131673588490883e-05,
      "loss": 0.2517,
      "step": 810
    },
    {
      "epoch": 2.05,
      "grad_norm": 0.3453332185745239,
      "learning_rate": 3.105357003713649e-05,
      "loss": 0.2949,
      "step": 820
    },
    {
      "epoch": 2.075,
      "grad_norm": 0.21060849726200104,
      "learning_rate": 3.079040418936414e-05,
      "loss": 0.2702,
      "step": 830
    },
    {
      "epoch": 2.1,
      "grad_norm": 48.51129913330078,
      "learning_rate": 3.05272383415918e-05,
      "loss": 0.0914,
      "step": 840
    },
    {
      "epoch": 2.125,
      "grad_norm": 16.647279739379883,
      "learning_rate": 3.0264072493819457e-05,
      "loss": 0.2664,
      "step": 850
    },
    {
      "epoch": 2.15,
      "grad_norm": 0.24965181946754456,
      "learning_rate": 3.0000906646047115e-05,
      "loss": 0.8968,
      "step": 860
    },
    {
      "epoch": 2.175,
      "grad_norm": 1.752435564994812,
      "learning_rate": 2.973774079827477e-05,
      "loss": 0.5488,
      "step": 870
    },
    {
      "epoch": 2.2,
      "grad_norm": 0.20626233518123627,
      "learning_rate": 2.9474574950502433e-05,
      "loss": 0.3845,
      "step": 880
    },
    {
      "epoch": 2.225,
      "grad_norm": 39.55342483520508,
      "learning_rate": 2.921140910273009e-05,
      "loss": 0.255,
      "step": 890
    },
    {
      "epoch": 2.25,
      "grad_norm": 0.6314402222633362,
      "learning_rate": 2.8948243254957747e-05,
      "loss": 0.2481,
      "step": 900
    },
    {
      "epoch": 2.275,
      "grad_norm": 4.929794788360596,
      "learning_rate": 2.8685077407185402e-05,
      "loss": 0.3894,
      "step": 910
    },
    {
      "epoch": 2.3,
      "grad_norm": 0.6494444608688354,
      "learning_rate": 2.842191155941306e-05,
      "loss": 0.2897,
      "step": 920
    },
    {
      "epoch": 2.325,
      "grad_norm": 0.17967885732650757,
      "learning_rate": 2.8158745711640716e-05,
      "loss": 0.1642,
      "step": 930
    },
    {
      "epoch": 2.35,
      "grad_norm": 0.047755829989910126,
      "learning_rate": 2.7895579863868374e-05,
      "loss": 0.3492,
      "step": 940
    },
    {
      "epoch": 2.375,
      "grad_norm": 66.83489990234375,
      "learning_rate": 2.763241401609603e-05,
      "loss": 0.0899,
      "step": 950
    },
    {
      "epoch": 2.4,
      "grad_norm": 21.72690200805664,
      "learning_rate": 2.7369248168323685e-05,
      "loss": 0.2154,
      "step": 960
    },
    {
      "epoch": 2.425,
      "grad_norm": 0.1563444286584854,
      "learning_rate": 2.7106082320551344e-05,
      "loss": 0.2106,
      "step": 970
    },
    {
      "epoch": 2.45,
      "grad_norm": 0.4117478132247925,
      "learning_rate": 2.6842916472779e-05,
      "loss": 0.0819,
      "step": 980
    },
    {
      "epoch": 2.475,
      "grad_norm": 41.47480392456055,
      "learning_rate": 2.6579750625006658e-05,
      "loss": 0.4206,
      "step": 990
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.29815855622291565,
      "learning_rate": 2.6316584777234313e-05,
      "loss": 0.1832,
      "step": 1000
    },
    {
      "epoch": 2.525,
      "grad_norm": 0.07651757448911667,
      "learning_rate": 2.6053418929461968e-05,
      "loss": 0.4396,
      "step": 1010
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.2124338150024414,
      "learning_rate": 2.5790253081689627e-05,
      "loss": 0.1765,
      "step": 1020
    },
    {
      "epoch": 2.575,
      "grad_norm": 32.49565505981445,
      "learning_rate": 2.5527087233917282e-05,
      "loss": 0.5668,
      "step": 1030
    },
    {
      "epoch": 2.6,
      "grad_norm": 3.7792484760284424,
      "learning_rate": 2.526392138614494e-05,
      "loss": 0.0786,
      "step": 1040
    },
    {
      "epoch": 2.625,
      "grad_norm": 24.304460525512695,
      "learning_rate": 2.5000755538372596e-05,
      "loss": 0.4862,
      "step": 1050
    },
    {
      "epoch": 2.65,
      "grad_norm": 0.03224577382206917,
      "learning_rate": 2.473758969060025e-05,
      "loss": 0.0639,
      "step": 1060
    },
    {
      "epoch": 2.675,
      "grad_norm": 0.10748755186796188,
      "learning_rate": 2.4474423842827913e-05,
      "loss": 0.3126,
      "step": 1070
    },
    {
      "epoch": 2.7,
      "grad_norm": 0.07485207915306091,
      "learning_rate": 2.421125799505557e-05,
      "loss": 0.4306,
      "step": 1080
    },
    {
      "epoch": 2.725,
      "grad_norm": 0.3468710780143738,
      "learning_rate": 2.3948092147283224e-05,
      "loss": 0.2609,
      "step": 1090
    },
    {
      "epoch": 2.75,
      "grad_norm": 0.1928665190935135,
      "learning_rate": 2.3684926299510883e-05,
      "loss": 0.1698,
      "step": 1100
    },
    {
      "epoch": 2.775,
      "grad_norm": 0.0683489441871643,
      "learning_rate": 2.3421760451738538e-05,
      "loss": 0.6863,
      "step": 1110
    },
    {
      "epoch": 2.8,
      "grad_norm": 0.12724503874778748,
      "learning_rate": 2.3158594603966197e-05,
      "loss": 0.1283,
      "step": 1120
    },
    {
      "epoch": 2.825,
      "grad_norm": 18.53827476501465,
      "learning_rate": 2.2895428756193852e-05,
      "loss": 0.4287,
      "step": 1130
    },
    {
      "epoch": 2.85,
      "grad_norm": 13.878090858459473,
      "learning_rate": 2.2632262908421507e-05,
      "loss": 0.4846,
      "step": 1140
    },
    {
      "epoch": 2.875,
      "grad_norm": 45.64787673950195,
      "learning_rate": 2.2369097060649166e-05,
      "loss": 0.2517,
      "step": 1150
    },
    {
      "epoch": 2.9,
      "grad_norm": 0.3627373278141022,
      "learning_rate": 2.210593121287682e-05,
      "loss": 0.024,
      "step": 1160
    },
    {
      "epoch": 2.925,
      "grad_norm": 4.56863260269165,
      "learning_rate": 2.184276536510448e-05,
      "loss": 0.1032,
      "step": 1170
    },
    {
      "epoch": 2.95,
      "grad_norm": 0.12671475112438202,
      "learning_rate": 2.1579599517332135e-05,
      "loss": 0.3173,
      "step": 1180
    },
    {
      "epoch": 2.975,
      "grad_norm": 0.2638857662677765,
      "learning_rate": 2.1316433669559794e-05,
      "loss": 0.3071,
      "step": 1190
    },
    {
      "epoch": 3.0,
      "grad_norm": 165.09217834472656,
      "learning_rate": 2.1053267821787452e-05,
      "loss": 0.3667,
      "step": 1200
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.855,
      "eval_f1": 0.7928571428571428,
      "eval_loss": 0.6172541975975037,
      "eval_precision": 0.7070063694267515,
      "eval_recall": 0.9024390243902439,
      "eval_runtime": 1.5339,
      "eval_samples_per_second": 260.768,
      "eval_steps_per_second": 16.298,
      "step": 1200
    },
    {
      "epoch": 3.025,
      "grad_norm": 0.03663462772965431,
      "learning_rate": 2.0790101974015108e-05,
      "loss": 0.2258,
      "step": 1210
    },
    {
      "epoch": 3.05,
      "grad_norm": 0.15920574963092804,
      "learning_rate": 2.0526936126242766e-05,
      "loss": 0.1118,
      "step": 1220
    },
    {
      "epoch": 3.075,
      "grad_norm": 10.940315246582031,
      "learning_rate": 2.026377027847042e-05,
      "loss": 0.1079,
      "step": 1230
    },
    {
      "epoch": 3.1,
      "grad_norm": 0.24473267793655396,
      "learning_rate": 2.0000604430698077e-05,
      "loss": 0.0076,
      "step": 1240
    },
    {
      "epoch": 3.125,
      "grad_norm": 0.11901724338531494,
      "learning_rate": 1.9737438582925736e-05,
      "loss": 0.0048,
      "step": 1250
    },
    {
      "epoch": 3.15,
      "grad_norm": 0.5029746890068054,
      "learning_rate": 1.947427273515339e-05,
      "loss": 0.518,
      "step": 1260
    },
    {
      "epoch": 3.175,
      "grad_norm": 0.16407223045825958,
      "learning_rate": 1.921110688738105e-05,
      "loss": 0.0487,
      "step": 1270
    },
    {
      "epoch": 3.2,
      "grad_norm": 43.9595947265625,
      "learning_rate": 1.8947941039608705e-05,
      "loss": 0.221,
      "step": 1280
    },
    {
      "epoch": 3.225,
      "grad_norm": 0.043054983019828796,
      "learning_rate": 1.868477519183636e-05,
      "loss": 0.2537,
      "step": 1290
    },
    {
      "epoch": 3.25,
      "grad_norm": 0.23155592381954193,
      "learning_rate": 1.842160934406402e-05,
      "loss": 0.4016,
      "step": 1300
    },
    {
      "epoch": 3.275,
      "grad_norm": 0.11036993563175201,
      "learning_rate": 1.8158443496291674e-05,
      "loss": 0.2479,
      "step": 1310
    },
    {
      "epoch": 3.3,
      "grad_norm": 9.334052085876465,
      "learning_rate": 1.7895277648519333e-05,
      "loss": 0.3436,
      "step": 1320
    },
    {
      "epoch": 3.325,
      "grad_norm": 0.12501460313796997,
      "learning_rate": 1.763211180074699e-05,
      "loss": 0.2112,
      "step": 1330
    },
    {
      "epoch": 3.35,
      "grad_norm": 0.06664387881755829,
      "learning_rate": 1.7368945952974647e-05,
      "loss": 0.2406,
      "step": 1340
    },
    {
      "epoch": 3.375,
      "grad_norm": 0.2532443702220917,
      "learning_rate": 1.7105780105202305e-05,
      "loss": 0.268,
      "step": 1350
    },
    {
      "epoch": 3.4,
      "grad_norm": 0.11059623956680298,
      "learning_rate": 1.684261425742996e-05,
      "loss": 0.1377,
      "step": 1360
    },
    {
      "epoch": 3.425,
      "grad_norm": 0.22316700220108032,
      "learning_rate": 1.6579448409657616e-05,
      "loss": 0.2411,
      "step": 1370
    },
    {
      "epoch": 3.45,
      "grad_norm": 0.4598884582519531,
      "learning_rate": 1.6316282561885274e-05,
      "loss": 0.1774,
      "step": 1380
    },
    {
      "epoch": 3.475,
      "grad_norm": 0.17457233369350433,
      "learning_rate": 1.605311671411293e-05,
      "loss": 0.0483,
      "step": 1390
    },
    {
      "epoch": 3.5,
      "grad_norm": 0.12707385420799255,
      "learning_rate": 1.578995086634059e-05,
      "loss": 0.0061,
      "step": 1400
    },
    {
      "epoch": 3.525,
      "grad_norm": 0.8905083537101746,
      "learning_rate": 1.5526785018568244e-05,
      "loss": 0.1226,
      "step": 1410
    },
    {
      "epoch": 3.55,
      "grad_norm": 0.027454137802124023,
      "learning_rate": 1.52636191707959e-05,
      "loss": 0.1416,
      "step": 1420
    },
    {
      "epoch": 3.575,
      "grad_norm": 9.629097938537598,
      "learning_rate": 1.5000453323023558e-05,
      "loss": 0.2324,
      "step": 1430
    },
    {
      "epoch": 3.6,
      "grad_norm": 0.048838574439287186,
      "learning_rate": 1.4737287475251216e-05,
      "loss": 0.1535,
      "step": 1440
    },
    {
      "epoch": 3.625,
      "grad_norm": 0.04500986263155937,
      "learning_rate": 1.4474121627478873e-05,
      "loss": 0.0829,
      "step": 1450
    },
    {
      "epoch": 3.65,
      "grad_norm": 0.07558059692382812,
      "learning_rate": 1.421095577970653e-05,
      "loss": 0.2319,
      "step": 1460
    },
    {
      "epoch": 3.675,
      "grad_norm": 0.17243552207946777,
      "learning_rate": 1.3947789931934187e-05,
      "loss": 0.008,
      "step": 1470
    },
    {
      "epoch": 3.7,
      "grad_norm": 0.15297254920005798,
      "learning_rate": 1.3684624084161843e-05,
      "loss": 0.1315,
      "step": 1480
    },
    {
      "epoch": 3.725,
      "grad_norm": 0.05509917438030243,
      "learning_rate": 1.34214582363895e-05,
      "loss": 0.0062,
      "step": 1490
    },
    {
      "epoch": 3.75,
      "grad_norm": 0.042248114943504333,
      "learning_rate": 1.3158292388617156e-05,
      "loss": 0.0033,
      "step": 1500
    },
    {
      "epoch": 3.775,
      "grad_norm": 0.0196397565305233,
      "learning_rate": 1.2895126540844813e-05,
      "loss": 0.1618,
      "step": 1510
    },
    {
      "epoch": 3.8,
      "grad_norm": 0.054895512759685516,
      "learning_rate": 1.263196069307247e-05,
      "loss": 0.1938,
      "step": 1520
    },
    {
      "epoch": 3.825,
      "grad_norm": 0.04431680217385292,
      "learning_rate": 1.2368794845300126e-05,
      "loss": 0.2307,
      "step": 1530
    },
    {
      "epoch": 3.85,
      "grad_norm": 0.04325120523571968,
      "learning_rate": 1.2105628997527784e-05,
      "loss": 0.0026,
      "step": 1540
    },
    {
      "epoch": 3.875,
      "grad_norm": 0.9810055494308472,
      "learning_rate": 1.1842463149755441e-05,
      "loss": 0.2551,
      "step": 1550
    },
    {
      "epoch": 3.9,
      "grad_norm": 0.11533799022436142,
      "learning_rate": 1.1579297301983098e-05,
      "loss": 0.604,
      "step": 1560
    },
    {
      "epoch": 3.925,
      "grad_norm": 0.09560931473970413,
      "learning_rate": 1.1316131454210754e-05,
      "loss": 0.1318,
      "step": 1570
    },
    {
      "epoch": 3.95,
      "grad_norm": 0.1265428364276886,
      "learning_rate": 1.105296560643841e-05,
      "loss": 0.1454,
      "step": 1580
    },
    {
      "epoch": 3.975,
      "grad_norm": 0.04381510615348816,
      "learning_rate": 1.0789799758666068e-05,
      "loss": 0.1591,
      "step": 1590
    },
    {
      "epoch": 4.0,
      "grad_norm": 56.61437225341797,
      "learning_rate": 1.0526633910893726e-05,
      "loss": 0.3659,
      "step": 1600
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8775,
      "eval_f1": 0.7932489451476793,
      "eval_loss": 0.5321782231330872,
      "eval_precision": 0.8245614035087719,
      "eval_recall": 0.7642276422764228,
      "eval_runtime": 1.5289,
      "eval_samples_per_second": 261.622,
      "eval_steps_per_second": 16.351,
      "step": 1600
    }
  ],
  "logging_steps": 10,
  "max_steps": 2000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 847261481803776.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": {
    "_wandb": {},
    "assignments": {},
    "learning_rate": 5.2633169554468626e-05,
    "metric": "eval/loss",
    "num_train_epochs": 5,
    "per_device_train_batch_size": 4,
    "seed": 25
  }
}