Safetensors
English
qwen2_vl
biology
medical
chemistry
biomed-Qwen2-VL-2B-Instruct / trainer_state.json
AdaptLLM's picture
Upload folder using huggingface_hub (#1)
5d775f6 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9998731447418495,
"eval_steps": 500,
"global_step": 3941,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002537105163009007,
"grad_norm": 4.350327938684374,
"learning_rate": 2.5316455696202533e-07,
"loss": 1.8196,
"step": 10
},
{
"epoch": 0.005074210326018014,
"grad_norm": 3.678530456221003,
"learning_rate": 5.063291139240507e-07,
"loss": 1.8108,
"step": 20
},
{
"epoch": 0.00761131548902702,
"grad_norm": 2.945036910011768,
"learning_rate": 7.59493670886076e-07,
"loss": 1.8147,
"step": 30
},
{
"epoch": 0.010148420652036028,
"grad_norm": 2.3190574498378447,
"learning_rate": 1.0126582278481013e-06,
"loss": 1.7548,
"step": 40
},
{
"epoch": 0.012685525815045033,
"grad_norm": 2.074674879169714,
"learning_rate": 1.2658227848101267e-06,
"loss": 1.7204,
"step": 50
},
{
"epoch": 0.01522263097805404,
"grad_norm": 1.748653753253889,
"learning_rate": 1.518987341772152e-06,
"loss": 1.6762,
"step": 60
},
{
"epoch": 0.01775973614106305,
"grad_norm": 1.6751399170091914,
"learning_rate": 1.7721518987341774e-06,
"loss": 1.6487,
"step": 70
},
{
"epoch": 0.020296841304072055,
"grad_norm": 1.6363195765660283,
"learning_rate": 2.0253164556962026e-06,
"loss": 1.6116,
"step": 80
},
{
"epoch": 0.022833946467081062,
"grad_norm": 1.5794976909544993,
"learning_rate": 2.278481012658228e-06,
"loss": 1.5916,
"step": 90
},
{
"epoch": 0.025371051630090066,
"grad_norm": 1.6233632992191482,
"learning_rate": 2.5316455696202535e-06,
"loss": 1.598,
"step": 100
},
{
"epoch": 0.027908156793099072,
"grad_norm": 1.5800661094425872,
"learning_rate": 2.7848101265822785e-06,
"loss": 1.5626,
"step": 110
},
{
"epoch": 0.03044526195610808,
"grad_norm": 1.6072050143283245,
"learning_rate": 3.037974683544304e-06,
"loss": 1.5457,
"step": 120
},
{
"epoch": 0.03298236711911709,
"grad_norm": 1.6572306247078625,
"learning_rate": 3.2911392405063294e-06,
"loss": 1.5391,
"step": 130
},
{
"epoch": 0.0355194722821261,
"grad_norm": 1.586848380490154,
"learning_rate": 3.544303797468355e-06,
"loss": 1.5125,
"step": 140
},
{
"epoch": 0.038056577445135104,
"grad_norm": 1.638449311664989,
"learning_rate": 3.7974683544303802e-06,
"loss": 1.5243,
"step": 150
},
{
"epoch": 0.04059368260814411,
"grad_norm": 1.6031920359772533,
"learning_rate": 4.050632911392405e-06,
"loss": 1.4856,
"step": 160
},
{
"epoch": 0.04313078777115312,
"grad_norm": 1.5967439995800559,
"learning_rate": 4.303797468354431e-06,
"loss": 1.5129,
"step": 170
},
{
"epoch": 0.045667892934162124,
"grad_norm": 1.6075212497819606,
"learning_rate": 4.556962025316456e-06,
"loss": 1.4961,
"step": 180
},
{
"epoch": 0.04820499809717113,
"grad_norm": 1.6791238021539772,
"learning_rate": 4.8101265822784815e-06,
"loss": 1.4893,
"step": 190
},
{
"epoch": 0.05074210326018013,
"grad_norm": 1.6210921331693446,
"learning_rate": 5.063291139240507e-06,
"loss": 1.5013,
"step": 200
},
{
"epoch": 0.05327920842318914,
"grad_norm": 1.7652729374280518,
"learning_rate": 5.3164556962025316e-06,
"loss": 1.473,
"step": 210
},
{
"epoch": 0.055816313586198145,
"grad_norm": 1.6723244529240142,
"learning_rate": 5.569620253164557e-06,
"loss": 1.4791,
"step": 220
},
{
"epoch": 0.05835341874920715,
"grad_norm": 1.825326138794735,
"learning_rate": 5.8227848101265824e-06,
"loss": 1.4761,
"step": 230
},
{
"epoch": 0.06089052391221616,
"grad_norm": 1.9131148271572453,
"learning_rate": 6.075949367088608e-06,
"loss": 1.4626,
"step": 240
},
{
"epoch": 0.06342762907522517,
"grad_norm": 1.6613770739809675,
"learning_rate": 6.329113924050634e-06,
"loss": 1.4601,
"step": 250
},
{
"epoch": 0.06596473423823418,
"grad_norm": 1.6666458237214428,
"learning_rate": 6.582278481012659e-06,
"loss": 1.4686,
"step": 260
},
{
"epoch": 0.06850183940124319,
"grad_norm": 1.5745675069520453,
"learning_rate": 6.835443037974684e-06,
"loss": 1.461,
"step": 270
},
{
"epoch": 0.0710389445642522,
"grad_norm": 1.6507776778175596,
"learning_rate": 7.08860759493671e-06,
"loss": 1.47,
"step": 280
},
{
"epoch": 0.0735760497272612,
"grad_norm": 1.6009958375778823,
"learning_rate": 7.341772151898735e-06,
"loss": 1.4526,
"step": 290
},
{
"epoch": 0.07611315489027021,
"grad_norm": 1.6786912574149853,
"learning_rate": 7.5949367088607605e-06,
"loss": 1.4501,
"step": 300
},
{
"epoch": 0.07865026005327921,
"grad_norm": 1.6698693144659327,
"learning_rate": 7.848101265822786e-06,
"loss": 1.4483,
"step": 310
},
{
"epoch": 0.08118736521628822,
"grad_norm": 1.7393580296857223,
"learning_rate": 8.10126582278481e-06,
"loss": 1.4252,
"step": 320
},
{
"epoch": 0.08372447037929723,
"grad_norm": 1.6124831573952214,
"learning_rate": 8.354430379746837e-06,
"loss": 1.4274,
"step": 330
},
{
"epoch": 0.08626157554230623,
"grad_norm": 1.6899774259466704,
"learning_rate": 8.607594936708861e-06,
"loss": 1.437,
"step": 340
},
{
"epoch": 0.08879868070531524,
"grad_norm": 1.6821954539953226,
"learning_rate": 8.860759493670886e-06,
"loss": 1.4388,
"step": 350
},
{
"epoch": 0.09133578586832425,
"grad_norm": 1.8121412852354848,
"learning_rate": 9.113924050632912e-06,
"loss": 1.4151,
"step": 360
},
{
"epoch": 0.09387289103133326,
"grad_norm": 1.5407350947949157,
"learning_rate": 9.367088607594937e-06,
"loss": 1.4274,
"step": 370
},
{
"epoch": 0.09640999619434226,
"grad_norm": 1.7381357929095853,
"learning_rate": 9.620253164556963e-06,
"loss": 1.4309,
"step": 380
},
{
"epoch": 0.09894710135735126,
"grad_norm": 1.6085237968347799,
"learning_rate": 9.87341772151899e-06,
"loss": 1.4173,
"step": 390
},
{
"epoch": 0.10148420652036026,
"grad_norm": 1.728407830056737,
"learning_rate": 9.999950942931784e-06,
"loss": 1.4312,
"step": 400
},
{
"epoch": 0.10402131168336927,
"grad_norm": 1.6972786696047149,
"learning_rate": 9.999558492161865e-06,
"loss": 1.422,
"step": 410
},
{
"epoch": 0.10655841684637828,
"grad_norm": 1.6297612720977512,
"learning_rate": 9.998773621425852e-06,
"loss": 1.3892,
"step": 420
},
{
"epoch": 0.10909552200938728,
"grad_norm": 1.7368434280409393,
"learning_rate": 9.997596392328971e-06,
"loss": 1.4368,
"step": 430
},
{
"epoch": 0.11163262717239629,
"grad_norm": 1.8196387241516612,
"learning_rate": 9.996026897273024e-06,
"loss": 1.4129,
"step": 440
},
{
"epoch": 0.1141697323354053,
"grad_norm": 1.6083388947957715,
"learning_rate": 9.994065259449128e-06,
"loss": 1.4181,
"step": 450
},
{
"epoch": 0.1167068374984143,
"grad_norm": 1.7315485017229137,
"learning_rate": 9.991711632828049e-06,
"loss": 1.4107,
"step": 460
},
{
"epoch": 0.11924394266142331,
"grad_norm": 1.6192259621686464,
"learning_rate": 9.988966202148115e-06,
"loss": 1.3933,
"step": 470
},
{
"epoch": 0.12178104782443232,
"grad_norm": 1.685661277294985,
"learning_rate": 9.985829182900717e-06,
"loss": 1.4305,
"step": 480
},
{
"epoch": 0.12431815298744132,
"grad_norm": 1.7673237121898477,
"learning_rate": 9.982300821313394e-06,
"loss": 1.407,
"step": 490
},
{
"epoch": 0.12685525815045035,
"grad_norm": 1.6729684650970384,
"learning_rate": 9.978381394330509e-06,
"loss": 1.3941,
"step": 500
},
{
"epoch": 0.12939236331345935,
"grad_norm": 1.6737204711157692,
"learning_rate": 9.974071209591507e-06,
"loss": 1.4083,
"step": 510
},
{
"epoch": 0.13192946847646836,
"grad_norm": 1.5846450496238496,
"learning_rate": 9.96937060540677e-06,
"loss": 1.3913,
"step": 520
},
{
"epoch": 0.13446657363947737,
"grad_norm": 1.6440175318683266,
"learning_rate": 9.964279950731066e-06,
"loss": 1.4141,
"step": 530
},
{
"epoch": 0.13700367880248637,
"grad_norm": 1.5435352480418292,
"learning_rate": 9.958799645134585e-06,
"loss": 1.3923,
"step": 540
},
{
"epoch": 0.13954078396549538,
"grad_norm": 1.6806917695478834,
"learning_rate": 9.952930118771576e-06,
"loss": 1.3882,
"step": 550
},
{
"epoch": 0.1420778891285044,
"grad_norm": 1.6991483906725386,
"learning_rate": 9.946671832346588e-06,
"loss": 1.3806,
"step": 560
},
{
"epoch": 0.1446149942915134,
"grad_norm": 1.6444779930069549,
"learning_rate": 9.940025277078304e-06,
"loss": 1.3877,
"step": 570
},
{
"epoch": 0.1471520994545224,
"grad_norm": 1.584958906864304,
"learning_rate": 9.932990974660992e-06,
"loss": 1.3758,
"step": 580
},
{
"epoch": 0.1496892046175314,
"grad_norm": 1.6339337045637337,
"learning_rate": 9.925569477223549e-06,
"loss": 1.3942,
"step": 590
},
{
"epoch": 0.15222630978054041,
"grad_norm": 1.6782688039697937,
"learning_rate": 9.917761367286164e-06,
"loss": 1.3997,
"step": 600
},
{
"epoch": 0.15476341494354942,
"grad_norm": 1.722985917307532,
"learning_rate": 9.909567257714605e-06,
"loss": 1.3902,
"step": 610
},
{
"epoch": 0.15730052010655843,
"grad_norm": 1.515629790408513,
"learning_rate": 9.9009877916721e-06,
"loss": 1.3906,
"step": 620
},
{
"epoch": 0.15983762526956743,
"grad_norm": 1.6832684084973726,
"learning_rate": 9.892023642568871e-06,
"loss": 1.3644,
"step": 630
},
{
"epoch": 0.16237473043257644,
"grad_norm": 1.7217857604177804,
"learning_rate": 9.882675514009262e-06,
"loss": 1.3673,
"step": 640
},
{
"epoch": 0.16491183559558545,
"grad_norm": 1.772784930329774,
"learning_rate": 9.872944139736523e-06,
"loss": 1.3751,
"step": 650
},
{
"epoch": 0.16744894075859446,
"grad_norm": 1.528731449667675,
"learning_rate": 9.862830283575215e-06,
"loss": 1.3678,
"step": 660
},
{
"epoch": 0.16998604592160346,
"grad_norm": 1.6156407009731812,
"learning_rate": 9.852334739371252e-06,
"loss": 1.3825,
"step": 670
},
{
"epoch": 0.17252315108461247,
"grad_norm": 1.6415774929326135,
"learning_rate": 9.841458330929598e-06,
"loss": 1.3884,
"step": 680
},
{
"epoch": 0.17506025624762148,
"grad_norm": 1.6070221223746397,
"learning_rate": 9.830201911949604e-06,
"loss": 1.3934,
"step": 690
},
{
"epoch": 0.17759736141063048,
"grad_norm": 1.562454926578275,
"learning_rate": 9.818566365957996e-06,
"loss": 1.3645,
"step": 700
},
{
"epoch": 0.1801344665736395,
"grad_norm": 1.5996614008577792,
"learning_rate": 9.80655260623953e-06,
"loss": 1.3708,
"step": 710
},
{
"epoch": 0.1826715717366485,
"grad_norm": 1.5048794279696338,
"learning_rate": 9.794161575765311e-06,
"loss": 1.3749,
"step": 720
},
{
"epoch": 0.1852086768996575,
"grad_norm": 1.5935516523984996,
"learning_rate": 9.78139424711877e-06,
"loss": 1.3886,
"step": 730
},
{
"epoch": 0.1877457820626665,
"grad_norm": 1.5282445861981415,
"learning_rate": 9.76825162241933e-06,
"loss": 1.373,
"step": 740
},
{
"epoch": 0.19028288722567552,
"grad_norm": 1.6358363561782086,
"learning_rate": 9.754734733243749e-06,
"loss": 1.3742,
"step": 750
},
{
"epoch": 0.19281999238868452,
"grad_norm": 1.587601691095452,
"learning_rate": 9.740844640545151e-06,
"loss": 1.3603,
"step": 760
},
{
"epoch": 0.19535709755169353,
"grad_norm": 1.5280980736395107,
"learning_rate": 9.726582434569744e-06,
"loss": 1.3636,
"step": 770
},
{
"epoch": 0.1978942027147025,
"grad_norm": 1.5821827900533842,
"learning_rate": 9.711949234771258e-06,
"loss": 1.3536,
"step": 780
},
{
"epoch": 0.20043130787771152,
"grad_norm": 1.6354511380141648,
"learning_rate": 9.696946189723067e-06,
"loss": 1.3777,
"step": 790
},
{
"epoch": 0.20296841304072052,
"grad_norm": 1.4961728532518945,
"learning_rate": 9.681574477028039e-06,
"loss": 1.3555,
"step": 800
},
{
"epoch": 0.20550551820372953,
"grad_norm": 1.6165039622149184,
"learning_rate": 9.66583530322611e-06,
"loss": 1.3736,
"step": 810
},
{
"epoch": 0.20804262336673854,
"grad_norm": 1.6085071421673924,
"learning_rate": 9.649729903699575e-06,
"loss": 1.3685,
"step": 820
},
{
"epoch": 0.21057972852974755,
"grad_norm": 1.674024959469941,
"learning_rate": 9.633259542576127e-06,
"loss": 1.3516,
"step": 830
},
{
"epoch": 0.21311683369275655,
"grad_norm": 1.5457350813422102,
"learning_rate": 9.61642551262963e-06,
"loss": 1.3433,
"step": 840
},
{
"epoch": 0.21565393885576556,
"grad_norm": 1.5638603110160445,
"learning_rate": 9.599229135178651e-06,
"loss": 1.3596,
"step": 850
},
{
"epoch": 0.21819104401877457,
"grad_norm": 1.5845605847496684,
"learning_rate": 9.581671759982747e-06,
"loss": 1.3821,
"step": 860
},
{
"epoch": 0.22072814918178357,
"grad_norm": 1.56321358587459,
"learning_rate": 9.563754765136522e-06,
"loss": 1.3568,
"step": 870
},
{
"epoch": 0.22326525434479258,
"grad_norm": 1.621089242536098,
"learning_rate": 9.545479556961457e-06,
"loss": 1.3614,
"step": 880
},
{
"epoch": 0.2258023595078016,
"grad_norm": 1.5746598904044478,
"learning_rate": 9.526847569895529e-06,
"loss": 1.3536,
"step": 890
},
{
"epoch": 0.2283394646708106,
"grad_norm": 1.632144120218129,
"learning_rate": 9.507860266380625e-06,
"loss": 1.3521,
"step": 900
},
{
"epoch": 0.2308765698338196,
"grad_norm": 1.6660492103415234,
"learning_rate": 9.488519136747741e-06,
"loss": 1.3455,
"step": 910
},
{
"epoch": 0.2334136749968286,
"grad_norm": 1.6307872469664786,
"learning_rate": 9.468825699100013e-06,
"loss": 1.3388,
"step": 920
},
{
"epoch": 0.23595078015983761,
"grad_norm": 1.51760811186189,
"learning_rate": 9.448781499193563e-06,
"loss": 1.36,
"step": 930
},
{
"epoch": 0.23848788532284662,
"grad_norm": 1.6298958079571104,
"learning_rate": 9.428388110316165e-06,
"loss": 1.346,
"step": 940
},
{
"epoch": 0.24102499048585563,
"grad_norm": 1.6241168589647443,
"learning_rate": 9.407647133163754e-06,
"loss": 1.3565,
"step": 950
},
{
"epoch": 0.24356209564886463,
"grad_norm": 1.6330870068463266,
"learning_rate": 9.386560195714796e-06,
"loss": 1.3539,
"step": 960
},
{
"epoch": 0.24609920081187364,
"grad_norm": 1.5846187793083721,
"learning_rate": 9.365128953102495e-06,
"loss": 1.3443,
"step": 970
},
{
"epoch": 0.24863630597488265,
"grad_norm": 1.5816319458789425,
"learning_rate": 9.343355087484893e-06,
"loss": 1.3449,
"step": 980
},
{
"epoch": 0.25117341113789166,
"grad_norm": 1.608667389007063,
"learning_rate": 9.321240307912818e-06,
"loss": 1.3503,
"step": 990
},
{
"epoch": 0.2537105163009007,
"grad_norm": 1.535736158897923,
"learning_rate": 9.298786350195758e-06,
"loss": 1.3504,
"step": 1000
},
{
"epoch": 0.25624762146390967,
"grad_norm": 1.6105502703548435,
"learning_rate": 9.275994976765602e-06,
"loss": 1.3512,
"step": 1010
},
{
"epoch": 0.2587847266269187,
"grad_norm": 1.457234439212148,
"learning_rate": 9.252867976538312e-06,
"loss": 1.3447,
"step": 1020
},
{
"epoch": 0.2613218317899277,
"grad_norm": 1.632312084639862,
"learning_rate": 9.22940716477351e-06,
"loss": 1.3451,
"step": 1030
},
{
"epoch": 0.2638589369529367,
"grad_norm": 1.5657163405769847,
"learning_rate": 9.205614382931986e-06,
"loss": 1.3678,
"step": 1040
},
{
"epoch": 0.2663960421159457,
"grad_norm": 1.523325498659843,
"learning_rate": 9.181491498531179e-06,
"loss": 1.355,
"step": 1050
},
{
"epoch": 0.26893314727895473,
"grad_norm": 1.5647021825494114,
"learning_rate": 9.157040404998572e-06,
"loss": 1.3455,
"step": 1060
},
{
"epoch": 0.2714702524419637,
"grad_norm": 1.581907280598391,
"learning_rate": 9.132263021523096e-06,
"loss": 1.353,
"step": 1070
},
{
"epoch": 0.27400735760497275,
"grad_norm": 1.4861566014453274,
"learning_rate": 9.107161292904476e-06,
"loss": 1.3428,
"step": 1080
},
{
"epoch": 0.2765444627679817,
"grad_norm": 1.6256210181495103,
"learning_rate": 9.081737189400583e-06,
"loss": 1.3421,
"step": 1090
},
{
"epoch": 0.27908156793099076,
"grad_norm": 1.4876360574590954,
"learning_rate": 9.0559927065728e-06,
"loss": 1.3377,
"step": 1100
},
{
"epoch": 0.28161867309399974,
"grad_norm": 1.5204275847901962,
"learning_rate": 9.029929865129375e-06,
"loss": 1.349,
"step": 1110
},
{
"epoch": 0.2841557782570088,
"grad_norm": 1.5624633516357405,
"learning_rate": 9.003550710766813e-06,
"loss": 1.3552,
"step": 1120
},
{
"epoch": 0.28669288342001775,
"grad_norm": 1.5591567540085947,
"learning_rate": 8.97685731400932e-06,
"loss": 1.3209,
"step": 1130
},
{
"epoch": 0.2892299885830268,
"grad_norm": 1.5373317845285133,
"learning_rate": 8.949851770046272e-06,
"loss": 1.3267,
"step": 1140
},
{
"epoch": 0.29176709374603577,
"grad_norm": 1.5556061129094692,
"learning_rate": 8.922536198567772e-06,
"loss": 1.3379,
"step": 1150
},
{
"epoch": 0.2943041989090448,
"grad_norm": 1.971486780664198,
"learning_rate": 8.894912743598269e-06,
"loss": 1.3272,
"step": 1160
},
{
"epoch": 0.2968413040720538,
"grad_norm": 1.5365700226491938,
"learning_rate": 8.866983573328267e-06,
"loss": 1.333,
"step": 1170
},
{
"epoch": 0.2993784092350628,
"grad_norm": 1.6217713070921793,
"learning_rate": 8.83875087994415e-06,
"loss": 1.3497,
"step": 1180
},
{
"epoch": 0.3019155143980718,
"grad_norm": 1.4917017043884344,
"learning_rate": 8.810216879456114e-06,
"loss": 1.3355,
"step": 1190
},
{
"epoch": 0.30445261956108083,
"grad_norm": 1.5427563058731948,
"learning_rate": 8.781383811524222e-06,
"loss": 1.3339,
"step": 1200
},
{
"epoch": 0.3069897247240898,
"grad_norm": 1.5666645778409243,
"learning_rate": 8.752253939282622e-06,
"loss": 1.332,
"step": 1210
},
{
"epoch": 0.30952682988709884,
"grad_norm": 1.5940427272465527,
"learning_rate": 8.722829549161904e-06,
"loss": 1.3411,
"step": 1220
},
{
"epoch": 0.3120639350501078,
"grad_norm": 1.569355522659196,
"learning_rate": 8.69311295070964e-06,
"loss": 1.321,
"step": 1230
},
{
"epoch": 0.31460104021311686,
"grad_norm": 1.5823744419831982,
"learning_rate": 8.663106476409107e-06,
"loss": 1.3511,
"step": 1240
},
{
"epoch": 0.31713814537612584,
"grad_norm": 1.5626340370876246,
"learning_rate": 8.632812481496195e-06,
"loss": 1.3491,
"step": 1250
},
{
"epoch": 0.31967525053913487,
"grad_norm": 1.6216546055767536,
"learning_rate": 8.602233343774562e-06,
"loss": 1.3294,
"step": 1260
},
{
"epoch": 0.32221235570214385,
"grad_norm": 1.4885399811487754,
"learning_rate": 8.571371463428986e-06,
"loss": 1.3419,
"step": 1270
},
{
"epoch": 0.3247494608651529,
"grad_norm": 1.597124872589071,
"learning_rate": 8.540229262836974e-06,
"loss": 1.3245,
"step": 1280
},
{
"epoch": 0.32728656602816186,
"grad_norm": 1.5069638761813242,
"learning_rate": 8.508809186378631e-06,
"loss": 1.3357,
"step": 1290
},
{
"epoch": 0.3298236711911709,
"grad_norm": 1.5496475251999724,
"learning_rate": 8.477113700244788e-06,
"loss": 1.3297,
"step": 1300
},
{
"epoch": 0.3323607763541799,
"grad_norm": 1.5177410295586948,
"learning_rate": 8.445145292243446e-06,
"loss": 1.3361,
"step": 1310
},
{
"epoch": 0.3348978815171889,
"grad_norm": 1.4375424317665,
"learning_rate": 8.412906471604489e-06,
"loss": 1.3365,
"step": 1320
},
{
"epoch": 0.3374349866801979,
"grad_norm": 1.4733958562961815,
"learning_rate": 8.380399768782742e-06,
"loss": 1.3364,
"step": 1330
},
{
"epoch": 0.3399720918432069,
"grad_norm": 1.5665888162471464,
"learning_rate": 8.347627735259344e-06,
"loss": 1.3572,
"step": 1340
},
{
"epoch": 0.3425091970062159,
"grad_norm": 1.5175787042273947,
"learning_rate": 8.314592943341494e-06,
"loss": 1.311,
"step": 1350
},
{
"epoch": 0.34504630216922494,
"grad_norm": 1.5210307965368668,
"learning_rate": 8.281297985960538e-06,
"loss": 1.3261,
"step": 1360
},
{
"epoch": 0.3475834073322339,
"grad_norm": 1.5365431443148119,
"learning_rate": 8.247745476468449e-06,
"loss": 1.3433,
"step": 1370
},
{
"epoch": 0.35012051249524295,
"grad_norm": 1.5548012069585933,
"learning_rate": 8.213938048432697e-06,
"loss": 1.3134,
"step": 1380
},
{
"epoch": 0.35265761765825193,
"grad_norm": 1.4642811591908687,
"learning_rate": 8.179878355429556e-06,
"loss": 1.3159,
"step": 1390
},
{
"epoch": 0.35519472282126097,
"grad_norm": 1.6713134353309254,
"learning_rate": 8.145569070835799e-06,
"loss": 1.3285,
"step": 1400
},
{
"epoch": 0.35773182798426995,
"grad_norm": 1.5444628338197106,
"learning_rate": 8.111012887618882e-06,
"loss": 1.344,
"step": 1410
},
{
"epoch": 0.360268933147279,
"grad_norm": 1.5042040298049457,
"learning_rate": 8.076212518125556e-06,
"loss": 1.3217,
"step": 1420
},
{
"epoch": 0.36280603831028796,
"grad_norm": 1.5827643194628298,
"learning_rate": 8.041170693868985e-06,
"loss": 1.3284,
"step": 1430
},
{
"epoch": 0.365343143473297,
"grad_norm": 1.4314485322723574,
"learning_rate": 8.005890165314334e-06,
"loss": 1.3188,
"step": 1440
},
{
"epoch": 0.367880248636306,
"grad_norm": 1.5452457890288078,
"learning_rate": 7.970373701662892e-06,
"loss": 1.3123,
"step": 1450
},
{
"epoch": 0.370417353799315,
"grad_norm": 1.5944938106930338,
"learning_rate": 7.934624090634713e-06,
"loss": 1.3131,
"step": 1460
},
{
"epoch": 0.372954458962324,
"grad_norm": 1.5553727991379855,
"learning_rate": 7.8986441382498e-06,
"loss": 1.3318,
"step": 1470
},
{
"epoch": 0.375491564125333,
"grad_norm": 1.5196578480754726,
"learning_rate": 7.862436668607865e-06,
"loss": 1.3164,
"step": 1480
},
{
"epoch": 0.378028669288342,
"grad_norm": 1.5354385242535227,
"learning_rate": 7.826004523666661e-06,
"loss": 1.3292,
"step": 1490
},
{
"epoch": 0.38056577445135104,
"grad_norm": 1.5449910825994637,
"learning_rate": 7.78935056301891e-06,
"loss": 1.3272,
"step": 1500
},
{
"epoch": 0.38310287961436,
"grad_norm": 1.4946907973724173,
"learning_rate": 7.752477663667854e-06,
"loss": 1.3391,
"step": 1510
},
{
"epoch": 0.38563998477736905,
"grad_norm": 1.5791940161814702,
"learning_rate": 7.715388719801437e-06,
"loss": 1.3392,
"step": 1520
},
{
"epoch": 0.38817708994037803,
"grad_norm": 1.4567702862839176,
"learning_rate": 7.67808664256514e-06,
"loss": 1.2971,
"step": 1530
},
{
"epoch": 0.39071419510338706,
"grad_norm": 1.4605769814867744,
"learning_rate": 7.640574359833472e-06,
"loss": 1.3148,
"step": 1540
},
{
"epoch": 0.39325130026639604,
"grad_norm": 1.5566796816874888,
"learning_rate": 7.6028548159801685e-06,
"loss": 1.3315,
"step": 1550
},
{
"epoch": 0.395788405429405,
"grad_norm": 1.5768032029757384,
"learning_rate": 7.564930971647087e-06,
"loss": 1.3238,
"step": 1560
},
{
"epoch": 0.39832551059241406,
"grad_norm": 1.5702550171255043,
"learning_rate": 7.52680580351181e-06,
"loss": 1.3175,
"step": 1570
},
{
"epoch": 0.40086261575542304,
"grad_norm": 1.5687466837527182,
"learning_rate": 7.488482304054019e-06,
"loss": 1.3104,
"step": 1580
},
{
"epoch": 0.40339972091843207,
"grad_norm": 1.5897550883645912,
"learning_rate": 7.449963481320599e-06,
"loss": 1.316,
"step": 1590
},
{
"epoch": 0.40593682608144105,
"grad_norm": 1.5236147067965886,
"learning_rate": 7.411252358689541e-06,
"loss": 1.3273,
"step": 1600
},
{
"epoch": 0.4084739312444501,
"grad_norm": 1.5469446528938424,
"learning_rate": 7.372351974632634e-06,
"loss": 1.3119,
"step": 1610
},
{
"epoch": 0.41101103640745906,
"grad_norm": 1.4722026799112722,
"learning_rate": 7.333265382476971e-06,
"loss": 1.3151,
"step": 1620
},
{
"epoch": 0.4135481415704681,
"grad_norm": 1.5178886141824586,
"learning_rate": 7.293995650165287e-06,
"loss": 1.3245,
"step": 1630
},
{
"epoch": 0.4160852467334771,
"grad_norm": 1.5308435376939995,
"learning_rate": 7.2545458600151615e-06,
"loss": 1.3317,
"step": 1640
},
{
"epoch": 0.4186223518964861,
"grad_norm": 1.5091424984828243,
"learning_rate": 7.214919108477077e-06,
"loss": 1.3044,
"step": 1650
},
{
"epoch": 0.4211594570594951,
"grad_norm": 1.457202507709852,
"learning_rate": 7.175118505891385e-06,
"loss": 1.3339,
"step": 1660
},
{
"epoch": 0.4236965622225041,
"grad_norm": 1.530896247556501,
"learning_rate": 7.135147176244158e-06,
"loss": 1.3044,
"step": 1670
},
{
"epoch": 0.4262336673855131,
"grad_norm": 1.5274463812149695,
"learning_rate": 7.0950082569219955e-06,
"loss": 1.3048,
"step": 1680
},
{
"epoch": 0.42877077254852214,
"grad_norm": 1.507428973101804,
"learning_rate": 7.054704898465772e-06,
"loss": 1.3069,
"step": 1690
},
{
"epoch": 0.4313078777115311,
"grad_norm": 1.5716469315983397,
"learning_rate": 7.0142402643233346e-06,
"loss": 1.3136,
"step": 1700
},
{
"epoch": 0.43384498287454015,
"grad_norm": 1.4220881687524514,
"learning_rate": 6.973617530601209e-06,
"loss": 1.3165,
"step": 1710
},
{
"epoch": 0.43638208803754913,
"grad_norm": 1.5926945403384438,
"learning_rate": 6.932839885815304e-06,
"loss": 1.3301,
"step": 1720
},
{
"epoch": 0.43891919320055817,
"grad_norm": 1.4527595611730801,
"learning_rate": 6.891910530640642e-06,
"loss": 1.3145,
"step": 1730
},
{
"epoch": 0.44145629836356715,
"grad_norm": 1.5069254389998272,
"learning_rate": 6.850832677660134e-06,
"loss": 1.3139,
"step": 1740
},
{
"epoch": 0.4439934035265762,
"grad_norm": 1.4587280578384394,
"learning_rate": 6.809609551112419e-06,
"loss": 1.3085,
"step": 1750
},
{
"epoch": 0.44653050868958516,
"grad_norm": 1.5122830472595903,
"learning_rate": 6.768244386638793e-06,
"loss": 1.3158,
"step": 1760
},
{
"epoch": 0.4490676138525942,
"grad_norm": 1.4912245201929943,
"learning_rate": 6.726740431029243e-06,
"loss": 1.3167,
"step": 1770
},
{
"epoch": 0.4516047190156032,
"grad_norm": 1.5574941259720791,
"learning_rate": 6.685100941967596e-06,
"loss": 1.3118,
"step": 1780
},
{
"epoch": 0.4541418241786122,
"grad_norm": 1.4994130740882026,
"learning_rate": 6.643329187775827e-06,
"loss": 1.307,
"step": 1790
},
{
"epoch": 0.4566789293416212,
"grad_norm": 1.5791237950971593,
"learning_rate": 6.601428447157525e-06,
"loss": 1.3086,
"step": 1800
},
{
"epoch": 0.4592160345046302,
"grad_norm": 1.5319564794342408,
"learning_rate": 6.559402008940539e-06,
"loss": 1.3025,
"step": 1810
},
{
"epoch": 0.4617531396676392,
"grad_norm": 1.5560620624811086,
"learning_rate": 6.517253171818844e-06,
"loss": 1.3146,
"step": 1820
},
{
"epoch": 0.46429024483064824,
"grad_norm": 1.5762189341956727,
"learning_rate": 6.474985244093613e-06,
"loss": 1.307,
"step": 1830
},
{
"epoch": 0.4668273499936572,
"grad_norm": 1.568824162809672,
"learning_rate": 6.432601543413552e-06,
"loss": 1.2996,
"step": 1840
},
{
"epoch": 0.46936445515666625,
"grad_norm": 1.461712822890638,
"learning_rate": 6.390105396514497e-06,
"loss": 1.3013,
"step": 1850
},
{
"epoch": 0.47190156031967523,
"grad_norm": 1.4727912142252813,
"learning_rate": 6.347500138958285e-06,
"loss": 1.3086,
"step": 1860
},
{
"epoch": 0.47443866548268426,
"grad_norm": 1.4842630358439066,
"learning_rate": 6.304789114870953e-06,
"loss": 1.3121,
"step": 1870
},
{
"epoch": 0.47697577064569324,
"grad_norm": 1.5147058669468259,
"learning_rate": 6.261975676680252e-06,
"loss": 1.3109,
"step": 1880
},
{
"epoch": 0.4795128758087023,
"grad_norm": 1.5879467208142688,
"learning_rate": 6.219063184852509e-06,
"loss": 1.3057,
"step": 1890
},
{
"epoch": 0.48204998097171126,
"grad_norm": 1.4622817504218393,
"learning_rate": 6.176055007628859e-06,
"loss": 1.2978,
"step": 1900
},
{
"epoch": 0.4845870861347203,
"grad_norm": 1.4651555100721898,
"learning_rate": 6.132954520760882e-06,
"loss": 1.2936,
"step": 1910
},
{
"epoch": 0.48712419129772927,
"grad_norm": 1.4242680820832143,
"learning_rate": 6.089765107245616e-06,
"loss": 1.311,
"step": 1920
},
{
"epoch": 0.4896612964607383,
"grad_norm": 1.4510357489546541,
"learning_rate": 6.046490157060041e-06,
"loss": 1.2917,
"step": 1930
},
{
"epoch": 0.4921984016237473,
"grad_norm": 1.5389362630585735,
"learning_rate": 6.003133066894987e-06,
"loss": 1.3173,
"step": 1940
},
{
"epoch": 0.4947355067867563,
"grad_norm": 1.5597918071325416,
"learning_rate": 5.959697239888525e-06,
"loss": 1.2978,
"step": 1950
},
{
"epoch": 0.4972726119497653,
"grad_norm": 1.481163850939429,
"learning_rate": 5.916186085358858e-06,
"loss": 1.3125,
"step": 1960
},
{
"epoch": 0.49980971711277433,
"grad_norm": 1.5172196100773179,
"learning_rate": 5.872603018536713e-06,
"loss": 1.3035,
"step": 1970
},
{
"epoch": 0.5023468222757833,
"grad_norm": 1.520182324070576,
"learning_rate": 5.828951460297277e-06,
"loss": 1.2943,
"step": 1980
},
{
"epoch": 0.5048839274387923,
"grad_norm": 1.374020881318329,
"learning_rate": 5.785234836891697e-06,
"loss": 1.3019,
"step": 1990
},
{
"epoch": 0.5074210326018014,
"grad_norm": 1.609172422257604,
"learning_rate": 5.741456579678141e-06,
"loss": 1.2929,
"step": 2000
},
{
"epoch": 0.5099581377648104,
"grad_norm": 1.451921659432821,
"learning_rate": 5.697620124852472e-06,
"loss": 1.2868,
"step": 2010
},
{
"epoch": 0.5124952429278193,
"grad_norm": 1.531522896512812,
"learning_rate": 5.65372891317854e-06,
"loss": 1.2875,
"step": 2020
},
{
"epoch": 0.5150323480908283,
"grad_norm": 1.443649652350418,
"learning_rate": 5.6097863897181075e-06,
"loss": 1.2963,
"step": 2030
},
{
"epoch": 0.5175694532538374,
"grad_norm": 1.5591743411035264,
"learning_rate": 5.565796003560447e-06,
"loss": 1.3121,
"step": 2040
},
{
"epoch": 0.5201065584168464,
"grad_norm": 1.428229068798765,
"learning_rate": 5.521761207551622e-06,
"loss": 1.2979,
"step": 2050
},
{
"epoch": 0.5226436635798554,
"grad_norm": 1.5164415865949983,
"learning_rate": 5.47768545802346e-06,
"loss": 1.3107,
"step": 2060
},
{
"epoch": 0.5251807687428643,
"grad_norm": 1.5292361648846982,
"learning_rate": 5.433572214522275e-06,
"loss": 1.2952,
"step": 2070
},
{
"epoch": 0.5277178739058734,
"grad_norm": 1.4451039662214231,
"learning_rate": 5.389424939537311e-06,
"loss": 1.2922,
"step": 2080
},
{
"epoch": 0.5302549790688824,
"grad_norm": 1.558654012548035,
"learning_rate": 5.345247098228977e-06,
"loss": 1.2942,
"step": 2090
},
{
"epoch": 0.5327920842318914,
"grad_norm": 1.5393309134302235,
"learning_rate": 5.301042158156866e-06,
"loss": 1.2898,
"step": 2100
},
{
"epoch": 0.5353291893949004,
"grad_norm": 1.5206662969722375,
"learning_rate": 5.256813589007571e-06,
"loss": 1.2967,
"step": 2110
},
{
"epoch": 0.5378662945579095,
"grad_norm": 1.5295277898061372,
"learning_rate": 5.212564862322355e-06,
"loss": 1.2987,
"step": 2120
},
{
"epoch": 0.5404033997209184,
"grad_norm": 1.5121887795702076,
"learning_rate": 5.168299451224665e-06,
"loss": 1.2859,
"step": 2130
},
{
"epoch": 0.5429405048839274,
"grad_norm": 1.5405224763949017,
"learning_rate": 5.124020830147525e-06,
"loss": 1.2942,
"step": 2140
},
{
"epoch": 0.5454776100469364,
"grad_norm": 1.5241647102261355,
"learning_rate": 5.079732474560821e-06,
"loss": 1.2967,
"step": 2150
},
{
"epoch": 0.5480147152099455,
"grad_norm": 1.5740459163455902,
"learning_rate": 5.035437860698508e-06,
"loss": 1.2792,
"step": 2160
},
{
"epoch": 0.5505518203729545,
"grad_norm": 1.455514390960437,
"learning_rate": 4.991140465285762e-06,
"loss": 1.2722,
"step": 2170
},
{
"epoch": 0.5530889255359634,
"grad_norm": 1.4543563727275153,
"learning_rate": 4.94684376526608e-06,
"loss": 1.294,
"step": 2180
},
{
"epoch": 0.5556260306989724,
"grad_norm": 1.415880887469612,
"learning_rate": 4.902551237528387e-06,
"loss": 1.2898,
"step": 2190
},
{
"epoch": 0.5581631358619815,
"grad_norm": 1.5027054686198038,
"learning_rate": 4.858266358634109e-06,
"loss": 1.2943,
"step": 2200
},
{
"epoch": 0.5607002410249905,
"grad_norm": 1.495415983271707,
"learning_rate": 4.813992604544319e-06,
"loss": 1.309,
"step": 2210
},
{
"epoch": 0.5632373461879995,
"grad_norm": 1.5256997169566149,
"learning_rate": 4.769733450346885e-06,
"loss": 1.2941,
"step": 2220
},
{
"epoch": 0.5657744513510085,
"grad_norm": 1.4207029137255274,
"learning_rate": 4.725492369983721e-06,
"loss": 1.2808,
"step": 2230
},
{
"epoch": 0.5683115565140175,
"grad_norm": 1.5127789303300487,
"learning_rate": 4.6812728359781064e-06,
"loss": 1.2886,
"step": 2240
},
{
"epoch": 0.5708486616770265,
"grad_norm": 1.4480660719145084,
"learning_rate": 4.637078319162127e-06,
"loss": 1.2848,
"step": 2250
},
{
"epoch": 0.5733857668400355,
"grad_norm": 1.4818074524822986,
"learning_rate": 4.592912288404251e-06,
"loss": 1.2747,
"step": 2260
},
{
"epoch": 0.5759228720030445,
"grad_norm": 1.496021447098999,
"learning_rate": 4.5487782103370445e-06,
"loss": 1.2889,
"step": 2270
},
{
"epoch": 0.5784599771660536,
"grad_norm": 1.4726400774082267,
"learning_rate": 4.504679549085077e-06,
"loss": 1.2956,
"step": 2280
},
{
"epoch": 0.5809970823290626,
"grad_norm": 1.492109044123467,
"learning_rate": 4.460619765993025e-06,
"loss": 1.2974,
"step": 2290
},
{
"epoch": 0.5835341874920715,
"grad_norm": 1.4567515467141523,
"learning_rate": 4.416602319353974e-06,
"loss": 1.29,
"step": 2300
},
{
"epoch": 0.5860712926550805,
"grad_norm": 1.460535915347314,
"learning_rate": 4.3726306641379915e-06,
"loss": 1.2745,
"step": 2310
},
{
"epoch": 0.5886083978180896,
"grad_norm": 1.4651576736560898,
"learning_rate": 4.328708251720924e-06,
"loss": 1.2739,
"step": 2320
},
{
"epoch": 0.5911455029810986,
"grad_norm": 1.6196158147206026,
"learning_rate": 4.2848385296135165e-06,
"loss": 1.3101,
"step": 2330
},
{
"epoch": 0.5936826081441076,
"grad_norm": 1.527439804056797,
"learning_rate": 4.241024941190792e-06,
"loss": 1.2771,
"step": 2340
},
{
"epoch": 0.5962197133071165,
"grad_norm": 1.4872645401772542,
"learning_rate": 4.197270925421796e-06,
"loss": 1.2877,
"step": 2350
},
{
"epoch": 0.5987568184701256,
"grad_norm": 1.4908027336325684,
"learning_rate": 4.153579916599659e-06,
"loss": 1.2969,
"step": 2360
},
{
"epoch": 0.6012939236331346,
"grad_norm": 1.370441167203172,
"learning_rate": 4.109955344072036e-06,
"loss": 1.2745,
"step": 2370
},
{
"epoch": 0.6038310287961436,
"grad_norm": 1.457801692594122,
"learning_rate": 4.066400631971938e-06,
"loss": 1.2714,
"step": 2380
},
{
"epoch": 0.6063681339591526,
"grad_norm": 1.5047248748403204,
"learning_rate": 4.022919198948966e-06,
"loss": 1.2759,
"step": 2390
},
{
"epoch": 0.6089052391221617,
"grad_norm": 1.5232259549425642,
"learning_rate": 3.979514457900982e-06,
"loss": 1.2845,
"step": 2400
},
{
"epoch": 0.6114423442851706,
"grad_norm": 1.4170452963382303,
"learning_rate": 3.936189815706219e-06,
"loss": 1.2833,
"step": 2410
},
{
"epoch": 0.6139794494481796,
"grad_norm": 1.5010818180720833,
"learning_rate": 3.8929486729558775e-06,
"loss": 1.2941,
"step": 2420
},
{
"epoch": 0.6165165546111886,
"grad_norm": 1.4420347497785075,
"learning_rate": 3.849794423687212e-06,
"loss": 1.2775,
"step": 2430
},
{
"epoch": 0.6190536597741977,
"grad_norm": 1.520468191298721,
"learning_rate": 3.8067304551171247e-06,
"loss": 1.2627,
"step": 2440
},
{
"epoch": 0.6215907649372067,
"grad_norm": 1.4753704862458017,
"learning_rate": 3.7637601473763035e-06,
"loss": 1.284,
"step": 2450
},
{
"epoch": 0.6241278701002156,
"grad_norm": 1.469877746697786,
"learning_rate": 3.7208868732439145e-06,
"loss": 1.2927,
"step": 2460
},
{
"epoch": 0.6266649752632246,
"grad_norm": 1.4601548141707599,
"learning_rate": 3.6781139978828606e-06,
"loss": 1.2947,
"step": 2470
},
{
"epoch": 0.6292020804262337,
"grad_norm": 1.5092438879342172,
"learning_rate": 3.6354448785756558e-06,
"loss": 1.2843,
"step": 2480
},
{
"epoch": 0.6317391855892427,
"grad_norm": 1.4368007055488876,
"learning_rate": 3.592882864460905e-06,
"loss": 1.265,
"step": 2490
},
{
"epoch": 0.6342762907522517,
"grad_norm": 1.4672055312297339,
"learning_rate": 3.5504312962704245e-06,
"loss": 1.2709,
"step": 2500
},
{
"epoch": 0.6368133959152606,
"grad_norm": 1.4995451462382032,
"learning_rate": 3.5080935060670345e-06,
"loss": 1.2679,
"step": 2510
},
{
"epoch": 0.6393505010782697,
"grad_norm": 1.458116276283539,
"learning_rate": 3.465872816983008e-06,
"loss": 1.2821,
"step": 2520
},
{
"epoch": 0.6418876062412787,
"grad_norm": 1.4447640379158275,
"learning_rate": 3.4237725429592507e-06,
"loss": 1.2865,
"step": 2530
},
{
"epoch": 0.6444247114042877,
"grad_norm": 1.3965736731366891,
"learning_rate": 3.3817959884851735e-06,
"loss": 1.2698,
"step": 2540
},
{
"epoch": 0.6469618165672967,
"grad_norm": 1.4648194884238146,
"learning_rate": 3.3399464483393272e-06,
"loss": 1.291,
"step": 2550
},
{
"epoch": 0.6494989217303058,
"grad_norm": 1.4271493727093771,
"learning_rate": 3.298227207330792e-06,
"loss": 1.2765,
"step": 2560
},
{
"epoch": 0.6520360268933147,
"grad_norm": 1.5962462881292958,
"learning_rate": 3.256641540041346e-06,
"loss": 1.2905,
"step": 2570
},
{
"epoch": 0.6545731320563237,
"grad_norm": 1.4501719681830862,
"learning_rate": 3.2151927105684423e-06,
"loss": 1.298,
"step": 2580
},
{
"epoch": 0.6571102372193327,
"grad_norm": 1.5186349976521718,
"learning_rate": 3.1738839722690085e-06,
"loss": 1.2742,
"step": 2590
},
{
"epoch": 0.6596473423823418,
"grad_norm": 1.3901740398219145,
"learning_rate": 3.1327185675040907e-06,
"loss": 1.2769,
"step": 2600
},
{
"epoch": 0.6621844475453508,
"grad_norm": 1.4618375024699428,
"learning_rate": 3.0916997273843454e-06,
"loss": 1.2938,
"step": 2610
},
{
"epoch": 0.6647215527083598,
"grad_norm": 1.4675982361039484,
"learning_rate": 3.0508306715164416e-06,
"loss": 1.2913,
"step": 2620
},
{
"epoch": 0.6672586578713687,
"grad_norm": 1.5086185778550512,
"learning_rate": 3.0101146077503386e-06,
"loss": 1.2777,
"step": 2630
},
{
"epoch": 0.6697957630343778,
"grad_norm": 1.4573487737483761,
"learning_rate": 2.9695547319275093e-06,
"loss": 1.2633,
"step": 2640
},
{
"epoch": 0.6723328681973868,
"grad_norm": 1.43323809832072,
"learning_rate": 2.9291542276300866e-06,
"loss": 1.289,
"step": 2650
},
{
"epoch": 0.6748699733603958,
"grad_norm": 1.420082813628849,
"learning_rate": 2.8889162659309832e-06,
"loss": 1.2729,
"step": 2660
},
{
"epoch": 0.6774070785234048,
"grad_norm": 1.424990219399345,
"learning_rate": 2.848844005145004e-06,
"loss": 1.3024,
"step": 2670
},
{
"epoch": 0.6799441836864138,
"grad_norm": 1.4395745448115305,
"learning_rate": 2.808940590580922e-06,
"loss": 1.2845,
"step": 2680
},
{
"epoch": 0.6824812888494228,
"grad_norm": 1.4802086998925903,
"learning_rate": 2.769209154294623e-06,
"loss": 1.2844,
"step": 2690
},
{
"epoch": 0.6850183940124318,
"grad_norm": 1.491623196795251,
"learning_rate": 2.7296528148432565e-06,
"loss": 1.2683,
"step": 2700
},
{
"epoch": 0.6875554991754408,
"grad_norm": 1.416764375906272,
"learning_rate": 2.690274677040462e-06,
"loss": 1.2776,
"step": 2710
},
{
"epoch": 0.6900926043384499,
"grad_norm": 1.519033593874162,
"learning_rate": 2.6510778317126597e-06,
"loss": 1.2807,
"step": 2720
},
{
"epoch": 0.6926297095014589,
"grad_norm": 1.3894691132515595,
"learning_rate": 2.6120653554564624e-06,
"loss": 1.2777,
"step": 2730
},
{
"epoch": 0.6951668146644678,
"grad_norm": 1.4049713206074572,
"learning_rate": 2.573240310397187e-06,
"loss": 1.2736,
"step": 2740
},
{
"epoch": 0.6977039198274768,
"grad_norm": 1.4357642101900112,
"learning_rate": 2.5346057439484923e-06,
"loss": 1.2803,
"step": 2750
},
{
"epoch": 0.7002410249904859,
"grad_norm": 1.490167340198777,
"learning_rate": 2.4961646885732034e-06,
"loss": 1.2744,
"step": 2760
},
{
"epoch": 0.7027781301534949,
"grad_norm": 1.4179312953545702,
"learning_rate": 2.4579201615452812e-06,
"loss": 1.2842,
"step": 2770
},
{
"epoch": 0.7053152353165039,
"grad_norm": 1.6140649717523825,
"learning_rate": 2.4198751647129896e-06,
"loss": 1.2963,
"step": 2780
},
{
"epoch": 0.7078523404795128,
"grad_norm": 1.530468810042779,
"learning_rate": 2.3820326842632894e-06,
"loss": 1.2637,
"step": 2790
},
{
"epoch": 0.7103894456425219,
"grad_norm": 1.412588711043796,
"learning_rate": 2.344395690487441e-06,
"loss": 1.2856,
"step": 2800
},
{
"epoch": 0.7129265508055309,
"grad_norm": 1.5447254908338892,
"learning_rate": 2.3069671375478645e-06,
"loss": 1.2848,
"step": 2810
},
{
"epoch": 0.7154636559685399,
"grad_norm": 1.43691808431636,
"learning_rate": 2.2697499632462695e-06,
"loss": 1.2536,
"step": 2820
},
{
"epoch": 0.7180007611315489,
"grad_norm": 1.5560428170621574,
"learning_rate": 2.2327470887930595e-06,
"loss": 1.3015,
"step": 2830
},
{
"epoch": 0.720537866294558,
"grad_norm": 1.450374747082515,
"learning_rate": 2.195961418578041e-06,
"loss": 1.2744,
"step": 2840
},
{
"epoch": 0.7230749714575669,
"grad_norm": 1.484538648746269,
"learning_rate": 2.159395839942464e-06,
"loss": 1.2664,
"step": 2850
},
{
"epoch": 0.7256120766205759,
"grad_norm": 1.3953379506558543,
"learning_rate": 2.1230532229523865e-06,
"loss": 1.2489,
"step": 2860
},
{
"epoch": 0.7281491817835849,
"grad_norm": 1.4415654155573785,
"learning_rate": 2.086936420173399e-06,
"loss": 1.2719,
"step": 2870
},
{
"epoch": 0.730686286946594,
"grad_norm": 1.4271516629005172,
"learning_rate": 2.051048266446727e-06,
"loss": 1.2652,
"step": 2880
},
{
"epoch": 0.733223392109603,
"grad_norm": 1.4951992832082914,
"learning_rate": 2.0153915786667203e-06,
"loss": 1.26,
"step": 2890
},
{
"epoch": 0.735760497272612,
"grad_norm": 1.4351479751585414,
"learning_rate": 1.9799691555597555e-06,
"loss": 1.2881,
"step": 2900
},
{
"epoch": 0.7382976024356209,
"grad_norm": 1.474899241565124,
"learning_rate": 1.9447837774645513e-06,
"loss": 1.2702,
"step": 2910
},
{
"epoch": 0.74083470759863,
"grad_norm": 1.4426835070499822,
"learning_rate": 1.9098382061139503e-06,
"loss": 1.2699,
"step": 2920
},
{
"epoch": 0.743371812761639,
"grad_norm": 1.4876818985570295,
"learning_rate": 1.8751351844181414e-06,
"loss": 1.2612,
"step": 2930
},
{
"epoch": 0.745908917924648,
"grad_norm": 1.4360645410392319,
"learning_rate": 1.8406774362493662e-06,
"loss": 1.2754,
"step": 2940
},
{
"epoch": 0.748446023087657,
"grad_norm": 1.4473888665732064,
"learning_rate": 1.8064676662281206e-06,
"loss": 1.2902,
"step": 2950
},
{
"epoch": 0.750983128250666,
"grad_norm": 1.4434612838312966,
"learning_rate": 1.7725085595108682e-06,
"loss": 1.273,
"step": 2960
},
{
"epoch": 0.753520233413675,
"grad_norm": 1.558136105535075,
"learning_rate": 1.7388027815792725e-06,
"loss": 1.2787,
"step": 2970
},
{
"epoch": 0.756057338576684,
"grad_norm": 1.4724878594646564,
"learning_rate": 1.705352978030993e-06,
"loss": 1.2627,
"step": 2980
},
{
"epoch": 0.758594443739693,
"grad_norm": 1.4768497018650097,
"learning_rate": 1.672161774372022e-06,
"loss": 1.2911,
"step": 2990
},
{
"epoch": 0.7611315489027021,
"grad_norm": 1.4598692131173956,
"learning_rate": 1.639231775810602e-06,
"loss": 1.2907,
"step": 3000
},
{
"epoch": 0.763668654065711,
"grad_norm": 1.3971487709781405,
"learning_rate": 1.6065655670527546e-06,
"loss": 1.2632,
"step": 3010
},
{
"epoch": 0.76620575922872,
"grad_norm": 1.4196228285690422,
"learning_rate": 1.574165712099392e-06,
"loss": 1.2542,
"step": 3020
},
{
"epoch": 0.768742864391729,
"grad_norm": 1.4395590200787511,
"learning_rate": 1.542034754045067e-06,
"loss": 1.2693,
"step": 3030
},
{
"epoch": 0.7712799695547381,
"grad_norm": 1.4538143237649903,
"learning_rate": 1.5101752148783705e-06,
"loss": 1.2728,
"step": 3040
},
{
"epoch": 0.7738170747177471,
"grad_norm": 1.4483981816763403,
"learning_rate": 1.4785895952839735e-06,
"loss": 1.2671,
"step": 3050
},
{
"epoch": 0.7763541798807561,
"grad_norm": 1.5335192207213328,
"learning_rate": 1.447280374446346e-06,
"loss": 1.2778,
"step": 3060
},
{
"epoch": 0.778891285043765,
"grad_norm": 1.4504666284348766,
"learning_rate": 1.4162500098551608e-06,
"loss": 1.276,
"step": 3070
},
{
"epoch": 0.7814283902067741,
"grad_norm": 1.454412830474016,
"learning_rate": 1.385500937112415e-06,
"loss": 1.2804,
"step": 3080
},
{
"epoch": 0.7839654953697831,
"grad_norm": 1.462536001446098,
"learning_rate": 1.3550355697412386e-06,
"loss": 1.2586,
"step": 3090
},
{
"epoch": 0.7865026005327921,
"grad_norm": 1.4861860594882876,
"learning_rate": 1.3248562989964719e-06,
"loss": 1.2843,
"step": 3100
},
{
"epoch": 0.7890397056958011,
"grad_norm": 1.391241218546658,
"learning_rate": 1.2949654936769622e-06,
"loss": 1.2723,
"step": 3110
},
{
"epoch": 0.79157681085881,
"grad_norm": 1.412674356321388,
"learning_rate": 1.2653654999396436e-06,
"loss": 1.2621,
"step": 3120
},
{
"epoch": 0.7941139160218191,
"grad_norm": 1.406341007739084,
"learning_rate": 1.2360586411153747e-06,
"loss": 1.2897,
"step": 3130
},
{
"epoch": 0.7966510211848281,
"grad_norm": 1.4125498017483746,
"learning_rate": 1.2070472175265857e-06,
"loss": 1.2657,
"step": 3140
},
{
"epoch": 0.7991881263478371,
"grad_norm": 1.575395352386111,
"learning_rate": 1.1783335063067286e-06,
"loss": 1.2974,
"step": 3150
},
{
"epoch": 0.8017252315108461,
"grad_norm": 1.4632409776646316,
"learning_rate": 1.1499197612215269e-06,
"loss": 1.2914,
"step": 3160
},
{
"epoch": 0.8042623366738552,
"grad_norm": 1.420307782085356,
"learning_rate": 1.1218082124920903e-06,
"loss": 1.2583,
"step": 3170
},
{
"epoch": 0.8067994418368641,
"grad_norm": 1.4073518626370982,
"learning_rate": 1.0940010666198575e-06,
"loss": 1.2588,
"step": 3180
},
{
"epoch": 0.8093365469998731,
"grad_norm": 1.427712685491864,
"learning_rate": 1.0665005062134015e-06,
"loss": 1.2641,
"step": 3190
},
{
"epoch": 0.8118736521628821,
"grad_norm": 1.4042233353051128,
"learning_rate": 1.0393086898171234e-06,
"loss": 1.2623,
"step": 3200
},
{
"epoch": 0.8144107573258912,
"grad_norm": 1.4353551061219325,
"learning_rate": 1.0124277517418196e-06,
"loss": 1.2701,
"step": 3210
},
{
"epoch": 0.8169478624889002,
"grad_norm": 1.4714738106408498,
"learning_rate": 9.858598018971599e-07,
"loss": 1.2665,
"step": 3220
},
{
"epoch": 0.8194849676519091,
"grad_norm": 1.3867710691517015,
"learning_rate": 9.596069256260792e-07,
"loss": 1.2811,
"step": 3230
},
{
"epoch": 0.8220220728149181,
"grad_norm": 1.5556697110120234,
"learning_rate": 9.336711835410972e-07,
"loss": 1.2577,
"step": 3240
},
{
"epoch": 0.8245591779779272,
"grad_norm": 1.4677232808017586,
"learning_rate": 9.080546113625738e-07,
"loss": 1.2675,
"step": 3250
},
{
"epoch": 0.8270962831409362,
"grad_norm": 1.4009076826572764,
"learning_rate": 8.827592197589341e-07,
"loss": 1.2573,
"step": 3260
},
{
"epoch": 0.8296333883039452,
"grad_norm": 1.3584163410682717,
"learning_rate": 8.577869941888389e-07,
"loss": 1.2654,
"step": 3270
},
{
"epoch": 0.8321704934669542,
"grad_norm": 1.4763004523041792,
"learning_rate": 8.331398947453512e-07,
"loss": 1.271,
"step": 3280
},
{
"epoch": 0.8347075986299632,
"grad_norm": 1.432107775038367,
"learning_rate": 8.08819856002081e-07,
"loss": 1.2771,
"step": 3290
},
{
"epoch": 0.8372447037929722,
"grad_norm": 1.4636493219573536,
"learning_rate": 7.848287868613441e-07,
"loss": 1.2511,
"step": 3300
},
{
"epoch": 0.8397818089559812,
"grad_norm": 1.501456048501624,
"learning_rate": 7.611685704043281e-07,
"loss": 1.2724,
"step": 3310
},
{
"epoch": 0.8423189141189902,
"grad_norm": 1.4533194309629769,
"learning_rate": 7.378410637432848e-07,
"loss": 1.2761,
"step": 3320
},
{
"epoch": 0.8448560192819993,
"grad_norm": 1.4930040288043631,
"learning_rate": 7.148480978757694e-07,
"loss": 1.2808,
"step": 3330
},
{
"epoch": 0.8473931244450083,
"grad_norm": 1.4327631888495864,
"learning_rate": 6.921914775409211e-07,
"loss": 1.2764,
"step": 3340
},
{
"epoch": 0.8499302296080172,
"grad_norm": 1.4066505744498654,
"learning_rate": 6.698729810778065e-07,
"loss": 1.2724,
"step": 3350
},
{
"epoch": 0.8524673347710262,
"grad_norm": 1.4214214237910756,
"learning_rate": 6.478943602858373e-07,
"loss": 1.2703,
"step": 3360
},
{
"epoch": 0.8550044399340353,
"grad_norm": 1.4609797404161982,
"learning_rate": 6.262573402872707e-07,
"loss": 1.2702,
"step": 3370
},
{
"epoch": 0.8575415450970443,
"grad_norm": 1.4426076897314533,
"learning_rate": 6.04963619391799e-07,
"loss": 1.2652,
"step": 3380
},
{
"epoch": 0.8600786502600533,
"grad_norm": 1.4557782632700174,
"learning_rate": 5.840148689632536e-07,
"loss": 1.2628,
"step": 3390
},
{
"epoch": 0.8626157554230622,
"grad_norm": 1.4465949605683495,
"learning_rate": 5.634127332884143e-07,
"loss": 1.2649,
"step": 3400
},
{
"epoch": 0.8651528605860713,
"grad_norm": 1.4543385042228827,
"learning_rate": 5.431588294479479e-07,
"loss": 1.2863,
"step": 3410
},
{
"epoch": 0.8676899657490803,
"grad_norm": 1.4211990179028964,
"learning_rate": 5.232547471894839e-07,
"loss": 1.2603,
"step": 3420
},
{
"epoch": 0.8702270709120893,
"grad_norm": 1.4970252757505178,
"learning_rate": 5.037020488028322e-07,
"loss": 1.2659,
"step": 3430
},
{
"epoch": 0.8727641760750983,
"grad_norm": 1.476846612856639,
"learning_rate": 4.845022689973567e-07,
"loss": 1.2622,
"step": 3440
},
{
"epoch": 0.8753012812381074,
"grad_norm": 1.3975933791175674,
"learning_rate": 4.656569147815171e-07,
"loss": 1.2675,
"step": 3450
},
{
"epoch": 0.8778383864011163,
"grad_norm": 1.4066115466115592,
"learning_rate": 4.471674653445801e-07,
"loss": 1.2657,
"step": 3460
},
{
"epoch": 0.8803754915641253,
"grad_norm": 1.4093052515025426,
"learning_rate": 4.290353719405199e-07,
"loss": 1.2622,
"step": 3470
},
{
"epoch": 0.8829125967271343,
"grad_norm": 1.517434872609148,
"learning_rate": 4.1126205777410054e-07,
"loss": 1.2658,
"step": 3480
},
{
"epoch": 0.8854497018901434,
"grad_norm": 1.3671982309013966,
"learning_rate": 3.938489178891769e-07,
"loss": 1.26,
"step": 3490
},
{
"epoch": 0.8879868070531524,
"grad_norm": 1.3848746120915914,
"learning_rate": 3.767973190591906e-07,
"loss": 1.252,
"step": 3500
},
{
"epoch": 0.8905239122161613,
"grad_norm": 1.404335518132526,
"learning_rate": 3.6010859967988975e-07,
"loss": 1.2684,
"step": 3510
},
{
"epoch": 0.8930610173791703,
"grad_norm": 1.4696908362694405,
"learning_rate": 3.437840696642797e-07,
"loss": 1.28,
"step": 3520
},
{
"epoch": 0.8955981225421794,
"grad_norm": 1.4394982066957633,
"learning_rate": 3.2782501033980897e-07,
"loss": 1.2596,
"step": 3530
},
{
"epoch": 0.8981352277051884,
"grad_norm": 1.441827806292722,
"learning_rate": 3.1223267434778934e-07,
"loss": 1.2548,
"step": 3540
},
{
"epoch": 0.9006723328681974,
"grad_norm": 1.4029572337771223,
"learning_rate": 2.9700828554508175e-07,
"loss": 1.2714,
"step": 3550
},
{
"epoch": 0.9032094380312063,
"grad_norm": 1.456563644801128,
"learning_rate": 2.82153038908034e-07,
"loss": 1.271,
"step": 3560
},
{
"epoch": 0.9057465431942154,
"grad_norm": 1.4854658320433165,
"learning_rate": 2.6766810043867996e-07,
"loss": 1.2636,
"step": 3570
},
{
"epoch": 0.9082836483572244,
"grad_norm": 1.3976434876269141,
"learning_rate": 2.53554607073227e-07,
"loss": 1.2555,
"step": 3580
},
{
"epoch": 0.9108207535202334,
"grad_norm": 1.4520566750739115,
"learning_rate": 2.3981366659281135e-07,
"loss": 1.2741,
"step": 3590
},
{
"epoch": 0.9133578586832424,
"grad_norm": 1.431635462724296,
"learning_rate": 2.2644635753654832e-07,
"loss": 1.2641,
"step": 3600
},
{
"epoch": 0.9158949638462515,
"grad_norm": 1.4459695881350807,
"learning_rate": 2.1345372911687868e-07,
"loss": 1.2719,
"step": 3610
},
{
"epoch": 0.9184320690092604,
"grad_norm": 1.4523937909973577,
"learning_rate": 2.008368011372136e-07,
"loss": 1.2574,
"step": 3620
},
{
"epoch": 0.9209691741722694,
"grad_norm": 1.448000545062192,
"learning_rate": 1.8859656391188918e-07,
"loss": 1.2678,
"step": 3630
},
{
"epoch": 0.9235062793352784,
"grad_norm": 1.4574912967654818,
"learning_rate": 1.7673397818843696e-07,
"loss": 1.2631,
"step": 3640
},
{
"epoch": 0.9260433844982875,
"grad_norm": 1.469570266612804,
"learning_rate": 1.65249975072172e-07,
"loss": 1.2676,
"step": 3650
},
{
"epoch": 0.9285804896612965,
"grad_norm": 1.4705072480655184,
"learning_rate": 1.5414545595311193e-07,
"loss": 1.2363,
"step": 3660
},
{
"epoch": 0.9311175948243055,
"grad_norm": 1.469943074464241,
"learning_rate": 1.4342129243522241e-07,
"loss": 1.2716,
"step": 3670
},
{
"epoch": 0.9336546999873144,
"grad_norm": 1.4039691988667693,
"learning_rate": 1.3307832626800966e-07,
"loss": 1.2674,
"step": 3680
},
{
"epoch": 0.9361918051503235,
"grad_norm": 1.4357184274036978,
"learning_rate": 1.2311736928044437e-07,
"loss": 1.2662,
"step": 3690
},
{
"epoch": 0.9387289103133325,
"grad_norm": 1.4198942808558601,
"learning_rate": 1.1353920331724666e-07,
"loss": 1.2743,
"step": 3700
},
{
"epoch": 0.9412660154763415,
"grad_norm": 1.472183542066415,
"learning_rate": 1.0434458017751392e-07,
"loss": 1.2505,
"step": 3710
},
{
"epoch": 0.9438031206393505,
"grad_norm": 1.4530560276575668,
"learning_rate": 9.553422155571257e-08,
"loss": 1.2637,
"step": 3720
},
{
"epoch": 0.9463402258023595,
"grad_norm": 1.4342800198901315,
"learning_rate": 8.710881898503276e-08,
"loss": 1.2706,
"step": 3730
},
{
"epoch": 0.9488773309653685,
"grad_norm": 1.4769111711160674,
"learning_rate": 7.906903378310738e-08,
"loss": 1.2717,
"step": 3740
},
{
"epoch": 0.9514144361283775,
"grad_norm": 1.4434198544006103,
"learning_rate": 7.141549700010741e-08,
"loss": 1.2764,
"step": 3750
},
{
"epoch": 0.9539515412913865,
"grad_norm": 1.3967239883734883,
"learning_rate": 6.414880936920665e-08,
"loss": 1.2454,
"step": 3760
},
{
"epoch": 0.9564886464543956,
"grad_norm": 1.4745766429828837,
"learning_rate": 5.726954125943318e-08,
"loss": 1.2747,
"step": 3770
},
{
"epoch": 0.9590257516174046,
"grad_norm": 1.3354472782196753,
"learning_rate": 5.0778232630897536e-08,
"loss": 1.2717,
"step": 3780
},
{
"epoch": 0.9615628567804135,
"grad_norm": 1.5048063707346293,
"learning_rate": 4.4675392992412634e-08,
"loss": 1.2728,
"step": 3790
},
{
"epoch": 0.9640999619434225,
"grad_norm": 1.4399837174477832,
"learning_rate": 3.896150136150134e-08,
"loss": 1.2826,
"step": 3800
},
{
"epoch": 0.9666370671064316,
"grad_norm": 1.4450380885385077,
"learning_rate": 3.3637006226797665e-08,
"loss": 1.2534,
"step": 3810
},
{
"epoch": 0.9691741722694406,
"grad_norm": 1.35673950609508,
"learning_rate": 2.8702325512844908e-08,
"loss": 1.2609,
"step": 3820
},
{
"epoch": 0.9717112774324496,
"grad_norm": 1.43190405721669,
"learning_rate": 2.4157846547292473e-08,
"loss": 1.2787,
"step": 3830
},
{
"epoch": 0.9742483825954585,
"grad_norm": 1.4245067018508633,
"learning_rate": 2.000392603049517e-08,
"loss": 1.2665,
"step": 3840
},
{
"epoch": 0.9767854877584676,
"grad_norm": 1.4112885660620007,
"learning_rate": 1.6240890007510612e-08,
"loss": 1.2785,
"step": 3850
},
{
"epoch": 0.9793225929214766,
"grad_norm": 1.499341122900986,
"learning_rate": 1.286903384251581e-08,
"loss": 1.2539,
"step": 3860
},
{
"epoch": 0.9818596980844856,
"grad_norm": 1.4494246005853764,
"learning_rate": 9.888622195615705e-09,
"loss": 1.2725,
"step": 3870
},
{
"epoch": 0.9843968032474946,
"grad_norm": 1.421431046100693,
"learning_rate": 7.299889002075344e-09,
"loss": 1.2726,
"step": 3880
},
{
"epoch": 0.9869339084105037,
"grad_norm": 1.4209346862475516,
"learning_rate": 5.103037453954573e-09,
"loss": 1.2548,
"step": 3890
},
{
"epoch": 0.9894710135735126,
"grad_norm": 1.3751824191752333,
"learning_rate": 3.2982399841618996e-09,
"loss": 1.2859,
"step": 3900
},
{
"epoch": 0.9920081187365216,
"grad_norm": 1.4627888810880714,
"learning_rate": 1.8856382529192085e-09,
"loss": 1.2842,
"step": 3910
},
{
"epoch": 0.9945452238995306,
"grad_norm": 1.3905016842842302,
"learning_rate": 8.653431366406617e-10,
"loss": 1.2447,
"step": 3920
},
{
"epoch": 0.9970823290625397,
"grad_norm": 1.5470180096733397,
"learning_rate": 2.374347192335424e-10,
"loss": 1.2707,
"step": 3930
},
{
"epoch": 0.9996194342255487,
"grad_norm": 1.4506027900307656,
"learning_rate": 1.9622858088430564e-12,
"loss": 1.2737,
"step": 3940
},
{
"epoch": 0.9998731447418495,
"step": 3941,
"total_flos": 3.7575827488610714e+18,
"train_loss": 1.3289946492206504,
"train_runtime": 13442.1001,
"train_samples_per_second": 37.53,
"train_steps_per_second": 0.293
}
],
"logging_steps": 10,
"max_steps": 3941,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.7575827488610714e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}