{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.907120743034056,
"eval_steps": 500,
"global_step": 800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1238390092879257,
"grad_norm": 7.5771355628967285,
"learning_rate": 0.0001999229036240723,
"loss": 1.3532,
"step": 10
},
{
"epoch": 0.2476780185758514,
"grad_norm": 0.3571035861968994,
"learning_rate": 0.0001996917333733128,
"loss": 1.0315,
"step": 20
},
{
"epoch": 0.3715170278637771,
"grad_norm": 0.3278755843639374,
"learning_rate": 0.00019930684569549264,
"loss": 0.9018,
"step": 30
},
{
"epoch": 0.4953560371517028,
"grad_norm": 0.4095502495765686,
"learning_rate": 0.00019876883405951377,
"loss": 0.807,
"step": 40
},
{
"epoch": 0.6191950464396285,
"grad_norm": 0.4147421717643738,
"learning_rate": 0.00019807852804032305,
"loss": 0.7204,
"step": 50
},
{
"epoch": 0.7430340557275542,
"grad_norm": 0.2525322735309601,
"learning_rate": 0.00019723699203976766,
"loss": 0.6548,
"step": 60
},
{
"epoch": 0.8668730650154799,
"grad_norm": 0.28104496002197266,
"learning_rate": 0.00019624552364536473,
"loss": 0.6687,
"step": 70
},
{
"epoch": 0.9907120743034056,
"grad_norm": 0.4467855393886566,
"learning_rate": 0.00019510565162951537,
"loss": 0.6317,
"step": 80
},
{
"epoch": 1.1145510835913313,
"grad_norm": 0.2571893036365509,
"learning_rate": 0.00019381913359224842,
"loss": 0.6319,
"step": 90
},
{
"epoch": 1.238390092879257,
"grad_norm": 0.24546292424201965,
"learning_rate": 0.0001923879532511287,
"loss": 0.6262,
"step": 100
},
{
"epoch": 1.3622291021671826,
"grad_norm": 0.24089373648166656,
"learning_rate": 0.00019081431738250814,
"loss": 0.6309,
"step": 110
},
{
"epoch": 1.4860681114551084,
"grad_norm": 0.24842403829097748,
"learning_rate": 0.0001891006524188368,
"loss": 0.6142,
"step": 120
},
{
"epoch": 1.609907120743034,
"grad_norm": 0.2339727133512497,
"learning_rate": 0.00018724960070727972,
"loss": 0.6131,
"step": 130
},
{
"epoch": 1.7337461300309598,
"grad_norm": 0.21254688501358032,
"learning_rate": 0.00018526401643540922,
"loss": 0.5892,
"step": 140
},
{
"epoch": 1.8575851393188856,
"grad_norm": 0.34352943301200867,
"learning_rate": 0.00018314696123025454,
"loss": 0.6037,
"step": 150
},
{
"epoch": 1.9814241486068112,
"grad_norm": 0.21427258849143982,
"learning_rate": 0.00018090169943749476,
"loss": 0.6051,
"step": 160
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.23226872086524963,
"learning_rate": 0.00017853169308807448,
"loss": 0.6109,
"step": 170
},
{
"epoch": 2.2291021671826625,
"grad_norm": 0.254842072725296,
"learning_rate": 0.0001760405965600031,
"loss": 0.5912,
"step": 180
},
{
"epoch": 2.3529411764705883,
"grad_norm": 0.2571081519126892,
"learning_rate": 0.00017343225094356855,
"loss": 0.5975,
"step": 190
},
{
"epoch": 2.476780185758514,
"grad_norm": 0.25343191623687744,
"learning_rate": 0.00017071067811865476,
"loss": 0.5786,
"step": 200
},
{
"epoch": 2.6006191950464395,
"grad_norm": 0.21258015930652618,
"learning_rate": 0.0001678800745532942,
"loss": 0.586,
"step": 210
},
{
"epoch": 2.7244582043343653,
"grad_norm": 0.25848379731178284,
"learning_rate": 0.00016494480483301836,
"loss": 0.5714,
"step": 220
},
{
"epoch": 2.848297213622291,
"grad_norm": 0.26716166734695435,
"learning_rate": 0.00016190939493098344,
"loss": 0.5887,
"step": 230
},
{
"epoch": 2.972136222910217,
"grad_norm": 0.23578402400016785,
"learning_rate": 0.00015877852522924732,
"loss": 0.5902,
"step": 240
},
{
"epoch": 3.0959752321981426,
"grad_norm": 0.23565009236335754,
"learning_rate": 0.00015555702330196023,
"loss": 0.5792,
"step": 250
},
{
"epoch": 3.219814241486068,
"grad_norm": 0.2390134632587433,
"learning_rate": 0.0001522498564715949,
"loss": 0.5676,
"step": 260
},
{
"epoch": 3.343653250773994,
"grad_norm": 0.25006794929504395,
"learning_rate": 0.00014886212414969553,
"loss": 0.5788,
"step": 270
},
{
"epoch": 3.4674922600619196,
"grad_norm": 0.2533760666847229,
"learning_rate": 0.00014539904997395468,
"loss": 0.5769,
"step": 280
},
{
"epoch": 3.5913312693498454,
"grad_norm": 0.2808171510696411,
"learning_rate": 0.0001418659737537428,
"loss": 0.5521,
"step": 290
},
{
"epoch": 3.715170278637771,
"grad_norm": 0.28783777356147766,
"learning_rate": 0.000138268343236509,
"loss": 0.5723,
"step": 300
},
{
"epoch": 3.8390092879256965,
"grad_norm": 0.29237958788871765,
"learning_rate": 0.0001346117057077493,
"loss": 0.5668,
"step": 310
},
{
"epoch": 3.9628482972136223,
"grad_norm": 0.2757062315940857,
"learning_rate": 0.00013090169943749476,
"loss": 0.579,
"step": 320
},
{
"epoch": 4.086687306501548,
"grad_norm": 0.28595200181007385,
"learning_rate": 0.00012714404498650743,
"loss": 0.5488,
"step": 330
},
{
"epoch": 4.2105263157894735,
"grad_norm": 0.2707183063030243,
"learning_rate": 0.00012334453638559057,
"loss": 0.5511,
"step": 340
},
{
"epoch": 4.3343653250774,
"grad_norm": 0.3559975028038025,
"learning_rate": 0.00011950903220161285,
"loss": 0.5683,
"step": 350
},
{
"epoch": 4.458204334365325,
"grad_norm": 0.2762058973312378,
"learning_rate": 0.0001156434465040231,
"loss": 0.5499,
"step": 360
},
{
"epoch": 4.58204334365325,
"grad_norm": 0.2717606723308563,
"learning_rate": 0.00011175373974578378,
"loss": 0.5612,
"step": 370
},
{
"epoch": 4.705882352941177,
"grad_norm": 0.27757707238197327,
"learning_rate": 0.0001078459095727845,
"loss": 0.5602,
"step": 380
},
{
"epoch": 4.829721362229102,
"grad_norm": 0.3977556526660919,
"learning_rate": 0.00010392598157590688,
"loss": 0.5459,
"step": 390
},
{
"epoch": 4.953560371517028,
"grad_norm": 0.26867300271987915,
"learning_rate": 0.0001,
"loss": 0.537,
"step": 400
},
{
"epoch": 5.077399380804954,
"grad_norm": 0.26843276619911194,
"learning_rate": 9.607401842409317e-05,
"loss": 0.5601,
"step": 410
},
{
"epoch": 5.201238390092879,
"grad_norm": 0.30268290638923645,
"learning_rate": 9.215409042721552e-05,
"loss": 0.5317,
"step": 420
},
{
"epoch": 5.325077399380805,
"grad_norm": 0.3163929581642151,
"learning_rate": 8.824626025421626e-05,
"loss": 0.5343,
"step": 430
},
{
"epoch": 5.4489164086687305,
"grad_norm": 0.2883571982383728,
"learning_rate": 8.435655349597689e-05,
"loss": 0.5255,
"step": 440
},
{
"epoch": 5.572755417956657,
"grad_norm": 0.3254496157169342,
"learning_rate": 8.049096779838719e-05,
"loss": 0.5281,
"step": 450
},
{
"epoch": 5.696594427244582,
"grad_norm": 0.2983749508857727,
"learning_rate": 7.66554636144095e-05,
"loss": 0.5515,
"step": 460
},
{
"epoch": 5.820433436532507,
"grad_norm": 0.2880017161369324,
"learning_rate": 7.285595501349258e-05,
"loss": 0.5575,
"step": 470
},
{
"epoch": 5.944272445820434,
"grad_norm": 0.43873119354248047,
"learning_rate": 6.909830056250527e-05,
"loss": 0.5367,
"step": 480
},
{
"epoch": 6.068111455108359,
"grad_norm": 0.33720219135284424,
"learning_rate": 6.538829429225069e-05,
"loss": 0.5509,
"step": 490
},
{
"epoch": 6.191950464396285,
"grad_norm": 0.3185509741306305,
"learning_rate": 6.173165676349103e-05,
"loss": 0.5176,
"step": 500
},
{
"epoch": 6.315789473684211,
"grad_norm": 0.3240034878253937,
"learning_rate": 5.8134026246257225e-05,
"loss": 0.5306,
"step": 510
},
{
"epoch": 6.439628482972136,
"grad_norm": 0.33068713545799255,
"learning_rate": 5.4600950026045326e-05,
"loss": 0.517,
"step": 520
},
{
"epoch": 6.563467492260062,
"grad_norm": 0.33544909954071045,
"learning_rate": 5.113787585030454e-05,
"loss": 0.5288,
"step": 530
},
{
"epoch": 6.687306501547988,
"grad_norm": 0.3468843400478363,
"learning_rate": 4.7750143528405126e-05,
"loss": 0.5222,
"step": 540
},
{
"epoch": 6.811145510835914,
"grad_norm": 0.33482104539871216,
"learning_rate": 4.444297669803981e-05,
"loss": 0.5227,
"step": 550
},
{
"epoch": 6.934984520123839,
"grad_norm": 0.3804668188095093,
"learning_rate": 4.12214747707527e-05,
"loss": 0.5251,
"step": 560
},
{
"epoch": 7.0588235294117645,
"grad_norm": 0.3186335861682892,
"learning_rate": 3.8090605069016595e-05,
"loss": 0.5264,
"step": 570
},
{
"epoch": 7.182662538699691,
"grad_norm": 0.34853196144104004,
"learning_rate": 3.5055195166981645e-05,
"loss": 0.5281,
"step": 580
},
{
"epoch": 7.306501547987616,
"grad_norm": 0.36451995372772217,
"learning_rate": 3.211992544670582e-05,
"loss": 0.5074,
"step": 590
},
{
"epoch": 7.430340557275541,
"grad_norm": 0.3326849639415741,
"learning_rate": 2.9289321881345254e-05,
"loss": 0.5249,
"step": 600
},
{
"epoch": 7.554179566563468,
"grad_norm": 0.3516250550746918,
"learning_rate": 2.6567749056431467e-05,
"loss": 0.5112,
"step": 610
},
{
"epoch": 7.678018575851393,
"grad_norm": 0.3434501588344574,
"learning_rate": 2.3959403439996907e-05,
"loss": 0.5176,
"step": 620
},
{
"epoch": 7.801857585139319,
"grad_norm": 0.3573139011859894,
"learning_rate": 2.146830691192553e-05,
"loss": 0.5251,
"step": 630
},
{
"epoch": 7.925696594427245,
"grad_norm": 0.3552079200744629,
"learning_rate": 1.9098300562505266e-05,
"loss": 0.5066,
"step": 640
},
{
"epoch": 8.04953560371517,
"grad_norm": 0.3211444020271301,
"learning_rate": 1.6853038769745467e-05,
"loss": 0.5255,
"step": 650
},
{
"epoch": 8.173374613003096,
"grad_norm": 0.3437272310256958,
"learning_rate": 1.4735983564590783e-05,
"loss": 0.5152,
"step": 660
},
{
"epoch": 8.297213622291022,
"grad_norm": 0.39420753717422485,
"learning_rate": 1.2750399292720283e-05,
"loss": 0.5147,
"step": 670
},
{
"epoch": 8.421052631578947,
"grad_norm": 0.33711323142051697,
"learning_rate": 1.0899347581163221e-05,
"loss": 0.5053,
"step": 680
},
{
"epoch": 8.544891640866872,
"grad_norm": 0.41042107343673706,
"learning_rate": 9.185682617491863e-06,
"loss": 0.4921,
"step": 690
},
{
"epoch": 8.6687306501548,
"grad_norm": 0.3305673897266388,
"learning_rate": 7.612046748871327e-06,
"loss": 0.5182,
"step": 700
},
{
"epoch": 8.792569659442725,
"grad_norm": 0.42124322056770325,
"learning_rate": 6.180866407751595e-06,
"loss": 0.5229,
"step": 710
},
{
"epoch": 8.91640866873065,
"grad_norm": 0.3476842939853668,
"learning_rate": 4.8943483704846475e-06,
"loss": 0.517,
"step": 720
},
{
"epoch": 9.040247678018575,
"grad_norm": 0.3766673803329468,
"learning_rate": 3.7544763546352834e-06,
"loss": 0.4968,
"step": 730
},
{
"epoch": 9.1640866873065,
"grad_norm": 0.34576883912086487,
"learning_rate": 2.7630079602323442e-06,
"loss": 0.5143,
"step": 740
},
{
"epoch": 9.287925696594428,
"grad_norm": 0.3424369692802429,
"learning_rate": 1.921471959676957e-06,
"loss": 0.5028,
"step": 750
},
{
"epoch": 9.411764705882353,
"grad_norm": 0.3952752947807312,
"learning_rate": 1.231165940486234e-06,
"loss": 0.4951,
"step": 760
},
{
"epoch": 9.535603715170279,
"grad_norm": 0.35501015186309814,
"learning_rate": 6.931543045073708e-07,
"loss": 0.5218,
"step": 770
},
{
"epoch": 9.659442724458204,
"grad_norm": 0.3750287592411041,
"learning_rate": 3.0826662668720364e-07,
"loss": 0.5143,
"step": 780
},
{
"epoch": 9.78328173374613,
"grad_norm": 0.336580365896225,
"learning_rate": 7.709637592770991e-08,
"loss": 0.5106,
"step": 790
},
{
"epoch": 9.907120743034056,
"grad_norm": 0.35525137186050415,
"learning_rate": 0.0,
"loss": 0.514,
"step": 800
},
{
"epoch": 9.907120743034056,
"step": 800,
"total_flos": 3.90379106992128e+16,
"train_loss": 0.5782659471035003,
"train_runtime": 2175.2449,
"train_samples_per_second": 1.485,
"train_steps_per_second": 0.368
}
],
"logging_steps": 10,
"max_steps": 800,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 3.90379106992128e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}