{
"best_metric": 0.11245531588792801,
"best_model_checkpoint": "/home/tonyzhao6/Projects/urgency-detection-finetuning/results/model_training/gemma-2-2b-it-8bit-64-32-v4/checkpoint-700",
"epoch": 0.970873786407767,
"eval_steps": 100,
"global_step": 700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013869625520110958,
"grad_norm": 0.9987179040908813,
"learning_rate": 1.834862385321101e-05,
"loss": 2.1962,
"step": 10
},
{
"epoch": 0.027739251040221916,
"grad_norm": 0.7583550810813904,
"learning_rate": 3.669724770642202e-05,
"loss": 1.6408,
"step": 20
},
{
"epoch": 0.04160887656033287,
"grad_norm": 0.6101565361022949,
"learning_rate": 5.504587155963303e-05,
"loss": 0.8461,
"step": 30
},
{
"epoch": 0.05547850208044383,
"grad_norm": 0.2617435157299042,
"learning_rate": 7.339449541284404e-05,
"loss": 0.3555,
"step": 40
},
{
"epoch": 0.06934812760055478,
"grad_norm": 0.17355866730213165,
"learning_rate": 9.174311926605506e-05,
"loss": 0.2506,
"step": 50
},
{
"epoch": 0.08321775312066575,
"grad_norm": 0.1640639752149582,
"learning_rate": 0.00011009174311926606,
"loss": 0.23,
"step": 60
},
{
"epoch": 0.0970873786407767,
"grad_norm": 0.1592264175415039,
"learning_rate": 0.00012844036697247707,
"loss": 0.2178,
"step": 70
},
{
"epoch": 0.11095700416088766,
"grad_norm": 0.13895617425441742,
"learning_rate": 0.0001467889908256881,
"loss": 0.1953,
"step": 80
},
{
"epoch": 0.12482662968099861,
"grad_norm": 0.13263335824012756,
"learning_rate": 0.0001651376146788991,
"loss": 0.1851,
"step": 90
},
{
"epoch": 0.13869625520110956,
"grad_norm": 0.14112189412117004,
"learning_rate": 0.00018348623853211012,
"loss": 0.1782,
"step": 100
},
{
"epoch": 0.13869625520110956,
"eval_loss": 0.1666356474161148,
"eval_runtime": 87.2237,
"eval_samples_per_second": 14.48,
"eval_steps_per_second": 0.906,
"step": 100
},
{
"epoch": 0.15256588072122051,
"grad_norm": 0.1304333657026291,
"learning_rate": 0.00019994279176201374,
"loss": 0.1644,
"step": 110
},
{
"epoch": 0.1664355062413315,
"grad_norm": 0.13556469976902008,
"learning_rate": 0.00019937070938215104,
"loss": 0.1614,
"step": 120
},
{
"epoch": 0.18030513176144244,
"grad_norm": 0.12493357062339783,
"learning_rate": 0.00019879862700228834,
"loss": 0.148,
"step": 130
},
{
"epoch": 0.1941747572815534,
"grad_norm": 0.12785165011882782,
"learning_rate": 0.00019822654462242566,
"loss": 0.1526,
"step": 140
},
{
"epoch": 0.20804438280166435,
"grad_norm": 0.1414169818162918,
"learning_rate": 0.00019765446224256295,
"loss": 0.1498,
"step": 150
},
{
"epoch": 0.22191400832177532,
"grad_norm": 0.11336012184619904,
"learning_rate": 0.00019708237986270025,
"loss": 0.1506,
"step": 160
},
{
"epoch": 0.23578363384188628,
"grad_norm": 0.11893126368522644,
"learning_rate": 0.00019651029748283754,
"loss": 0.1343,
"step": 170
},
{
"epoch": 0.24965325936199723,
"grad_norm": 0.12188615649938583,
"learning_rate": 0.00019593821510297484,
"loss": 0.1379,
"step": 180
},
{
"epoch": 0.2635228848821082,
"grad_norm": 0.11430846899747849,
"learning_rate": 0.00019536613272311214,
"loss": 0.1344,
"step": 190
},
{
"epoch": 0.27739251040221913,
"grad_norm": 0.11359121650457382,
"learning_rate": 0.00019479405034324946,
"loss": 0.139,
"step": 200
},
{
"epoch": 0.27739251040221913,
"eval_loss": 0.13432957231998444,
"eval_runtime": 86.9127,
"eval_samples_per_second": 14.532,
"eval_steps_per_second": 0.909,
"step": 200
},
{
"epoch": 0.2912621359223301,
"grad_norm": 0.10335998982191086,
"learning_rate": 0.00019422196796338675,
"loss": 0.1374,
"step": 210
},
{
"epoch": 0.30513176144244103,
"grad_norm": 0.09991727769374847,
"learning_rate": 0.00019364988558352405,
"loss": 0.1344,
"step": 220
},
{
"epoch": 0.31900138696255204,
"grad_norm": 0.10995834320783615,
"learning_rate": 0.00019307780320366135,
"loss": 0.1394,
"step": 230
},
{
"epoch": 0.332871012482663,
"grad_norm": 0.10396566987037659,
"learning_rate": 0.00019250572082379864,
"loss": 0.1223,
"step": 240
},
{
"epoch": 0.34674063800277394,
"grad_norm": 0.10032226890325546,
"learning_rate": 0.00019193363844393594,
"loss": 0.1285,
"step": 250
},
{
"epoch": 0.3606102635228849,
"grad_norm": 0.10445073246955872,
"learning_rate": 0.00019136155606407323,
"loss": 0.1261,
"step": 260
},
{
"epoch": 0.37447988904299584,
"grad_norm": 0.11336586624383926,
"learning_rate": 0.00019078947368421053,
"loss": 0.1286,
"step": 270
},
{
"epoch": 0.3883495145631068,
"grad_norm": 0.10205301642417908,
"learning_rate": 0.00019021739130434782,
"loss": 0.1208,
"step": 280
},
{
"epoch": 0.40221914008321774,
"grad_norm": 0.09567493945360184,
"learning_rate": 0.00018964530892448515,
"loss": 0.1271,
"step": 290
},
{
"epoch": 0.4160887656033287,
"grad_norm": 0.10747899860143661,
"learning_rate": 0.00018907322654462244,
"loss": 0.1233,
"step": 300
},
{
"epoch": 0.4160887656033287,
"eval_loss": 0.1257271021604538,
"eval_runtime": 86.6524,
"eval_samples_per_second": 14.575,
"eval_steps_per_second": 0.912,
"step": 300
},
{
"epoch": 0.42995839112343964,
"grad_norm": 0.10108979046344757,
"learning_rate": 0.00018850114416475974,
"loss": 0.124,
"step": 310
},
{
"epoch": 0.44382801664355065,
"grad_norm": 0.09316466003656387,
"learning_rate": 0.00018792906178489703,
"loss": 0.1212,
"step": 320
},
{
"epoch": 0.4576976421636616,
"grad_norm": 0.10638488829135895,
"learning_rate": 0.00018735697940503433,
"loss": 0.1288,
"step": 330
},
{
"epoch": 0.47156726768377255,
"grad_norm": 0.09914766252040863,
"learning_rate": 0.00018678489702517162,
"loss": 0.1259,
"step": 340
},
{
"epoch": 0.4854368932038835,
"grad_norm": 0.09707864373922348,
"learning_rate": 0.00018621281464530892,
"loss": 0.124,
"step": 350
},
{
"epoch": 0.49930651872399445,
"grad_norm": 0.09507231414318085,
"learning_rate": 0.00018564073226544621,
"loss": 0.1262,
"step": 360
},
{
"epoch": 0.5131761442441054,
"grad_norm": 0.09129882603883743,
"learning_rate": 0.0001850686498855835,
"loss": 0.1211,
"step": 370
},
{
"epoch": 0.5270457697642164,
"grad_norm": 0.09889239072799683,
"learning_rate": 0.00018449656750572083,
"loss": 0.1218,
"step": 380
},
{
"epoch": 0.5409153952843273,
"grad_norm": 0.09886115044355392,
"learning_rate": 0.00018392448512585813,
"loss": 0.1214,
"step": 390
},
{
"epoch": 0.5547850208044383,
"grad_norm": 0.09064166992902756,
"learning_rate": 0.00018335240274599542,
"loss": 0.126,
"step": 400
},
{
"epoch": 0.5547850208044383,
"eval_loss": 0.12142250686883926,
"eval_runtime": 86.64,
"eval_samples_per_second": 14.578,
"eval_steps_per_second": 0.912,
"step": 400
},
{
"epoch": 0.5686546463245492,
"grad_norm": 0.10354544222354889,
"learning_rate": 0.00018278032036613272,
"loss": 0.1253,
"step": 410
},
{
"epoch": 0.5825242718446602,
"grad_norm": 0.09165250509977341,
"learning_rate": 0.00018220823798627001,
"loss": 0.1224,
"step": 420
},
{
"epoch": 0.5963938973647711,
"grad_norm": 0.09138130396604538,
"learning_rate": 0.0001816361556064073,
"loss": 0.1289,
"step": 430
},
{
"epoch": 0.6102635228848821,
"grad_norm": 0.09735599905252457,
"learning_rate": 0.00018106407322654463,
"loss": 0.1181,
"step": 440
},
{
"epoch": 0.624133148404993,
"grad_norm": 0.09955897927284241,
"learning_rate": 0.00018049199084668193,
"loss": 0.1207,
"step": 450
},
{
"epoch": 0.6380027739251041,
"grad_norm": 0.09378518909215927,
"learning_rate": 0.00017991990846681922,
"loss": 0.1189,
"step": 460
},
{
"epoch": 0.651872399445215,
"grad_norm": 0.09985518455505371,
"learning_rate": 0.00017934782608695652,
"loss": 0.1196,
"step": 470
},
{
"epoch": 0.665742024965326,
"grad_norm": 0.09567826986312866,
"learning_rate": 0.00017877574370709382,
"loss": 0.1189,
"step": 480
},
{
"epoch": 0.6796116504854369,
"grad_norm": 0.09133660793304443,
"learning_rate": 0.0001782036613272311,
"loss": 0.1199,
"step": 490
},
{
"epoch": 0.6934812760055479,
"grad_norm": 0.07571779191493988,
"learning_rate": 0.00017763157894736843,
"loss": 0.1199,
"step": 500
},
{
"epoch": 0.6934812760055479,
"eval_loss": 0.11764033138751984,
"eval_runtime": 86.7125,
"eval_samples_per_second": 14.565,
"eval_steps_per_second": 0.911,
"step": 500
},
{
"epoch": 0.7073509015256588,
"grad_norm": 0.07904700189828873,
"learning_rate": 0.00017705949656750573,
"loss": 0.1174,
"step": 510
},
{
"epoch": 0.7212205270457698,
"grad_norm": 0.0874553844332695,
"learning_rate": 0.00017648741418764302,
"loss": 0.1191,
"step": 520
},
{
"epoch": 0.7350901525658807,
"grad_norm": 0.09417985379695892,
"learning_rate": 0.00017591533180778032,
"loss": 0.1158,
"step": 530
},
{
"epoch": 0.7489597780859917,
"grad_norm": 0.0866062194108963,
"learning_rate": 0.00017534324942791762,
"loss": 0.1106,
"step": 540
},
{
"epoch": 0.7628294036061026,
"grad_norm": 0.08498796820640564,
"learning_rate": 0.0001747711670480549,
"loss": 0.1124,
"step": 550
},
{
"epoch": 0.7766990291262136,
"grad_norm": 0.08251694589853287,
"learning_rate": 0.00017419908466819223,
"loss": 0.1136,
"step": 560
},
{
"epoch": 0.7905686546463245,
"grad_norm": 0.08275240659713745,
"learning_rate": 0.00017362700228832953,
"loss": 0.1107,
"step": 570
},
{
"epoch": 0.8044382801664355,
"grad_norm": 0.08751562237739563,
"learning_rate": 0.00017305491990846682,
"loss": 0.1169,
"step": 580
},
{
"epoch": 0.8183079056865464,
"grad_norm": 0.09078636020421982,
"learning_rate": 0.00017248283752860412,
"loss": 0.1143,
"step": 590
},
{
"epoch": 0.8321775312066574,
"grad_norm": 0.08412676304578781,
"learning_rate": 0.00017191075514874142,
"loss": 0.1197,
"step": 600
},
{
"epoch": 0.8321775312066574,
"eval_loss": 0.11502571403980255,
"eval_runtime": 86.625,
"eval_samples_per_second": 14.58,
"eval_steps_per_second": 0.912,
"step": 600
},
{
"epoch": 0.8460471567267683,
"grad_norm": 0.08373397588729858,
"learning_rate": 0.0001713386727688787,
"loss": 0.1205,
"step": 610
},
{
"epoch": 0.8599167822468793,
"grad_norm": 0.08933025598526001,
"learning_rate": 0.00017076659038901603,
"loss": 0.1147,
"step": 620
},
{
"epoch": 0.8737864077669902,
"grad_norm": 0.08800772577524185,
"learning_rate": 0.00017019450800915333,
"loss": 0.1201,
"step": 630
},
{
"epoch": 0.8876560332871013,
"grad_norm": 0.08623263984918594,
"learning_rate": 0.00016962242562929063,
"loss": 0.1144,
"step": 640
},
{
"epoch": 0.9015256588072122,
"grad_norm": 0.0788191556930542,
"learning_rate": 0.00016905034324942792,
"loss": 0.1188,
"step": 650
},
{
"epoch": 0.9153952843273232,
"grad_norm": 0.0787658542394638,
"learning_rate": 0.00016847826086956522,
"loss": 0.1077,
"step": 660
},
{
"epoch": 0.9292649098474342,
"grad_norm": 0.08364666253328323,
"learning_rate": 0.0001679061784897025,
"loss": 0.1072,
"step": 670
},
{
"epoch": 0.9431345353675451,
"grad_norm": 0.08853990584611893,
"learning_rate": 0.00016733409610983983,
"loss": 0.1097,
"step": 680
},
{
"epoch": 0.957004160887656,
"grad_norm": 0.08456674963235855,
"learning_rate": 0.00016676201372997713,
"loss": 0.1167,
"step": 690
},
{
"epoch": 0.970873786407767,
"grad_norm": 0.0840703621506691,
"learning_rate": 0.00016618993135011443,
"loss": 0.1231,
"step": 700
},
{
"epoch": 0.970873786407767,
"eval_loss": 0.11245531588792801,
"eval_runtime": 86.613,
"eval_samples_per_second": 14.582,
"eval_steps_per_second": 0.912,
"step": 700
}
],
"logging_steps": 10,
"max_steps": 3605,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0638251619228058e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}