gemma-2-Ifable-9B / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9807355516637478,
"eval_steps": 18,
"global_step": 35,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.028021015761821366,
"grad_norm": 97.3614384660342,
"learning_rate": 8e-08,
"logits/chosen": -7.731139183044434,
"logits/rejected": -8.25313663482666,
"logps/chosen": -1.6605920791625977,
"logps/rejected": -1.683160424232483,
"loss": 6.8307,
"rewards/accuracies": 0.5,
"rewards/chosen": -16.60592269897461,
"rewards/margins": 0.22568130493164062,
"rewards/rejected": -16.83160400390625,
"sft_loss": 0.04547927528619766,
"step": 1
},
{
"epoch": 0.05604203152364273,
"grad_norm": 121.91139131956491,
"learning_rate": 2.6e-07,
"logits/chosen": -7.893815040588379,
"logits/rejected": -8.019620895385742,
"logps/chosen": -1.952430248260498,
"logps/rejected": -1.8113142251968384,
"loss": 7.3627,
"rewards/accuracies": 0.5,
"rewards/chosen": -19.52429962158203,
"rewards/margins": -1.4111591577529907,
"rewards/rejected": -18.113142013549805,
"sft_loss": 0.00891563668847084,
"step": 2
},
{
"epoch": 0.0840630472854641,
"grad_norm": 115.99875317435591,
"learning_rate": 4.4e-07,
"logits/chosen": -9.035008430480957,
"logits/rejected": -8.950678825378418,
"logps/chosen": -1.4869132041931152,
"logps/rejected": -1.50464928150177,
"loss": 7.025,
"rewards/accuracies": 0.6875,
"rewards/chosen": -14.869132995605469,
"rewards/margins": 0.17736005783081055,
"rewards/rejected": -15.046493530273438,
"sft_loss": 0.010355046950280666,
"step": 3
},
{
"epoch": 0.11208406304728546,
"grad_norm": 143.98542912632848,
"learning_rate": 6.2e-07,
"logits/chosen": -9.924211502075195,
"logits/rejected": -9.45657730102539,
"logps/chosen": -1.4292300939559937,
"logps/rejected": -1.2033114433288574,
"loss": 6.4674,
"rewards/accuracies": 0.3125,
"rewards/chosen": -14.292301177978516,
"rewards/margins": -2.2591874599456787,
"rewards/rejected": -12.033114433288574,
"sft_loss": 0.006642716005444527,
"step": 4
},
{
"epoch": 0.14010507880910683,
"grad_norm": 105.0416363604681,
"learning_rate": 7.981529564210822e-07,
"logits/chosen": -8.538932800292969,
"logits/rejected": -8.046061515808105,
"logps/chosen": -1.5071882009506226,
"logps/rejected": -1.675721526145935,
"loss": 7.1364,
"rewards/accuracies": 0.625,
"rewards/chosen": -15.071882247924805,
"rewards/margins": 1.6853327751159668,
"rewards/rejected": -16.757217407226562,
"sft_loss": 0.01169000007212162,
"step": 5
},
{
"epoch": 0.1681260945709282,
"grad_norm": 136.46572113440772,
"learning_rate": 7.926307788508979e-07,
"logits/chosen": -8.856929779052734,
"logits/rejected": -9.344861030578613,
"logps/chosen": -1.6353546380996704,
"logps/rejected": -1.7302504777908325,
"loss": 5.6445,
"rewards/accuracies": 0.6875,
"rewards/chosen": -16.353546142578125,
"rewards/margins": 0.9489572644233704,
"rewards/rejected": -17.30250358581543,
"sft_loss": 0.03519538417458534,
"step": 6
},
{
"epoch": 0.19614711033274956,
"grad_norm": 125.56078536529542,
"learning_rate": 7.834901323040175e-07,
"logits/chosen": -7.622992515563965,
"logits/rejected": -7.627020835876465,
"logps/chosen": -2.047703981399536,
"logps/rejected": -1.6984105110168457,
"loss": 6.5965,
"rewards/accuracies": 0.375,
"rewards/chosen": -20.477039337158203,
"rewards/margins": -3.4929349422454834,
"rewards/rejected": -16.98410415649414,
"sft_loss": 0.006214356515556574,
"step": 7
},
{
"epoch": 0.22416812609457093,
"grad_norm": 135.07237608120852,
"learning_rate": 7.70824812183283e-07,
"logits/chosen": -8.93583869934082,
"logits/rejected": -9.021809577941895,
"logps/chosen": -1.3730394840240479,
"logps/rejected": -1.4458472728729248,
"loss": 6.3454,
"rewards/accuracies": 0.6875,
"rewards/chosen": -13.730398178100586,
"rewards/margins": 0.7280769944190979,
"rewards/rejected": -14.458473205566406,
"sft_loss": 0.03004990890622139,
"step": 8
},
{
"epoch": 0.2521891418563923,
"grad_norm": 239.98627324152338,
"learning_rate": 7.547647818120495e-07,
"logits/chosen": -9.406291961669922,
"logits/rejected": -9.9324951171875,
"logps/chosen": -1.5991909503936768,
"logps/rejected": -1.6271830797195435,
"loss": 5.2256,
"rewards/accuracies": 0.5,
"rewards/chosen": -15.991909980773926,
"rewards/margins": 0.2799214720726013,
"rewards/rejected": -16.271831512451172,
"sft_loss": 0.03231532499194145,
"step": 9
},
{
"epoch": 0.28021015761821366,
"grad_norm": 234.53437032749468,
"learning_rate": 7.354748388346194e-07,
"logits/chosen": -7.813473701477051,
"logits/rejected": -9.137899398803711,
"logps/chosen": -1.8943036794662476,
"logps/rejected": -1.9154584407806396,
"loss": 4.8325,
"rewards/accuracies": 0.5625,
"rewards/chosen": -18.943037033081055,
"rewards/margins": 0.2115485668182373,
"rewards/rejected": -19.154584884643555,
"sft_loss": 0.008024048060178757,
"step": 10
},
{
"epoch": 0.30823117338003503,
"grad_norm": 287.5499688149926,
"learning_rate": 7.131529241694047e-07,
"logits/chosen": -10.093656539916992,
"logits/rejected": -10.823583602905273,
"logps/chosen": -1.676328182220459,
"logps/rejected": -2.193615198135376,
"loss": 4.8686,
"rewards/accuracies": 0.875,
"rewards/chosen": -16.763280868530273,
"rewards/margins": 5.17287015914917,
"rewards/rejected": -21.9361515045166,
"sft_loss": 0.018158258870244026,
"step": 11
},
{
"epoch": 0.3362521891418564,
"grad_norm": 279.35544138821984,
"learning_rate": 6.880280908672471e-07,
"logits/chosen": -7.598231792449951,
"logits/rejected": -8.863749504089355,
"logps/chosen": -1.8558087348937988,
"logps/rejected": -2.048665761947632,
"loss": 4.1241,
"rewards/accuracies": 0.625,
"rewards/chosen": -18.558086395263672,
"rewards/margins": 1.9285707473754883,
"rewards/rejected": -20.486658096313477,
"sft_loss": 0.003531986614689231,
"step": 12
},
{
"epoch": 0.36427320490367776,
"grad_norm": 308.20678092603185,
"learning_rate": 6.603581537171586e-07,
"logits/chosen": -8.397397994995117,
"logits/rejected": -10.13599681854248,
"logps/chosen": -1.745999813079834,
"logps/rejected": -1.8406281471252441,
"loss": 3.8904,
"rewards/accuracies": 0.75,
"rewards/chosen": -17.459999084472656,
"rewards/margins": 0.9462810754776001,
"rewards/rejected": -18.406280517578125,
"sft_loss": 0.010710272938013077,
"step": 13
},
{
"epoch": 0.3922942206654991,
"grad_norm": 88.53513750352447,
"learning_rate": 6.304270437177064e-07,
"logits/chosen": -11.391769409179688,
"logits/rejected": -13.656466484069824,
"logps/chosen": -1.820195198059082,
"logps/rejected": -2.2572662830352783,
"loss": 2.6976,
"rewards/accuracies": 0.75,
"rewards/chosen": -18.201950073242188,
"rewards/margins": 4.370712757110596,
"rewards/rejected": -22.572664260864258,
"sft_loss": 0.006363618653267622,
"step": 14
},
{
"epoch": 0.4203152364273205,
"grad_norm": 82.22128437782617,
"learning_rate": 5.985418945607484e-07,
"logits/chosen": -12.035834312438965,
"logits/rejected": -14.527205467224121,
"logps/chosen": -2.3111374378204346,
"logps/rejected": -3.1396684646606445,
"loss": 1.2788,
"rewards/accuracies": 0.9375,
"rewards/chosen": -23.111371994018555,
"rewards/margins": 8.285309791564941,
"rewards/rejected": -31.396682739257812,
"sft_loss": 0.007131902035325766,
"step": 15
},
{
"epoch": 0.44833625218914186,
"grad_norm": 120.40204277011782,
"learning_rate": 5.650298910241353e-07,
"logits/chosen": -12.784805297851562,
"logits/rejected": -15.177325248718262,
"logps/chosen": -2.0324227809906006,
"logps/rejected": -3.119992971420288,
"loss": 1.4116,
"rewards/accuracies": 0.875,
"rewards/chosen": -20.324228286743164,
"rewards/margins": 10.875699996948242,
"rewards/rejected": -31.199928283691406,
"sft_loss": 0.005817623808979988,
"step": 16
},
{
"epoch": 0.4763572679509632,
"grad_norm": 97.73770625062221,
"learning_rate": 5.302349116131393e-07,
"logits/chosen": -15.652244567871094,
"logits/rejected": -17.80880355834961,
"logps/chosen": -1.9218964576721191,
"logps/rejected": -2.738666296005249,
"loss": 1.6013,
"rewards/accuracies": 0.9375,
"rewards/chosen": -19.218965530395508,
"rewards/margins": 8.167696952819824,
"rewards/rejected": -27.386659622192383,
"sft_loss": 0.004993550945073366,
"step": 17
},
{
"epoch": 0.5043782837127846,
"grad_norm": 189.96208267597956,
"learning_rate": 4.945139999016476e-07,
"logits/chosen": -15.17531681060791,
"logits/rejected": -15.677058219909668,
"logps/chosen": -1.9880082607269287,
"logps/rejected": -3.052130699157715,
"loss": 1.8319,
"rewards/accuracies": 0.875,
"rewards/chosen": -19.880081176757812,
"rewards/margins": 10.641225814819336,
"rewards/rejected": -30.52130889892578,
"sft_loss": 0.008743491023778915,
"step": 18
},
{
"epoch": 0.532399299474606,
"grad_norm": 114.52318633727846,
"learning_rate": 4.5823370078193663e-07,
"logits/chosen": -10.547327995300293,
"logits/rejected": -14.194029808044434,
"logps/chosen": -2.2875614166259766,
"logps/rejected": -3.8111658096313477,
"loss": 1.7772,
"rewards/accuracies": 0.8125,
"rewards/chosen": -22.875612258911133,
"rewards/margins": 15.236043930053711,
"rewards/rejected": -38.111656188964844,
"sft_loss": 0.01470925658941269,
"step": 19
},
{
"epoch": 0.5604203152364273,
"grad_norm": 82.97272038448429,
"learning_rate": 4.217662992180634e-07,
"logits/chosen": -10.6708345413208,
"logits/rejected": -15.503955841064453,
"logps/chosen": -2.2511441707611084,
"logps/rejected": -3.763054132461548,
"loss": 1.6103,
"rewards/accuracies": 0.8125,
"rewards/chosen": -22.511442184448242,
"rewards/margins": 15.119099617004395,
"rewards/rejected": -37.63053894042969,
"sft_loss": 0.012447498738765717,
"step": 20
},
{
"epoch": 0.5884413309982487,
"grad_norm": 93.13289791788793,
"learning_rate": 3.8548600009835237e-07,
"logits/chosen": -11.672554016113281,
"logits/rejected": -16.79704475402832,
"logps/chosen": -2.7983806133270264,
"logps/rejected": -4.703005790710449,
"loss": 1.4708,
"rewards/accuracies": 0.9375,
"rewards/chosen": -27.983802795410156,
"rewards/margins": 19.046255111694336,
"rewards/rejected": -47.030059814453125,
"sft_loss": 0.0047310409136116505,
"step": 21
},
{
"epoch": 0.6164623467600701,
"grad_norm": 97.54389148924957,
"learning_rate": 3.4976508838686066e-07,
"logits/chosen": -16.980070114135742,
"logits/rejected": -18.32730484008789,
"logps/chosen": -2.150972843170166,
"logps/rejected": -3.424234390258789,
"loss": 1.1655,
"rewards/accuracies": 1.0,
"rewards/chosen": -21.509729385375977,
"rewards/margins": 12.732614517211914,
"rewards/rejected": -34.242340087890625,
"sft_loss": 0.007569438312202692,
"step": 22
},
{
"epoch": 0.6444833625218914,
"grad_norm": 73.41731467567557,
"learning_rate": 3.149701089758648e-07,
"logits/chosen": -11.438061714172363,
"logits/rejected": -14.675212860107422,
"logps/chosen": -2.3481569290161133,
"logps/rejected": -4.2203450202941895,
"loss": 1.2242,
"rewards/accuracies": 0.9375,
"rewards/chosen": -23.481565475463867,
"rewards/margins": 18.721879959106445,
"rewards/rejected": -42.20344543457031,
"sft_loss": 0.010319937951862812,
"step": 23
},
{
"epoch": 0.6725043782837128,
"grad_norm": 96.48356219878691,
"learning_rate": 2.8145810543925163e-07,
"logits/chosen": -11.793488502502441,
"logits/rejected": -15.576847076416016,
"logps/chosen": -2.3053696155548096,
"logps/rejected": -4.343653202056885,
"loss": 1.4752,
"rewards/accuracies": 0.9375,
"rewards/chosen": -23.053693771362305,
"rewards/margins": 20.382837295532227,
"rewards/rejected": -43.4365348815918,
"sft_loss": 0.024834871292114258,
"step": 24
},
{
"epoch": 0.7005253940455342,
"grad_norm": 122.92321660024119,
"learning_rate": 2.495729562822935e-07,
"logits/chosen": -16.064531326293945,
"logits/rejected": -17.884010314941406,
"logps/chosen": -2.647796869277954,
"logps/rejected": -3.9600863456726074,
"loss": 1.4732,
"rewards/accuracies": 1.0,
"rewards/chosen": -26.477968215942383,
"rewards/margins": 13.12289810180664,
"rewards/rejected": -39.60086441040039,
"sft_loss": 0.021936513483524323,
"step": 25
},
{
"epoch": 0.7285464098073555,
"grad_norm": 80.26268266585235,
"learning_rate": 2.196418462828415e-07,
"logits/chosen": -12.343572616577148,
"logits/rejected": -15.574173927307129,
"logps/chosen": -2.2388336658477783,
"logps/rejected": -4.106793403625488,
"loss": 1.2593,
"rewards/accuracies": 0.8125,
"rewards/chosen": -22.388338088989258,
"rewards/margins": 18.679595947265625,
"rewards/rejected": -41.06793212890625,
"sft_loss": 0.013025043532252312,
"step": 26
},
{
"epoch": 0.7565674255691769,
"grad_norm": 66.28989889413502,
"learning_rate": 1.9197190913275294e-07,
"logits/chosen": -12.330286026000977,
"logits/rejected": -15.901168823242188,
"logps/chosen": -2.5048580169677734,
"logps/rejected": -4.5386857986450195,
"loss": 1.1459,
"rewards/accuracies": 0.8125,
"rewards/chosen": -25.048580169677734,
"rewards/margins": 20.338275909423828,
"rewards/rejected": -45.38685607910156,
"sft_loss": 0.006859698798507452,
"step": 27
},
{
"epoch": 0.7845884413309983,
"grad_norm": 81.14927404545568,
"learning_rate": 1.6684707583059529e-07,
"logits/chosen": -17.190406799316406,
"logits/rejected": -19.05614471435547,
"logps/chosen": -2.768648147583008,
"logps/rejected": -4.382925033569336,
"loss": 1.3002,
"rewards/accuracies": 0.9375,
"rewards/chosen": -27.68647575378418,
"rewards/margins": 16.142770767211914,
"rewards/rejected": -43.82925033569336,
"sft_loss": 0.005794988479465246,
"step": 28
},
{
"epoch": 0.8126094570928196,
"grad_norm": 70.98888001094448,
"learning_rate": 1.4452516116538054e-07,
"logits/chosen": -10.294852256774902,
"logits/rejected": -15.053112030029297,
"logps/chosen": -2.106778383255005,
"logps/rejected": -4.66357421875,
"loss": 0.8862,
"rewards/accuracies": 0.9375,
"rewards/chosen": -21.067781448364258,
"rewards/margins": 25.567956924438477,
"rewards/rejected": -46.6357421875,
"sft_loss": 0.008147615939378738,
"step": 29
},
{
"epoch": 0.840630472854641,
"grad_norm": 99.24795263935427,
"learning_rate": 1.2523521818795044e-07,
"logits/chosen": -9.799162864685059,
"logits/rejected": -15.37686538696289,
"logps/chosen": -2.270378828048706,
"logps/rejected": -5.39860725402832,
"loss": 1.3672,
"rewards/accuracies": 1.0,
"rewards/chosen": -22.70379066467285,
"rewards/margins": 31.282283782958984,
"rewards/rejected": -53.98607635498047,
"sft_loss": 0.005661052651703358,
"step": 30
},
{
"epoch": 0.8686514886164624,
"grad_norm": 105.42160708641147,
"learning_rate": 1.0917518781671699e-07,
"logits/chosen": -10.193641662597656,
"logits/rejected": -13.275125503540039,
"logps/chosen": -2.434311866760254,
"logps/rejected": -3.9474875926971436,
"loss": 1.3672,
"rewards/accuracies": 0.875,
"rewards/chosen": -24.343116760253906,
"rewards/margins": 15.131752967834473,
"rewards/rejected": -39.47487258911133,
"sft_loss": 0.01922934129834175,
"step": 31
},
{
"epoch": 0.8966725043782837,
"grad_norm": 75.46584751909548,
"learning_rate": 9.650986769598242e-08,
"logits/chosen": -9.355328559875488,
"logits/rejected": -16.710058212280273,
"logps/chosen": -2.3611741065979004,
"logps/rejected": -5.737414360046387,
"loss": 1.6485,
"rewards/accuracies": 0.9375,
"rewards/chosen": -23.61174201965332,
"rewards/margins": 33.76239776611328,
"rewards/rejected": -57.3741455078125,
"sft_loss": 0.010620678775012493,
"step": 32
},
{
"epoch": 0.9246935201401051,
"grad_norm": 84.00269490747272,
"learning_rate": 8.736922114910199e-08,
"logits/chosen": -10.082009315490723,
"logits/rejected": -14.500289916992188,
"logps/chosen": -2.227128267288208,
"logps/rejected": -5.038478374481201,
"loss": 1.3242,
"rewards/accuracies": 0.9375,
"rewards/chosen": -22.271284103393555,
"rewards/margins": 28.113502502441406,
"rewards/rejected": -50.384788513183594,
"sft_loss": 0.009218152612447739,
"step": 33
},
{
"epoch": 0.9527145359019265,
"grad_norm": 75.42419711071338,
"learning_rate": 8.184704357891779e-08,
"logits/chosen": -10.325528144836426,
"logits/rejected": -16.278217315673828,
"logps/chosen": -2.2617452144622803,
"logps/rejected": -4.84274435043335,
"loss": 0.8932,
"rewards/accuracies": 0.9375,
"rewards/chosen": -22.617454528808594,
"rewards/margins": 25.809988021850586,
"rewards/rejected": -48.42743682861328,
"sft_loss": 0.014543527737259865,
"step": 34
},
{
"epoch": 0.9807355516637478,
"grad_norm": 108.8748972501278,
"learning_rate": 8e-08,
"logits/chosen": -12.176219940185547,
"logits/rejected": -16.480701446533203,
"logps/chosen": -2.5212955474853516,
"logps/rejected": -4.665997505187988,
"loss": 1.4444,
"rewards/accuracies": 0.9375,
"rewards/chosen": -25.21295166015625,
"rewards/margins": 21.447019577026367,
"rewards/rejected": -46.65997314453125,
"sft_loss": 0.04498640075325966,
"step": 35
},
{
"epoch": 0.9807355516637478,
"eval_logits/chosen": -12.004097938537598,
"eval_logits/rejected": -17.047502517700195,
"eval_logps/chosen": -2.168222427368164,
"eval_logps/rejected": -4.787535667419434,
"eval_loss": 1.0162526369094849,
"eval_rewards/accuracies": 0.9166666865348816,
"eval_rewards/chosen": -21.682226181030273,
"eval_rewards/margins": 26.193130493164062,
"eval_rewards/rejected": -47.875362396240234,
"eval_runtime": 9.3123,
"eval_samples_per_second": 10.094,
"eval_sft_loss": 0.01844729855656624,
"eval_steps_per_second": 1.289,
"step": 35
},
{
"before_init_mem_cpu": 3802071040,
"before_init_mem_gpu": 22016,
"epoch": 0.9807355516637478,
"init_mem_cpu_alloc_delta": 364544,
"init_mem_cpu_peaked_delta": 0,
"init_mem_gpu_alloc_delta": 0,
"init_mem_gpu_peaked_delta": 0,
"step": 35,
"total_flos": 39867492466688.0,
"train_loss": 3.085822834287371,
"train_mem_cpu_alloc_delta": 5213659136,
"train_mem_cpu_peaked_delta": 22737326080,
"train_mem_gpu_alloc_delta": 16267848704,
"train_mem_gpu_peaked_delta": 36029468160,
"train_runtime": 1628.7465,
"train_samples_per_second": 2.805,
"train_steps_per_second": 0.021
}
],
"logging_steps": 1.0,
"max_steps": 35,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 18,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 39867492466688.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
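
The log above is the standard `trainer_state.json` layout: `log_history` holds one object per logged training step (loss, grad norm, DPO-style reward statistics, SFT loss), followed by one evaluation entry and one memory/FLOPs summary. As a minimal sketch of how such a file can be inspected, the snippet below (assuming the file is saved locally as `trainer_state.json`) filters `log_history` down to the training steps and prints the loss, reward margin, and reward accuracy per step; the field names are taken directly from the log above.

```python
import json

# Minimal sketch: read trainer_state.json from the current directory
# (path is an assumption) and tabulate the per-step training metrics.
with open("trainer_state.json") as f:
    state = json.load(f)

for entry in state["log_history"]:
    # Only training steps carry a plain "loss" key; the eval entry and the
    # final memory/FLOPs summary use different keys, so skip them here.
    if "loss" not in entry:
        continue
    print(
        f'step {entry["step"]:>2}  '
        f'loss {entry["loss"]:6.3f}  '
        f'margin {entry["rewards/margins"]:8.3f}  '
        f'acc {entry["rewards/accuracies"]:.3f}'
    )
```

Run against the data above, this prints 35 rows and makes the trend easy to see: the loss falls from roughly 6.8 to about 1.0–1.5 while the reward margin grows from near zero to the 20–30 range over the single training epoch.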