kevinoli's picture
Training in progress, step 25500, checkpoint
8d96966 verified
{
"best_metric": 0.4704216718673706,
"best_model_checkpoint": "./output/clip-finetuned-csu-p14-336-e4l57-l/checkpoint-25000",
"epoch": 2.7197098976109215,
"eval_steps": 500,
"global_step": 25500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05332764505119454,
"grad_norm": 414.2552795410156,
"learning_rate": 4.911120591581342e-07,
"loss": 0.3809,
"step": 500
},
{
"epoch": 0.05332764505119454,
"eval_loss": 1.1109352111816406,
"eval_runtime": 63.6734,
"eval_samples_per_second": 15.501,
"eval_steps_per_second": 1.947,
"step": 500
},
{
"epoch": 0.10665529010238908,
"grad_norm": 45.880027770996094,
"learning_rate": 4.822241183162685e-07,
"loss": 0.2649,
"step": 1000
},
{
"epoch": 0.10665529010238908,
"eval_loss": 0.9588962197303772,
"eval_runtime": 62.6825,
"eval_samples_per_second": 15.746,
"eval_steps_per_second": 1.978,
"step": 1000
},
{
"epoch": 0.1599829351535836,
"grad_norm": 432.9915466308594,
"learning_rate": 4.733361774744027e-07,
"loss": 0.2104,
"step": 1500
},
{
"epoch": 0.1599829351535836,
"eval_loss": 0.90110844373703,
"eval_runtime": 62.6757,
"eval_samples_per_second": 15.748,
"eval_steps_per_second": 1.978,
"step": 1500
},
{
"epoch": 0.21331058020477817,
"grad_norm": 0.014208819717168808,
"learning_rate": 4.6444823663253695e-07,
"loss": 0.1849,
"step": 2000
},
{
"epoch": 0.21331058020477817,
"eval_loss": 0.8570474982261658,
"eval_runtime": 63.7296,
"eval_samples_per_second": 15.487,
"eval_steps_per_second": 1.946,
"step": 2000
},
{
"epoch": 0.2666382252559727,
"grad_norm": 174.73324584960938,
"learning_rate": 4.5556029579067116e-07,
"loss": 0.2056,
"step": 2500
},
{
"epoch": 0.2666382252559727,
"eval_loss": 0.7895939350128174,
"eval_runtime": 63.8387,
"eval_samples_per_second": 15.461,
"eval_steps_per_second": 1.942,
"step": 2500
},
{
"epoch": 0.3199658703071672,
"grad_norm": 9.852073823424234e-09,
"learning_rate": 4.4667235494880547e-07,
"loss": 0.159,
"step": 3000
},
{
"epoch": 0.3199658703071672,
"eval_loss": 0.7647623419761658,
"eval_runtime": 63.9155,
"eval_samples_per_second": 15.442,
"eval_steps_per_second": 1.94,
"step": 3000
},
{
"epoch": 0.37329351535836175,
"grad_norm": 0.0025215104687958956,
"learning_rate": 4.377844141069397e-07,
"loss": 0.1696,
"step": 3500
},
{
"epoch": 0.37329351535836175,
"eval_loss": 0.7638036012649536,
"eval_runtime": 63.8003,
"eval_samples_per_second": 15.47,
"eval_steps_per_second": 1.944,
"step": 3500
},
{
"epoch": 0.42662116040955633,
"grad_norm": 2.4611830711364746,
"learning_rate": 4.2889647326507393e-07,
"loss": 0.1125,
"step": 4000
},
{
"epoch": 0.42662116040955633,
"eval_loss": 0.7486374378204346,
"eval_runtime": 63.9847,
"eval_samples_per_second": 15.426,
"eval_steps_per_second": 1.938,
"step": 4000
},
{
"epoch": 0.47994880546075086,
"grad_norm": 5.230295658111572,
"learning_rate": 4.2000853242320814e-07,
"loss": 0.1322,
"step": 4500
},
{
"epoch": 0.47994880546075086,
"eval_loss": 0.7251659035682678,
"eval_runtime": 64.4782,
"eval_samples_per_second": 15.308,
"eval_steps_per_second": 1.923,
"step": 4500
},
{
"epoch": 0.5332764505119454,
"grad_norm": 3.821002974291332e-05,
"learning_rate": 4.1112059158134245e-07,
"loss": 0.1633,
"step": 5000
},
{
"epoch": 0.5332764505119454,
"eval_loss": 0.7035414576530457,
"eval_runtime": 62.6186,
"eval_samples_per_second": 15.762,
"eval_steps_per_second": 1.98,
"step": 5000
},
{
"epoch": 0.58660409556314,
"grad_norm": 5.916939244343666e-06,
"learning_rate": 4.0223265073947665e-07,
"loss": 0.125,
"step": 5500
},
{
"epoch": 0.58660409556314,
"eval_loss": 0.6792380809783936,
"eval_runtime": 65.0551,
"eval_samples_per_second": 15.172,
"eval_steps_per_second": 1.906,
"step": 5500
},
{
"epoch": 0.6399317406143344,
"grad_norm": 92.8386459350586,
"learning_rate": 3.933447098976109e-07,
"loss": 0.1297,
"step": 6000
},
{
"epoch": 0.6399317406143344,
"eval_loss": 0.6549482345581055,
"eval_runtime": 64.0827,
"eval_samples_per_second": 15.402,
"eval_steps_per_second": 1.935,
"step": 6000
},
{
"epoch": 0.693259385665529,
"grad_norm": 0.00014717792510055006,
"learning_rate": 3.8445676905574517e-07,
"loss": 0.1114,
"step": 6500
},
{
"epoch": 0.693259385665529,
"eval_loss": 0.6223254799842834,
"eval_runtime": 64.89,
"eval_samples_per_second": 15.21,
"eval_steps_per_second": 1.911,
"step": 6500
},
{
"epoch": 0.7465870307167235,
"grad_norm": 4.789559397977428e-07,
"learning_rate": 3.755688282138794e-07,
"loss": 0.144,
"step": 7000
},
{
"epoch": 0.7465870307167235,
"eval_loss": 0.6204637289047241,
"eval_runtime": 62.7674,
"eval_samples_per_second": 15.725,
"eval_steps_per_second": 1.976,
"step": 7000
},
{
"epoch": 0.7999146757679181,
"grad_norm": 1.7078508138656616,
"learning_rate": 3.6668088737201363e-07,
"loss": 0.1399,
"step": 7500
},
{
"epoch": 0.7999146757679181,
"eval_loss": 0.6074371933937073,
"eval_runtime": 63.4694,
"eval_samples_per_second": 15.551,
"eval_steps_per_second": 1.954,
"step": 7500
},
{
"epoch": 0.8532423208191127,
"grad_norm": 870.1183471679688,
"learning_rate": 3.5779294653014783e-07,
"loss": 0.141,
"step": 8000
},
{
"epoch": 0.8532423208191127,
"eval_loss": 0.5950366854667664,
"eval_runtime": 62.915,
"eval_samples_per_second": 15.688,
"eval_steps_per_second": 1.971,
"step": 8000
},
{
"epoch": 0.9065699658703071,
"grad_norm": 5.781071013188921e-05,
"learning_rate": 3.4890500568828214e-07,
"loss": 0.1291,
"step": 8500
},
{
"epoch": 0.9065699658703071,
"eval_loss": 0.5800224542617798,
"eval_runtime": 62.3336,
"eval_samples_per_second": 15.834,
"eval_steps_per_second": 1.989,
"step": 8500
},
{
"epoch": 0.9598976109215017,
"grad_norm": 1.3669992685317993,
"learning_rate": 3.4001706484641635e-07,
"loss": 0.1246,
"step": 9000
},
{
"epoch": 0.9598976109215017,
"eval_loss": 0.5702486038208008,
"eval_runtime": 64.055,
"eval_samples_per_second": 15.409,
"eval_steps_per_second": 1.936,
"step": 9000
},
{
"epoch": 1.0132252559726962,
"grad_norm": 644.68115234375,
"learning_rate": 3.311291240045506e-07,
"loss": 0.1257,
"step": 9500
},
{
"epoch": 1.0132252559726962,
"eval_loss": 0.5807780623435974,
"eval_runtime": 62.5312,
"eval_samples_per_second": 15.784,
"eval_steps_per_second": 1.983,
"step": 9500
},
{
"epoch": 1.0665529010238908,
"grad_norm": 27.854568481445312,
"learning_rate": 3.2224118316268486e-07,
"loss": 0.0336,
"step": 10000
},
{
"epoch": 1.0665529010238908,
"eval_loss": 0.5713164210319519,
"eval_runtime": 62.5622,
"eval_samples_per_second": 15.776,
"eval_steps_per_second": 1.982,
"step": 10000
},
{
"epoch": 1.1198805460750854,
"grad_norm": 0.00045933027286082506,
"learning_rate": 3.133532423208191e-07,
"loss": 0.0663,
"step": 10500
},
{
"epoch": 1.1198805460750854,
"eval_loss": 0.5601483583450317,
"eval_runtime": 63.4675,
"eval_samples_per_second": 15.551,
"eval_steps_per_second": 1.954,
"step": 10500
},
{
"epoch": 1.17320819112628,
"grad_norm": 0.14330387115478516,
"learning_rate": 3.044653014789533e-07,
"loss": 0.078,
"step": 11000
},
{
"epoch": 1.17320819112628,
"eval_loss": 0.5580261945724487,
"eval_runtime": 63.3927,
"eval_samples_per_second": 15.57,
"eval_steps_per_second": 1.956,
"step": 11000
},
{
"epoch": 1.2265358361774745,
"grad_norm": 4.066005518388316e-11,
"learning_rate": 2.955773606370876e-07,
"loss": 0.1118,
"step": 11500
},
{
"epoch": 1.2265358361774745,
"eval_loss": 0.5491370558738708,
"eval_runtime": 63.7293,
"eval_samples_per_second": 15.487,
"eval_steps_per_second": 1.946,
"step": 11500
},
{
"epoch": 1.2798634812286689,
"grad_norm": 7.519358769059181e-05,
"learning_rate": 2.8668941979522184e-07,
"loss": 0.0534,
"step": 12000
},
{
"epoch": 1.2798634812286689,
"eval_loss": 0.5478764772415161,
"eval_runtime": 62.7547,
"eval_samples_per_second": 15.728,
"eval_steps_per_second": 1.976,
"step": 12000
},
{
"epoch": 1.3331911262798635,
"grad_norm": 8.400617446113756e-08,
"learning_rate": 2.778014789533561e-07,
"loss": 0.0507,
"step": 12500
},
{
"epoch": 1.3331911262798635,
"eval_loss": 0.576600968837738,
"eval_runtime": 64.301,
"eval_samples_per_second": 15.35,
"eval_steps_per_second": 1.928,
"step": 12500
},
{
"epoch": 1.386518771331058,
"grad_norm": 2.5385464130084356e-12,
"learning_rate": 2.689135381114903e-07,
"loss": 0.0784,
"step": 13000
},
{
"epoch": 1.386518771331058,
"eval_loss": 0.5729931592941284,
"eval_runtime": 63.652,
"eval_samples_per_second": 15.506,
"eval_steps_per_second": 1.948,
"step": 13000
},
{
"epoch": 1.4398464163822526,
"grad_norm": 0.08409273624420166,
"learning_rate": 2.600255972696246e-07,
"loss": 0.079,
"step": 13500
},
{
"epoch": 1.4398464163822526,
"eval_loss": 0.5694039463996887,
"eval_runtime": 62.8247,
"eval_samples_per_second": 15.71,
"eval_steps_per_second": 1.974,
"step": 13500
},
{
"epoch": 1.493174061433447,
"grad_norm": 3.7334253022436314e-08,
"learning_rate": 2.511376564277588e-07,
"loss": 0.0507,
"step": 14000
},
{
"epoch": 1.493174061433447,
"eval_loss": 0.5713907480239868,
"eval_runtime": 62.3577,
"eval_samples_per_second": 15.828,
"eval_steps_per_second": 1.989,
"step": 14000
},
{
"epoch": 1.5465017064846416,
"grad_norm": 1.5219894647598267,
"learning_rate": 2.422497155858931e-07,
"loss": 0.0774,
"step": 14500
},
{
"epoch": 1.5465017064846416,
"eval_loss": 0.5674533843994141,
"eval_runtime": 62.3753,
"eval_samples_per_second": 15.824,
"eval_steps_per_second": 1.988,
"step": 14500
},
{
"epoch": 1.5998293515358362,
"grad_norm": 879.1193237304688,
"learning_rate": 2.333617747440273e-07,
"loss": 0.0376,
"step": 15000
},
{
"epoch": 1.5998293515358362,
"eval_loss": 0.561253547668457,
"eval_runtime": 62.4788,
"eval_samples_per_second": 15.797,
"eval_steps_per_second": 1.985,
"step": 15000
},
{
"epoch": 1.6531569965870307,
"grad_norm": 0.0006221308722160757,
"learning_rate": 2.2447383390216154e-07,
"loss": 0.0929,
"step": 15500
},
{
"epoch": 1.6531569965870307,
"eval_loss": 0.5395999550819397,
"eval_runtime": 62.5651,
"eval_samples_per_second": 15.776,
"eval_steps_per_second": 1.982,
"step": 15500
},
{
"epoch": 1.7064846416382253,
"grad_norm": 37.85109329223633,
"learning_rate": 2.155858930602958e-07,
"loss": 0.0442,
"step": 16000
},
{
"epoch": 1.7064846416382253,
"eval_loss": 0.5307884216308594,
"eval_runtime": 62.4681,
"eval_samples_per_second": 15.8,
"eval_steps_per_second": 1.985,
"step": 16000
},
{
"epoch": 1.75981228668942,
"grad_norm": 9.600337885773413e-11,
"learning_rate": 2.0669795221843002e-07,
"loss": 0.0593,
"step": 16500
},
{
"epoch": 1.75981228668942,
"eval_loss": 0.5230685472488403,
"eval_runtime": 63.8124,
"eval_samples_per_second": 15.467,
"eval_steps_per_second": 1.943,
"step": 16500
},
{
"epoch": 1.8131399317406145,
"grad_norm": 9.067448940129619e-13,
"learning_rate": 1.9781001137656428e-07,
"loss": 0.0578,
"step": 17000
},
{
"epoch": 1.8131399317406145,
"eval_loss": 0.5200281739234924,
"eval_runtime": 62.3767,
"eval_samples_per_second": 15.823,
"eval_steps_per_second": 1.988,
"step": 17000
},
{
"epoch": 1.8664675767918089,
"grad_norm": 0.00843018852174282,
"learning_rate": 1.889220705346985e-07,
"loss": 0.0928,
"step": 17500
},
{
"epoch": 1.8664675767918089,
"eval_loss": 0.509168803691864,
"eval_runtime": 62.4108,
"eval_samples_per_second": 15.815,
"eval_steps_per_second": 1.987,
"step": 17500
},
{
"epoch": 1.9197952218430034,
"grad_norm": 4.264631314754297e-09,
"learning_rate": 1.8003412969283277e-07,
"loss": 0.0633,
"step": 18000
},
{
"epoch": 1.9197952218430034,
"eval_loss": 0.4954444468021393,
"eval_runtime": 63.3639,
"eval_samples_per_second": 15.577,
"eval_steps_per_second": 1.957,
"step": 18000
},
{
"epoch": 1.9731228668941978,
"grad_norm": 459.5137939453125,
"learning_rate": 1.7114618885096697e-07,
"loss": 0.067,
"step": 18500
},
{
"epoch": 1.9731228668941978,
"eval_loss": 0.49844062328338623,
"eval_runtime": 63.6573,
"eval_samples_per_second": 15.505,
"eval_steps_per_second": 1.948,
"step": 18500
},
{
"epoch": 2.0264505119453924,
"grad_norm": 0.27590689063072205,
"learning_rate": 1.6225824800910123e-07,
"loss": 0.0525,
"step": 19000
},
{
"epoch": 2.0264505119453924,
"eval_loss": 0.4999145269393921,
"eval_runtime": 63.1727,
"eval_samples_per_second": 15.624,
"eval_steps_per_second": 1.963,
"step": 19000
},
{
"epoch": 2.079778156996587,
"grad_norm": 1.349623744317796e-06,
"learning_rate": 1.533703071672355e-07,
"loss": 0.0301,
"step": 19500
},
{
"epoch": 2.079778156996587,
"eval_loss": 0.4926412105560303,
"eval_runtime": 63.4421,
"eval_samples_per_second": 15.557,
"eval_steps_per_second": 1.955,
"step": 19500
},
{
"epoch": 2.1331058020477816,
"grad_norm": 4.928138878312893e-06,
"learning_rate": 1.4448236632536972e-07,
"loss": 0.0408,
"step": 20000
},
{
"epoch": 2.1331058020477816,
"eval_loss": 0.4872625172138214,
"eval_runtime": 63.2047,
"eval_samples_per_second": 15.616,
"eval_steps_per_second": 1.962,
"step": 20000
},
{
"epoch": 2.186433447098976,
"grad_norm": 0.003605826525017619,
"learning_rate": 1.3559442548350398e-07,
"loss": 0.0489,
"step": 20500
},
{
"epoch": 2.186433447098976,
"eval_loss": 0.48834049701690674,
"eval_runtime": 63.5831,
"eval_samples_per_second": 15.523,
"eval_steps_per_second": 1.95,
"step": 20500
},
{
"epoch": 2.2397610921501707,
"grad_norm": 0.0027391298208385706,
"learning_rate": 1.267064846416382e-07,
"loss": 0.0354,
"step": 21000
},
{
"epoch": 2.2397610921501707,
"eval_loss": 0.4887964129447937,
"eval_runtime": 63.0462,
"eval_samples_per_second": 15.655,
"eval_steps_per_second": 1.967,
"step": 21000
},
{
"epoch": 2.2930887372013653,
"grad_norm": 1.5921540352792363e-07,
"learning_rate": 1.1781854379977247e-07,
"loss": 0.0667,
"step": 21500
},
{
"epoch": 2.2930887372013653,
"eval_loss": 0.4866587221622467,
"eval_runtime": 63.4143,
"eval_samples_per_second": 15.564,
"eval_steps_per_second": 1.955,
"step": 21500
},
{
"epoch": 2.34641638225256,
"grad_norm": 3.954861771404694e-09,
"learning_rate": 1.0893060295790671e-07,
"loss": 0.0315,
"step": 22000
},
{
"epoch": 2.34641638225256,
"eval_loss": 0.48805058002471924,
"eval_runtime": 63.4731,
"eval_samples_per_second": 15.55,
"eval_steps_per_second": 1.954,
"step": 22000
},
{
"epoch": 2.399744027303754,
"grad_norm": 1.405540217547241e-07,
"learning_rate": 1.0004266211604096e-07,
"loss": 0.0579,
"step": 22500
},
{
"epoch": 2.399744027303754,
"eval_loss": 0.48956525325775146,
"eval_runtime": 63.5675,
"eval_samples_per_second": 15.527,
"eval_steps_per_second": 1.951,
"step": 22500
},
{
"epoch": 2.453071672354949,
"grad_norm": 2.2330443698592717e-06,
"learning_rate": 9.11547212741752e-08,
"loss": 0.0387,
"step": 23000
},
{
"epoch": 2.453071672354949,
"eval_loss": 0.48340705037117004,
"eval_runtime": 62.238,
"eval_samples_per_second": 15.858,
"eval_steps_per_second": 1.992,
"step": 23000
},
{
"epoch": 2.506399317406143,
"grad_norm": 0.000771363964304328,
"learning_rate": 8.226678043230943e-08,
"loss": 0.0495,
"step": 23500
},
{
"epoch": 2.506399317406143,
"eval_loss": 0.48572421073913574,
"eval_runtime": 62.5175,
"eval_samples_per_second": 15.788,
"eval_steps_per_second": 1.983,
"step": 23500
},
{
"epoch": 2.5597269624573378,
"grad_norm": 4.620137588062789e-06,
"learning_rate": 7.337883959044368e-08,
"loss": 0.0364,
"step": 24000
},
{
"epoch": 2.5597269624573378,
"eval_loss": 0.4860812723636627,
"eval_runtime": 64.1674,
"eval_samples_per_second": 15.382,
"eval_steps_per_second": 1.932,
"step": 24000
},
{
"epoch": 2.6130546075085324,
"grad_norm": 0.002093038521707058,
"learning_rate": 6.449089874857792e-08,
"loss": 0.0567,
"step": 24500
},
{
"epoch": 2.6130546075085324,
"eval_loss": 0.47645464539527893,
"eval_runtime": 62.5764,
"eval_samples_per_second": 15.773,
"eval_steps_per_second": 1.982,
"step": 24500
},
{
"epoch": 2.666382252559727,
"grad_norm": 0.06298387795686722,
"learning_rate": 5.560295790671217e-08,
"loss": 0.0293,
"step": 25000
},
{
"epoch": 2.666382252559727,
"eval_loss": 0.4704216718673706,
"eval_runtime": 62.5161,
"eval_samples_per_second": 15.788,
"eval_steps_per_second": 1.983,
"step": 25000
},
{
"epoch": 2.7197098976109215,
"grad_norm": 8.860251546138898e-05,
"learning_rate": 4.6715017064846414e-08,
"loss": 0.0423,
"step": 25500
},
{
"epoch": 2.7197098976109215,
"eval_loss": 0.4728304445743561,
"eval_runtime": 64.3471,
"eval_samples_per_second": 15.339,
"eval_steps_per_second": 1.927,
"step": 25500
}
],
"logging_steps": 500,
"max_steps": 28128,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 9174557473832520.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}