{ "best_metric": 0.4718119204044342, "best_model_checkpoint": "./output/clip-finetuned-csu-p14-336-e3l57-l/checkpoint-26000", "epoch": 2.8263651877133107, "eval_steps": 500, "global_step": 26500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05332764505119454, "grad_norm": 414.62841796875, "learning_rate": 4.911120591581342e-07, "loss": 0.3812, "step": 500 }, { "epoch": 0.05332764505119454, "eval_loss": 1.1163015365600586, "eval_runtime": 61.8962, "eval_samples_per_second": 15.946, "eval_steps_per_second": 2.003, "step": 500 }, { "epoch": 0.10665529010238908, "grad_norm": 34.08418655395508, "learning_rate": 4.822241183162685e-07, "loss": 0.2683, "step": 1000 }, { "epoch": 0.10665529010238908, "eval_loss": 0.9684447646141052, "eval_runtime": 61.821, "eval_samples_per_second": 15.965, "eval_steps_per_second": 2.006, "step": 1000 }, { "epoch": 0.1599829351535836, "grad_norm": 350.4574890136719, "learning_rate": 4.733361774744027e-07, "loss": 0.2119, "step": 1500 }, { "epoch": 0.1599829351535836, "eval_loss": 0.9099885821342468, "eval_runtime": 61.9606, "eval_samples_per_second": 15.929, "eval_steps_per_second": 2.001, "step": 1500 }, { "epoch": 0.21331058020477817, "grad_norm": 0.01787523180246353, "learning_rate": 4.6444823663253695e-07, "loss": 0.1889, "step": 2000 }, { "epoch": 0.21331058020477817, "eval_loss": 0.8620074987411499, "eval_runtime": 63.2824, "eval_samples_per_second": 15.597, "eval_steps_per_second": 1.959, "step": 2000 }, { "epoch": 0.2666382252559727, "grad_norm": 118.66907501220703, "learning_rate": 4.5556029579067116e-07, "loss": 0.2071, "step": 2500 }, { "epoch": 0.2666382252559727, "eval_loss": 0.791786253452301, "eval_runtime": 63.7932, "eval_samples_per_second": 15.472, "eval_steps_per_second": 1.944, "step": 2500 }, { "epoch": 0.3199658703071672, "grad_norm": 7.21087323185543e-09, "learning_rate": 4.4667235494880547e-07, "loss": 0.1588, "step": 3000 }, { "epoch": 0.3199658703071672, "eval_loss": 0.7657254338264465, "eval_runtime": 62.4738, "eval_samples_per_second": 15.799, "eval_steps_per_second": 1.985, "step": 3000 }, { "epoch": 0.37329351535836175, "grad_norm": 0.0014836661284789443, "learning_rate": 4.377844141069397e-07, "loss": 0.1718, "step": 3500 }, { "epoch": 0.37329351535836175, "eval_loss": 0.7610095739364624, "eval_runtime": 63.9118, "eval_samples_per_second": 15.443, "eval_steps_per_second": 1.94, "step": 3500 }, { "epoch": 0.42662116040955633, "grad_norm": 3.2739710807800293, "learning_rate": 4.2889647326507393e-07, "loss": 0.1113, "step": 4000 }, { "epoch": 0.42662116040955633, "eval_loss": 0.7458393573760986, "eval_runtime": 62.1803, "eval_samples_per_second": 15.873, "eval_steps_per_second": 1.994, "step": 4000 }, { "epoch": 0.47994880546075086, "grad_norm": 1.788787841796875, "learning_rate": 4.2000853242320814e-07, "loss": 0.1313, "step": 4500 }, { "epoch": 0.47994880546075086, "eval_loss": 0.7168156504631042, "eval_runtime": 62.1477, "eval_samples_per_second": 15.882, "eval_steps_per_second": 1.995, "step": 4500 }, { "epoch": 0.5332764505119454, "grad_norm": 4.031916250823997e-05, "learning_rate": 4.1112059158134245e-07, "loss": 0.1649, "step": 5000 }, { "epoch": 0.5332764505119454, "eval_loss": 0.7019046545028687, "eval_runtime": 62.2092, "eval_samples_per_second": 15.866, "eval_steps_per_second": 1.993, "step": 5000 }, { "epoch": 0.58660409556314, "grad_norm": 3.883213139488362e-06, "learning_rate": 4.0223265073947665e-07, "loss": 0.1245, "step": 5500 }, { "epoch": 0.58660409556314, "eval_loss": 0.6812178492546082, "eval_runtime": 62.492, "eval_samples_per_second": 15.794, "eval_steps_per_second": 1.984, "step": 5500 }, { "epoch": 0.6399317406143344, "grad_norm": 80.17021942138672, "learning_rate": 3.933447098976109e-07, "loss": 0.1286, "step": 6000 }, { "epoch": 0.6399317406143344, "eval_loss": 0.6501584649085999, "eval_runtime": 62.5236, "eval_samples_per_second": 15.786, "eval_steps_per_second": 1.983, "step": 6000 }, { "epoch": 0.693259385665529, "grad_norm": 0.0001418297761119902, "learning_rate": 3.8445676905574517e-07, "loss": 0.1076, "step": 6500 }, { "epoch": 0.693259385665529, "eval_loss": 0.6153982281684875, "eval_runtime": 62.2927, "eval_samples_per_second": 15.845, "eval_steps_per_second": 1.991, "step": 6500 }, { "epoch": 0.7465870307167235, "grad_norm": 5.804526495012396e-07, "learning_rate": 3.755688282138794e-07, "loss": 0.1477, "step": 7000 }, { "epoch": 0.7465870307167235, "eval_loss": 0.6118016839027405, "eval_runtime": 63.1588, "eval_samples_per_second": 15.627, "eval_steps_per_second": 1.963, "step": 7000 }, { "epoch": 0.7999146757679181, "grad_norm": 2.66489315032959, "learning_rate": 3.6668088737201363e-07, "loss": 0.1315, "step": 7500 }, { "epoch": 0.7999146757679181, "eval_loss": 0.601563036441803, "eval_runtime": 62.9163, "eval_samples_per_second": 15.688, "eval_steps_per_second": 1.971, "step": 7500 }, { "epoch": 0.8532423208191127, "grad_norm": 851.2547607421875, "learning_rate": 3.5779294653014783e-07, "loss": 0.1413, "step": 8000 }, { "epoch": 0.8532423208191127, "eval_loss": 0.584888756275177, "eval_runtime": 62.6735, "eval_samples_per_second": 15.748, "eval_steps_per_second": 1.979, "step": 8000 }, { "epoch": 0.9065699658703071, "grad_norm": 6.041275628376752e-05, "learning_rate": 3.4890500568828214e-07, "loss": 0.124, "step": 8500 }, { "epoch": 0.9065699658703071, "eval_loss": 0.5766209363937378, "eval_runtime": 63.3493, "eval_samples_per_second": 15.58, "eval_steps_per_second": 1.957, "step": 8500 }, { "epoch": 0.9598976109215017, "grad_norm": 3.0901598930358887, "learning_rate": 3.4001706484641635e-07, "loss": 0.1215, "step": 9000 }, { "epoch": 0.9598976109215017, "eval_loss": 0.5558739304542542, "eval_runtime": 63.0862, "eval_samples_per_second": 15.645, "eval_steps_per_second": 1.966, "step": 9000 }, { "epoch": 1.0132252559726962, "grad_norm": 2509.93212890625, "learning_rate": 3.311291240045506e-07, "loss": 0.131, "step": 9500 }, { "epoch": 1.0132252559726962, "eval_loss": 0.563326358795166, "eval_runtime": 62.3607, "eval_samples_per_second": 15.827, "eval_steps_per_second": 1.988, "step": 9500 }, { "epoch": 1.0665529010238908, "grad_norm": 32.05735778808594, "learning_rate": 3.2224118316268486e-07, "loss": 0.0348, "step": 10000 }, { "epoch": 1.0665529010238908, "eval_loss": 0.553093671798706, "eval_runtime": 62.9895, "eval_samples_per_second": 15.669, "eval_steps_per_second": 1.969, "step": 10000 }, { "epoch": 1.1198805460750854, "grad_norm": 0.004324838053435087, "learning_rate": 3.133532423208191e-07, "loss": 0.0687, "step": 10500 }, { "epoch": 1.1198805460750854, "eval_loss": 0.5494681000709534, "eval_runtime": 62.3211, "eval_samples_per_second": 15.837, "eval_steps_per_second": 1.99, "step": 10500 }, { "epoch": 1.17320819112628, "grad_norm": 0.09391242265701294, "learning_rate": 3.044653014789533e-07, "loss": 0.0749, "step": 11000 }, { "epoch": 1.17320819112628, "eval_loss": 0.5473881363868713, "eval_runtime": 62.3454, "eval_samples_per_second": 15.831, "eval_steps_per_second": 1.989, "step": 11000 }, { "epoch": 1.2265358361774745, "grad_norm": 1.9322777752850406e-11, "learning_rate": 2.955773606370876e-07, "loss": 0.1083, "step": 11500 }, { "epoch": 1.2265358361774745, "eval_loss": 0.541618287563324, "eval_runtime": 63.1868, "eval_samples_per_second": 15.62, "eval_steps_per_second": 1.962, "step": 11500 }, { "epoch": 1.2798634812286689, "grad_norm": 0.0002804399700835347, "learning_rate": 2.8668941979522184e-07, "loss": 0.0485, "step": 12000 }, { "epoch": 1.2798634812286689, "eval_loss": 0.5412191152572632, "eval_runtime": 62.9217, "eval_samples_per_second": 15.686, "eval_steps_per_second": 1.971, "step": 12000 }, { "epoch": 1.3331911262798635, "grad_norm": 8.90180089641035e-08, "learning_rate": 2.778014789533561e-07, "loss": 0.0449, "step": 12500 }, { "epoch": 1.3331911262798635, "eval_loss": 0.5510581731796265, "eval_runtime": 62.525, "eval_samples_per_second": 15.786, "eval_steps_per_second": 1.983, "step": 12500 }, { "epoch": 1.386518771331058, "grad_norm": 1.0178096429905303e-12, "learning_rate": 2.689135381114903e-07, "loss": 0.0847, "step": 13000 }, { "epoch": 1.386518771331058, "eval_loss": 0.5492247939109802, "eval_runtime": 62.8314, "eval_samples_per_second": 15.709, "eval_steps_per_second": 1.974, "step": 13000 }, { "epoch": 1.4398464163822526, "grad_norm": 0.06238893046975136, "learning_rate": 2.600255972696246e-07, "loss": 0.0702, "step": 13500 }, { "epoch": 1.4398464163822526, "eval_loss": 0.5508909821510315, "eval_runtime": 63.08, "eval_samples_per_second": 15.647, "eval_steps_per_second": 1.966, "step": 13500 }, { "epoch": 1.493174061433447, "grad_norm": 3.558750449883519e-08, "learning_rate": 2.511376564277588e-07, "loss": 0.0487, "step": 14000 }, { "epoch": 1.493174061433447, "eval_loss": 0.544665515422821, "eval_runtime": 62.2772, "eval_samples_per_second": 15.849, "eval_steps_per_second": 1.991, "step": 14000 }, { "epoch": 1.5465017064846416, "grad_norm": 0.29742875695228577, "learning_rate": 2.422497155858931e-07, "loss": 0.072, "step": 14500 }, { "epoch": 1.5465017064846416, "eval_loss": 0.5490172505378723, "eval_runtime": 62.363, "eval_samples_per_second": 15.827, "eval_steps_per_second": 1.988, "step": 14500 }, { "epoch": 1.5998293515358362, "grad_norm": 664.765380859375, "learning_rate": 2.333617747440273e-07, "loss": 0.0325, "step": 15000 }, { "epoch": 1.5998293515358362, "eval_loss": 0.5443011522293091, "eval_runtime": 62.1023, "eval_samples_per_second": 15.893, "eval_steps_per_second": 1.997, "step": 15000 }, { "epoch": 1.6531569965870307, "grad_norm": 0.0024932425003498793, "learning_rate": 2.2447383390216154e-07, "loss": 0.0894, "step": 15500 }, { "epoch": 1.6531569965870307, "eval_loss": 0.5264189839363098, "eval_runtime": 62.2333, "eval_samples_per_second": 15.86, "eval_steps_per_second": 1.993, "step": 15500 }, { "epoch": 1.7064846416382253, "grad_norm": 1.1819661855697632, "learning_rate": 2.155858930602958e-07, "loss": 0.0503, "step": 16000 }, { "epoch": 1.7064846416382253, "eval_loss": 0.52089524269104, "eval_runtime": 62.2763, "eval_samples_per_second": 15.849, "eval_steps_per_second": 1.991, "step": 16000 }, { "epoch": 1.75981228668942, "grad_norm": 9.822515023572009e-11, "learning_rate": 2.0669795221843002e-07, "loss": 0.0568, "step": 16500 }, { "epoch": 1.75981228668942, "eval_loss": 0.5083423852920532, "eval_runtime": 63.6327, "eval_samples_per_second": 15.511, "eval_steps_per_second": 1.949, "step": 16500 }, { "epoch": 1.8131399317406145, "grad_norm": 9.34051013617987e-13, "learning_rate": 1.9781001137656428e-07, "loss": 0.0589, "step": 17000 }, { "epoch": 1.8131399317406145, "eval_loss": 0.5093010067939758, "eval_runtime": 62.0049, "eval_samples_per_second": 15.918, "eval_steps_per_second": 2.0, "step": 17000 }, { "epoch": 1.8664675767918089, "grad_norm": 0.00014494755305349827, "learning_rate": 1.889220705346985e-07, "loss": 0.0892, "step": 17500 }, { "epoch": 1.8664675767918089, "eval_loss": 0.4983443319797516, "eval_runtime": 62.0795, "eval_samples_per_second": 15.899, "eval_steps_per_second": 1.997, "step": 17500 }, { "epoch": 1.9197952218430034, "grad_norm": 1.963637918933614e-09, "learning_rate": 1.8003412969283277e-07, "loss": 0.0584, "step": 18000 }, { "epoch": 1.9197952218430034, "eval_loss": 0.48863324522972107, "eval_runtime": 63.6145, "eval_samples_per_second": 15.515, "eval_steps_per_second": 1.949, "step": 18000 }, { "epoch": 1.9731228668941978, "grad_norm": 386.5431823730469, "learning_rate": 1.7114618885096697e-07, "loss": 0.063, "step": 18500 }, { "epoch": 1.9731228668941978, "eval_loss": 0.4945463240146637, "eval_runtime": 62.2077, "eval_samples_per_second": 15.866, "eval_steps_per_second": 1.993, "step": 18500 }, { "epoch": 2.0264505119453924, "grad_norm": 0.10051131248474121, "learning_rate": 1.6225824800910123e-07, "loss": 0.0493, "step": 19000 }, { "epoch": 2.0264505119453924, "eval_loss": 0.4956463575363159, "eval_runtime": 62.1433, "eval_samples_per_second": 15.883, "eval_steps_per_second": 1.995, "step": 19000 }, { "epoch": 2.079778156996587, "grad_norm": 1.1283660569461063e-05, "learning_rate": 1.533703071672355e-07, "loss": 0.0246, "step": 19500 }, { "epoch": 2.079778156996587, "eval_loss": 0.48714280128479004, "eval_runtime": 63.7786, "eval_samples_per_second": 15.475, "eval_steps_per_second": 1.944, "step": 19500 }, { "epoch": 2.1331058020477816, "grad_norm": 2.3514085114584304e-06, "learning_rate": 1.4448236632536972e-07, "loss": 0.0385, "step": 20000 }, { "epoch": 2.1331058020477816, "eval_loss": 0.4830440580844879, "eval_runtime": 62.2754, "eval_samples_per_second": 15.849, "eval_steps_per_second": 1.991, "step": 20000 }, { "epoch": 2.186433447098976, "grad_norm": 0.045034199953079224, "learning_rate": 1.3559442548350398e-07, "loss": 0.0574, "step": 20500 }, { "epoch": 2.186433447098976, "eval_loss": 0.4848896861076355, "eval_runtime": 62.2583, "eval_samples_per_second": 15.853, "eval_steps_per_second": 1.992, "step": 20500 }, { "epoch": 2.2397610921501707, "grad_norm": 0.001391001744195819, "learning_rate": 1.267064846416382e-07, "loss": 0.039, "step": 21000 }, { "epoch": 2.2397610921501707, "eval_loss": 0.4871656596660614, "eval_runtime": 62.0998, "eval_samples_per_second": 15.894, "eval_steps_per_second": 1.997, "step": 21000 }, { "epoch": 2.2930887372013653, "grad_norm": 8.033894971504196e-08, "learning_rate": 1.1781854379977247e-07, "loss": 0.0653, "step": 21500 }, { "epoch": 2.2930887372013653, "eval_loss": 0.4837980270385742, "eval_runtime": 63.2915, "eval_samples_per_second": 15.595, "eval_steps_per_second": 1.959, "step": 21500 }, { "epoch": 2.34641638225256, "grad_norm": 2.780315089268015e-09, "learning_rate": 1.0893060295790671e-07, "loss": 0.0325, "step": 22000 }, { "epoch": 2.34641638225256, "eval_loss": 0.4875977635383606, "eval_runtime": 61.9485, "eval_samples_per_second": 15.933, "eval_steps_per_second": 2.002, "step": 22000 }, { "epoch": 2.399744027303754, "grad_norm": 6.230102655990777e-08, "learning_rate": 1.0004266211604096e-07, "loss": 0.0578, "step": 22500 }, { "epoch": 2.399744027303754, "eval_loss": 0.4869585931301117, "eval_runtime": 61.8532, "eval_samples_per_second": 15.957, "eval_steps_per_second": 2.005, "step": 22500 }, { "epoch": 2.453071672354949, "grad_norm": 8.999632683526215e-08, "learning_rate": 9.11547212741752e-08, "loss": 0.039, "step": 23000 }, { "epoch": 2.453071672354949, "eval_loss": 0.4805457293987274, "eval_runtime": 61.7778, "eval_samples_per_second": 15.977, "eval_steps_per_second": 2.007, "step": 23000 }, { "epoch": 2.506399317406143, "grad_norm": 0.0005359333008527756, "learning_rate": 8.226678043230943e-08, "loss": 0.0536, "step": 23500 }, { "epoch": 2.506399317406143, "eval_loss": 0.4824087619781494, "eval_runtime": 61.8986, "eval_samples_per_second": 15.945, "eval_steps_per_second": 2.003, "step": 23500 }, { "epoch": 2.5597269624573378, "grad_norm": 5.697604137822054e-05, "learning_rate": 7.337883959044368e-08, "loss": 0.0382, "step": 24000 }, { "epoch": 2.5597269624573378, "eval_loss": 0.48091721534729004, "eval_runtime": 63.5693, "eval_samples_per_second": 15.526, "eval_steps_per_second": 1.951, "step": 24000 }, { "epoch": 2.6130546075085324, "grad_norm": 0.03709675371646881, "learning_rate": 6.449089874857792e-08, "loss": 0.0479, "step": 24500 }, { "epoch": 2.6130546075085324, "eval_loss": 0.4748595654964447, "eval_runtime": 62.2799, "eval_samples_per_second": 15.848, "eval_steps_per_second": 1.991, "step": 24500 }, { "epoch": 2.666382252559727, "grad_norm": 0.06212961673736572, "learning_rate": 5.560295790671217e-08, "loss": 0.0268, "step": 25000 }, { "epoch": 2.666382252559727, "eval_loss": 0.47227105498313904, "eval_runtime": 62.2013, "eval_samples_per_second": 15.868, "eval_steps_per_second": 1.994, "step": 25000 }, { "epoch": 2.7197098976109215, "grad_norm": 3.1343068258138373e-05, "learning_rate": 4.6715017064846414e-08, "loss": 0.0406, "step": 25500 }, { "epoch": 2.7197098976109215, "eval_loss": 0.47432422637939453, "eval_runtime": 63.8047, "eval_samples_per_second": 15.469, "eval_steps_per_second": 1.943, "step": 25500 }, { "epoch": 2.773037542662116, "grad_norm": 0.016286909580230713, "learning_rate": 3.782707622298066e-08, "loss": 0.0349, "step": 26000 }, { "epoch": 2.773037542662116, "eval_loss": 0.4718119204044342, "eval_runtime": 62.273, "eval_samples_per_second": 15.85, "eval_steps_per_second": 1.991, "step": 26000 }, { "epoch": 2.8263651877133107, "grad_norm": 9.701019007479772e-05, "learning_rate": 2.8939135381114903e-08, "loss": 0.017, "step": 26500 }, { "epoch": 2.8263651877133107, "eval_loss": 0.4721411168575287, "eval_runtime": 62.174, "eval_samples_per_second": 15.875, "eval_steps_per_second": 1.994, "step": 26500 } ], "logging_steps": 500, "max_steps": 28128, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9534315825092520.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }