{ "best_metric": 0.38235806339254613, "best_model_checkpoint": "/groups/claytonm/enoriega/kw_pubmed/kw_pubmed_1000_0.000006/checkpoint-108", "epoch": 3.283101045296167, "global_step": 148, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "eval_accuracy": 0.36533437826541276, "eval_loss": 4.0918426513671875, "eval_runtime": 16.5878, "eval_samples_per_second": 602.854, "eval_steps_per_second": 18.869, "step": 4 }, { "epoch": 0.11, "learning_rate": 5.92e-06, "loss": 4.4572, "step": 5 }, { "epoch": 0.17, "eval_accuracy": 0.36964472309299895, "eval_loss": 4.006958961486816, "eval_runtime": 16.4206, "eval_samples_per_second": 608.992, "eval_steps_per_second": 19.061, "step": 8 }, { "epoch": 0.22, "learning_rate": 5.786666666666667e-06, "loss": 4.0734, "step": 10 }, { "epoch": 0.26, "eval_accuracy": 0.37338906304423547, "eval_loss": 3.9496026039123535, "eval_runtime": 16.513, "eval_samples_per_second": 605.585, "eval_steps_per_second": 18.955, "step": 12 }, { "epoch": 0.33, "learning_rate": 5.653333333333333e-06, "loss": 3.8884, "step": 15 }, { "epoch": 0.35, "eval_accuracy": 0.37656739811912227, "eval_loss": 3.918774127960205, "eval_runtime": 16.5065, "eval_samples_per_second": 605.823, "eval_steps_per_second": 18.962, "step": 16 }, { "epoch": 0.44, "learning_rate": 5.5200000000000005e-06, "loss": 3.8037, "step": 20 }, { "epoch": 0.44, "eval_accuracy": 0.37495646116335773, "eval_loss": 3.9296493530273438, "eval_runtime": 16.4773, "eval_samples_per_second": 606.897, "eval_steps_per_second": 18.996, "step": 20 }, { "epoch": 0.52, "eval_accuracy": 0.37891849529780564, "eval_loss": 3.903676748275757, "eval_runtime": 16.4879, "eval_samples_per_second": 606.507, "eval_steps_per_second": 18.984, "step": 24 }, { "epoch": 0.54, "learning_rate": 5.386666666666667e-06, "loss": 3.7326, "step": 25 }, { "epoch": 0.61, "eval_accuracy": 0.3787878787878788, "eval_loss": 3.8809974193573, "eval_runtime": 16.5092, "eval_samples_per_second": 605.724, "eval_steps_per_second": 18.959, "step": 28 }, { "epoch": 0.65, "learning_rate": 5.2533333333333336e-06, "loss": 3.6766, "step": 30 }, { "epoch": 0.7, "eval_accuracy": 0.3770463253221874, "eval_loss": 3.8815793991088867, "eval_runtime": 16.4832, "eval_samples_per_second": 606.677, "eval_steps_per_second": 18.989, "step": 32 }, { "epoch": 0.76, "learning_rate": 5.12e-06, "loss": 3.6152, "step": 35 }, { "epoch": 0.78, "eval_accuracy": 0.3797892720306513, "eval_loss": 3.8686890602111816, "eval_runtime": 16.4735, "eval_samples_per_second": 607.035, "eval_steps_per_second": 19.0, "step": 36 }, { "epoch": 0.87, "learning_rate": 4.986666666666667e-06, "loss": 3.6143, "step": 40 }, { "epoch": 0.87, "eval_accuracy": 0.3770463253221874, "eval_loss": 3.874457597732544, "eval_runtime": 16.4674, "eval_samples_per_second": 607.26, "eval_steps_per_second": 19.007, "step": 40 }, { "epoch": 0.96, "eval_accuracy": 0.3777429467084639, "eval_loss": 3.872670888900757, "eval_runtime": 16.447, "eval_samples_per_second": 608.012, "eval_steps_per_second": 19.031, "step": 44 }, { "epoch": 0.98, "learning_rate": 4.853333333333333e-06, "loss": 3.54, "step": 45 }, { "epoch": 1.07, "eval_accuracy": 0.3747387669801463, "eval_loss": 3.882456064224243, "eval_runtime": 16.5148, "eval_samples_per_second": 605.517, "eval_steps_per_second": 18.953, "step": 48 }, { "epoch": 1.11, "learning_rate": 4.72e-06, "loss": 4.1293, "step": 50 }, { "epoch": 1.15, "eval_accuracy": 0.38079066527342387, "eval_loss": 3.8683102130889893, "eval_runtime": 16.4717, "eval_samples_per_second": 607.102, "eval_steps_per_second": 19.002, "step": 52 }, { "epoch": 1.22, "learning_rate": 4.586666666666667e-06, "loss": 3.4379, "step": 55 }, { "epoch": 1.24, "eval_accuracy": 0.37983281086729365, "eval_loss": 3.8646554946899414, "eval_runtime": 16.4967, "eval_samples_per_second": 606.182, "eval_steps_per_second": 18.973, "step": 56 }, { "epoch": 1.33, "learning_rate": 4.453333333333334e-06, "loss": 3.4215, "step": 60 }, { "epoch": 1.33, "eval_accuracy": 0.3801375827237896, "eval_loss": 3.8663816452026367, "eval_runtime": 16.4695, "eval_samples_per_second": 607.185, "eval_steps_per_second": 19.005, "step": 60 }, { "epoch": 1.41, "eval_accuracy": 0.3804858934169279, "eval_loss": 3.8687705993652344, "eval_runtime": 16.462, "eval_samples_per_second": 607.461, "eval_steps_per_second": 19.014, "step": 64 }, { "epoch": 1.44, "learning_rate": 4.32e-06, "loss": 3.3448, "step": 65 }, { "epoch": 1.5, "eval_accuracy": 0.38140020898641586, "eval_loss": 3.876781463623047, "eval_runtime": 16.4781, "eval_samples_per_second": 606.866, "eval_steps_per_second": 18.995, "step": 68 }, { "epoch": 1.54, "learning_rate": 4.186666666666667e-06, "loss": 3.3324, "step": 70 }, { "epoch": 1.59, "eval_accuracy": 0.37717694183211425, "eval_loss": 3.913390636444092, "eval_runtime": 16.4943, "eval_samples_per_second": 606.268, "eval_steps_per_second": 18.976, "step": 72 }, { "epoch": 1.65, "learning_rate": 4.053333333333333e-06, "loss": 3.3088, "step": 75 }, { "epoch": 1.68, "eval_accuracy": 0.3799634273772205, "eval_loss": 3.882460832595825, "eval_runtime": 16.4932, "eval_samples_per_second": 606.311, "eval_steps_per_second": 18.978, "step": 76 }, { "epoch": 1.76, "learning_rate": 3.92e-06, "loss": 3.2765, "step": 80 }, { "epoch": 1.76, "eval_accuracy": 0.37787356321839083, "eval_loss": 3.8959219455718994, "eval_runtime": 16.5043, "eval_samples_per_second": 605.902, "eval_steps_per_second": 18.965, "step": 80 }, { "epoch": 1.85, "eval_accuracy": 0.37913618948101707, "eval_loss": 3.9021382331848145, "eval_runtime": 16.4868, "eval_samples_per_second": 606.546, "eval_steps_per_second": 18.985, "step": 84 }, { "epoch": 1.87, "learning_rate": 3.7866666666666667e-06, "loss": 3.2439, "step": 85 }, { "epoch": 1.94, "eval_accuracy": 0.38148728665970044, "eval_loss": 3.8731861114501953, "eval_runtime": 16.4672, "eval_samples_per_second": 607.269, "eval_steps_per_second": 19.008, "step": 88 }, { "epoch": 1.98, "learning_rate": 3.6533333333333336e-06, "loss": 3.2546, "step": 90 }, { "epoch": 2.04, "eval_accuracy": 0.38044235458028564, "eval_loss": 3.8909058570861816, "eval_runtime": 16.4399, "eval_samples_per_second": 608.277, "eval_steps_per_second": 19.039, "step": 92 }, { "epoch": 2.11, "learning_rate": 3.52e-06, "loss": 3.818, "step": 95 }, { "epoch": 2.13, "eval_accuracy": 0.3787443399512365, "eval_loss": 3.917863368988037, "eval_runtime": 16.4722, "eval_samples_per_second": 607.083, "eval_steps_per_second": 19.002, "step": 96 }, { "epoch": 2.22, "learning_rate": 3.3866666666666667e-06, "loss": 3.1567, "step": 100 }, { "epoch": 2.22, "eval_accuracy": 0.3803988157436433, "eval_loss": 3.906223773956299, "eval_runtime": 16.5109, "eval_samples_per_second": 605.66, "eval_steps_per_second": 18.957, "step": 100 }, { "epoch": 2.3, "eval_accuracy": 0.3793974225008708, "eval_loss": 3.9190924167633057, "eval_runtime": 16.4677, "eval_samples_per_second": 607.249, "eval_steps_per_second": 19.007, "step": 104 }, { "epoch": 2.33, "learning_rate": 3.2533333333333337e-06, "loss": 3.1563, "step": 105 }, { "epoch": 2.39, "eval_accuracy": 0.38235806339254613, "eval_loss": 3.8962931632995605, "eval_runtime": 16.4504, "eval_samples_per_second": 607.888, "eval_steps_per_second": 19.027, "step": 108 }, { "epoch": 2.44, "learning_rate": 3.12e-06, "loss": 3.1417, "step": 110 }, { "epoch": 2.48, "eval_accuracy": 0.37948450017415536, "eval_loss": 3.924445152282715, "eval_runtime": 16.4336, "eval_samples_per_second": 608.508, "eval_steps_per_second": 19.046, "step": 112 }, { "epoch": 2.54, "learning_rate": 2.9866666666666667e-06, "loss": 3.1247, "step": 115 }, { "epoch": 2.57, "eval_accuracy": 0.3790055729710902, "eval_loss": 3.9178736209869385, "eval_runtime": 16.4751, "eval_samples_per_second": 606.976, "eval_steps_per_second": 18.998, "step": 116 }, { "epoch": 2.65, "learning_rate": 2.8533333333333333e-06, "loss": 3.1307, "step": 120 }, { "epoch": 2.65, "eval_accuracy": 0.3778300243817485, "eval_loss": 3.939385175704956, "eval_runtime": 16.4922, "eval_samples_per_second": 606.346, "eval_steps_per_second": 18.979, "step": 120 }, { "epoch": 2.74, "eval_accuracy": 0.38179205851619646, "eval_loss": 3.9248249530792236, "eval_runtime": 16.503, "eval_samples_per_second": 605.949, "eval_steps_per_second": 18.966, "step": 124 }, { "epoch": 2.76, "learning_rate": 2.72e-06, "loss": 3.0893, "step": 125 }, { "epoch": 2.83, "eval_accuracy": 0.37752525252525254, "eval_loss": 3.94762921333313, "eval_runtime": 16.4815, "eval_samples_per_second": 606.74, "eval_steps_per_second": 18.991, "step": 128 }, { "epoch": 2.87, "learning_rate": 2.5866666666666667e-06, "loss": 3.0906, "step": 130 }, { "epoch": 2.91, "eval_accuracy": 0.3811825148032045, "eval_loss": 3.9266726970672607, "eval_runtime": 16.4398, "eval_samples_per_second": 608.279, "eval_steps_per_second": 19.039, "step": 132 }, { "epoch": 2.98, "learning_rate": 2.4533333333333337e-06, "loss": 3.0575, "step": 135 }, { "epoch": 3.02, "eval_accuracy": 0.37813479623824453, "eval_loss": 3.937913417816162, "eval_runtime": 16.4796, "eval_samples_per_second": 606.812, "eval_steps_per_second": 18.993, "step": 136 }, { "epoch": 3.11, "learning_rate": 2.32e-06, "loss": 3.6148, "step": 140 }, { "epoch": 3.11, "eval_accuracy": 0.3803988157436433, "eval_loss": 3.9217216968536377, "eval_runtime": 16.4193, "eval_samples_per_second": 609.04, "eval_steps_per_second": 19.063, "step": 140 }, { "epoch": 3.2, "eval_accuracy": 0.37861372344130967, "eval_loss": 3.934666156768799, "eval_runtime": 16.4991, "eval_samples_per_second": 606.093, "eval_steps_per_second": 18.971, "step": 144 }, { "epoch": 3.22, "learning_rate": 2.1866666666666668e-06, "loss": 3.035, "step": 145 }, { "epoch": 3.28, "eval_accuracy": 0.37944096133751304, "eval_loss": 3.9348745346069336, "eval_runtime": 16.4542, "eval_samples_per_second": 607.749, "eval_steps_per_second": 19.023, "step": 148 }, { "epoch": 3.28, "step": 148, "total_flos": 9.09876408404562e+16, "train_loss": 3.4590183206506677, "train_runtime": 5524.5303, "train_samples_per_second": 332.471, "train_steps_per_second": 0.041 }, { "epoch": 3.28, "eval_accuracy": 0.38079066527342387, "eval_loss": 3.901303291320801, "eval_runtime": 16.4247, "eval_samples_per_second": 608.84, "eval_steps_per_second": 19.057, "step": 148 } ], "max_steps": 225, "num_train_epochs": 5, "total_flos": 9.09876408404562e+16, "trial_name": null, "trial_params": null }