{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999966663333, "global_step": 7499, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013, "learning_rate": 1.7006980333014778e-05, "loss": 0.6595, "r_loss": 0.0, "step": 100, "steps": 100 }, { "epoch": 0.027, "learning_rate": 1.9565280790988883e-05, "loss": 0.6458, "r_loss": 0.0, "step": 200, "steps": 200 }, { "epoch": 0.04, "learning_rate": 1.9799385482540556e-05, "loss": 0.6391, "r_loss": 0.0, "step": 300, "steps": 300 }, { "epoch": 0.053, "learning_rate": 1.9524571075061864e-05, "loss": 0.635, "r_loss": 0.0, "step": 400, "steps": 400 }, { "epoch": 0.067, "learning_rate": 1.925250481165796e-05, "loss": 0.6268, "r_loss": 0.0, "step": 500, "steps": 500 }, { "epoch": 0.08, "learning_rate": 1.897769040417927e-05, "loss": 0.6208, "r_loss": 0.0, "step": 600, "steps": 600 }, { "epoch": 0.093, "learning_rate": 1.8702875996700577e-05, "loss": 0.6161, "r_loss": 0.0, "step": 700, "steps": 700 }, { "epoch": 0.107, "learning_rate": 1.842806158922189e-05, "loss": 0.6145, "r_loss": 0.0, "step": 800, "steps": 800 }, { "epoch": 0.12, "learning_rate": 1.8153247181743197e-05, "loss": 0.6041, "r_loss": 0.0, "step": 900, "steps": 900 }, { "epoch": 0.133, "learning_rate": 1.7878432774264505e-05, "loss": 0.6049, "r_loss": 0.0, "step": 1000, "steps": 1000 }, { "epoch": 0.147, "learning_rate": 1.7603618366785813e-05, "loss": 0.6019, "r_loss": 0.0, "step": 1100, "steps": 1100 }, { "epoch": 0.16, "learning_rate": 1.732880395930712e-05, "loss": 0.5995, "r_loss": 0.0, "step": 1200, "steps": 1200 }, { "epoch": 0.173, "learning_rate": 1.7056737695903218e-05, "loss": 0.5949, "r_loss": 0.0, "step": 1300, "steps": 1300 }, { "epoch": 0.187, "learning_rate": 1.6781923288424526e-05, "loss": 0.5914, "r_loss": 0.0, "step": 1400, "steps": 1400 }, { "epoch": 0.2, "learning_rate": 1.6507108880945834e-05, "loss": 0.5884, "r_loss": 0.0, "step": 1500, "steps": 1500 }, { "epoch": 0.213, "learning_rate": 1.6232294473467146e-05, "loss": 0.5839, "r_loss": 0.0, "step": 1600, "steps": 1600 }, { "epoch": 0.227, "learning_rate": 1.5957480065988454e-05, "loss": 0.5816, "r_loss": 0.0, "step": 1700, "steps": 1700 }, { "epoch": 0.24, "learning_rate": 1.5682665658509762e-05, "loss": 0.5807, "r_loss": 0.0, "step": 1800, "steps": 1800 }, { "epoch": 0.253, "learning_rate": 1.540785125103107e-05, "loss": 0.5821, "r_loss": 0.0, "step": 1900, "steps": 1900 }, { "epoch": 0.267, "learning_rate": 1.5133036843552378e-05, "loss": 0.5767, "r_loss": 0.0, "step": 2000, "steps": 2000 }, { "epoch": 0.28, "learning_rate": 1.4858222436073688e-05, "loss": 0.5704, "r_loss": 0.0, "step": 2100, "steps": 2100 }, { "epoch": 0.293, "learning_rate": 1.4583408028594996e-05, "loss": 0.5702, "r_loss": 0.0, "step": 2200, "steps": 2200 }, { "epoch": 0.307, "learning_rate": 1.4308593621116305e-05, "loss": 0.573, "r_loss": 0.0, "step": 2300, "steps": 2300 }, { "epoch": 0.32, "learning_rate": 1.4033779213637613e-05, "loss": 0.5683, "r_loss": 0.0, "step": 2400, "steps": 2400 }, { "epoch": 0.333, "learning_rate": 1.3758964806158922e-05, "loss": 0.5654, "r_loss": 0.0, "step": 2500, "steps": 2500 }, { "epoch": 0.347, "learning_rate": 1.3484150398680231e-05, "loss": 0.5613, "r_loss": 0.0, "step": 2600, "steps": 2600 }, { "epoch": 0.36, "learning_rate": 1.320933599120154e-05, "loss": 0.5604, "r_loss": 0.0, "step": 2700, "steps": 2700 }, { "epoch": 0.373, "learning_rate": 1.2934521583722849e-05, "loss": 0.5551, "r_loss": 0.0, "step": 2800, "steps": 2800 }, { "epoch": 0.387, "learning_rate": 1.2659707176244157e-05, "loss": 0.5551, "r_loss": 0.0, "step": 2900, "steps": 2900 }, { "epoch": 0.4, "learning_rate": 1.2384892768765467e-05, "loss": 0.5574, "r_loss": 0.0, "step": 3000, "steps": 3000 }, { "epoch": 0.413, "learning_rate": 1.2110078361286775e-05, "loss": 0.5539, "r_loss": 0.0, "step": 3100, "steps": 3100 }, { "epoch": 0.427, "learning_rate": 1.1835263953808083e-05, "loss": 0.5525, "r_loss": 0.0, "step": 3200, "steps": 3200 }, { "epoch": 0.44, "learning_rate": 1.1560449546329393e-05, "loss": 0.5503, "r_loss": 0.0, "step": 3300, "steps": 3300 }, { "epoch": 0.453, "learning_rate": 1.12856351388507e-05, "loss": 0.5506, "r_loss": 0.0, "step": 3400, "steps": 3400 }, { "epoch": 0.467, "learning_rate": 1.101082073137201e-05, "loss": 0.5422, "r_loss": 0.0, "step": 3500, "steps": 3500 }, { "epoch": 0.48, "learning_rate": 1.0736006323893319e-05, "loss": 0.5443, "r_loss": 0.0, "step": 3600, "steps": 3600 }, { "epoch": 0.493, "learning_rate": 1.0461191916414627e-05, "loss": 0.5422, "r_loss": 0.0, "step": 3700, "steps": 3700 }, { "epoch": 0.507, "learning_rate": 1.0189125653010724e-05, "loss": 0.5378, "r_loss": 0.0, "step": 3800, "steps": 3800 }, { "epoch": 0.52, "learning_rate": 9.914311245532032e-06, "loss": 0.5401, "r_loss": 0.0, "step": 3900, "steps": 3900 }, { "epoch": 0.533, "learning_rate": 9.63949683805334e-06, "loss": 0.538, "r_loss": 0.0, "step": 4000, "steps": 4000 }, { "epoch": 0.547, "learning_rate": 9.36468243057465e-06, "loss": 0.5384, "r_loss": 0.0, "step": 4100, "steps": 4100 }, { "epoch": 0.56, "learning_rate": 9.08986802309596e-06, "loss": 0.5306, "r_loss": 0.0, "step": 4200, "steps": 4200 }, { "epoch": 0.573, "learning_rate": 8.815053615617266e-06, "loss": 0.5317, "r_loss": 0.0, "step": 4300, "steps": 4300 }, { "epoch": 0.587, "learning_rate": 8.540239208138575e-06, "loss": 0.5305, "r_loss": 0.0, "step": 4400, "steps": 4400 }, { "epoch": 0.6, "learning_rate": 8.265424800659885e-06, "loss": 0.5313, "r_loss": 0.0, "step": 4500, "steps": 4500 }, { "epoch": 0.613, "learning_rate": 7.990610393181193e-06, "loss": 0.531, "r_loss": 0.0, "step": 4600, "steps": 4600 }, { "epoch": 0.627, "learning_rate": 7.715795985702503e-06, "loss": 0.5285, "r_loss": 0.0, "step": 4700, "steps": 4700 }, { "epoch": 0.64, "learning_rate": 7.440981578223811e-06, "loss": 0.5261, "r_loss": 0.0, "step": 4800, "steps": 4800 }, { "epoch": 0.653, "learning_rate": 7.166167170745119e-06, "loss": 0.5178, "r_loss": 0.0, "step": 4900, "steps": 4900 }, { "epoch": 0.667, "learning_rate": 6.891352763266428e-06, "loss": 0.5199, "r_loss": 0.0, "step": 5000, "steps": 5000 }, { "epoch": 0.68, "learning_rate": 6.616538355787737e-06, "loss": 0.5191, "r_loss": 0.0, "step": 5100, "steps": 5100 }, { "epoch": 0.693, "learning_rate": 6.341723948309046e-06, "loss": 0.5197, "r_loss": 0.0, "step": 5200, "steps": 5200 }, { "epoch": 0.707, "learning_rate": 6.066909540830355e-06, "loss": 0.5172, "r_loss": 0.0, "step": 5300, "steps": 5300 }, { "epoch": 0.72, "learning_rate": 5.792095133351663e-06, "loss": 0.5196, "r_loss": 0.0, "step": 5400, "steps": 5400 }, { "epoch": 0.733, "learning_rate": 5.517280725872972e-06, "loss": 0.5113, "r_loss": 0.0, "step": 5500, "steps": 5500 }, { "epoch": 0.747, "learning_rate": 5.245214462469068e-06, "loss": 0.5123, "r_loss": 0.0, "step": 5600, "steps": 5600 }, { "epoch": 0.76, "learning_rate": 4.973148199065163e-06, "loss": 0.513, "r_loss": 0.0, "step": 5700, "steps": 5700 }, { "epoch": 0.773, "learning_rate": 4.698333791586473e-06, "loss": 0.5123, "r_loss": 0.0, "step": 5800, "steps": 5800 }, { "epoch": 0.787, "learning_rate": 4.423519384107781e-06, "loss": 0.5103, "r_loss": 0.0, "step": 5900, "steps": 5900 }, { "epoch": 0.8, "learning_rate": 4.14870497662909e-06, "loss": 0.5086, "r_loss": 0.0, "step": 6000, "steps": 6000 }, { "epoch": 0.813, "learning_rate": 3.876638713225185e-06, "loss": 0.5042, "r_loss": 0.0, "step": 6100, "steps": 6100 }, { "epoch": 0.827, "learning_rate": 3.6018243057464943e-06, "loss": 0.5042, "r_loss": 0.0, "step": 6200, "steps": 6200 }, { "epoch": 0.84, "learning_rate": 3.3270098982678032e-06, "loss": 0.507, "r_loss": 0.0, "step": 6300, "steps": 6300 }, { "epoch": 0.853, "learning_rate": 3.0521954907891117e-06, "loss": 0.5014, "r_loss": 0.0, "step": 6400, "steps": 6400 }, { "epoch": 0.867, "learning_rate": 2.7773810833104206e-06, "loss": 0.5033, "r_loss": 0.0, "step": 6500, "steps": 6500 }, { "epoch": 0.88, "learning_rate": 2.5025666758317295e-06, "loss": 0.4996, "r_loss": 0.0, "step": 6600, "steps": 6600 }, { "epoch": 0.893, "learning_rate": 2.227752268353038e-06, "loss": 0.5003, "r_loss": 0.0, "step": 6700, "steps": 6700 }, { "epoch": 0.907, "learning_rate": 1.952937860874347e-06, "loss": 0.5008, "r_loss": 0.0, "step": 6800, "steps": 6800 }, { "epoch": 0.92, "learning_rate": 1.6781234533956558e-06, "loss": 0.5023, "r_loss": 0.0, "step": 6900, "steps": 6900 }, { "epoch": 0.933, "learning_rate": 1.4033090459169645e-06, "loss": 0.497, "r_loss": 0.0, "step": 7000, "steps": 7000 }, { "epoch": 0.947, "learning_rate": 1.1284946384382732e-06, "loss": 0.5002, "r_loss": 0.0, "step": 7100, "steps": 7100 }, { "epoch": 0.96, "learning_rate": 8.536802309595821e-07, "loss": 0.4972, "r_loss": 0.0, "step": 7200, "steps": 7200 }, { "epoch": 0.973, "learning_rate": 5.788658234808909e-07, "loss": 0.5017, "r_loss": 0.0, "step": 7300, "steps": 7300 }, { "epoch": 0.987, "learning_rate": 3.040514160021996e-07, "loss": 0.4991, "r_loss": 0.0, "step": 7400, "steps": 7400 }, { "epoch": 1.0, "step": 7499, "steps": 7499, "total_flos": 1222855706542080.0, "train_loss": 0.5490713825319938, "train_runtime": 153878.2498, "train_samples_per_second": 6.238, "train_steps_per_second": 0.049 } ], "max_steps": 7499, "num_train_epochs": 1, "total_flos": 1222855706542080.0, "trial_name": null, "trial_params": null }