{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999966663333,
  "global_step": 7499,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013,
      "learning_rate": 1.7006980333014778e-05,
      "loss": 0.6595,
      "r_loss": 0.0,
      "step": 100,
      "steps": 100
    },
    {
      "epoch": 0.027,
      "learning_rate": 1.9565280790988883e-05,
      "loss": 0.6458,
      "r_loss": 0.0,
      "step": 200,
      "steps": 200
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9799385482540556e-05,
      "loss": 0.6391,
      "r_loss": 0.0,
      "step": 300,
      "steps": 300
    },
    {
      "epoch": 0.053,
      "learning_rate": 1.9524571075061864e-05,
      "loss": 0.635,
      "r_loss": 0.0,
      "step": 400,
      "steps": 400
    },
    {
      "epoch": 0.067,
      "learning_rate": 1.925250481165796e-05,
      "loss": 0.6268,
      "r_loss": 0.0,
      "step": 500,
      "steps": 500
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.897769040417927e-05,
      "loss": 0.6208,
      "r_loss": 0.0,
      "step": 600,
      "steps": 600
    },
    {
      "epoch": 0.093,
      "learning_rate": 1.8702875996700577e-05,
      "loss": 0.6161,
      "r_loss": 0.0,
      "step": 700,
      "steps": 700
    },
    {
      "epoch": 0.107,
      "learning_rate": 1.842806158922189e-05,
      "loss": 0.6145,
      "r_loss": 0.0,
      "step": 800,
      "steps": 800
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.8153247181743197e-05,
      "loss": 0.6041,
      "r_loss": 0.0,
      "step": 900,
      "steps": 900
    },
    {
      "epoch": 0.133,
      "learning_rate": 1.7878432774264505e-05,
      "loss": 0.6049,
      "r_loss": 0.0,
      "step": 1000,
      "steps": 1000
    },
    {
      "epoch": 0.147,
      "learning_rate": 1.7603618366785813e-05,
      "loss": 0.6019,
      "r_loss": 0.0,
      "step": 1100,
      "steps": 1100
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.732880395930712e-05,
      "loss": 0.5995,
      "r_loss": 0.0,
      "step": 1200,
      "steps": 1200
    },
    {
      "epoch": 0.173,
      "learning_rate": 1.7056737695903218e-05,
      "loss": 0.5949,
      "r_loss": 0.0,
      "step": 1300,
      "steps": 1300
    },
    {
      "epoch": 0.187,
      "learning_rate": 1.6781923288424526e-05,
      "loss": 0.5914,
      "r_loss": 0.0,
      "step": 1400,
      "steps": 1400
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.6507108880945834e-05,
      "loss": 0.5884,
      "r_loss": 0.0,
      "step": 1500,
      "steps": 1500
    },
    {
      "epoch": 0.213,
      "learning_rate": 1.6232294473467146e-05,
      "loss": 0.5839,
      "r_loss": 0.0,
      "step": 1600,
      "steps": 1600
    },
    {
      "epoch": 0.227,
      "learning_rate": 1.5957480065988454e-05,
      "loss": 0.5816,
      "r_loss": 0.0,
      "step": 1700,
      "steps": 1700
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.5682665658509762e-05,
      "loss": 0.5807,
      "r_loss": 0.0,
      "step": 1800,
      "steps": 1800
    },
    {
      "epoch": 0.253,
      "learning_rate": 1.540785125103107e-05,
      "loss": 0.5821,
      "r_loss": 0.0,
      "step": 1900,
      "steps": 1900
    },
    {
      "epoch": 0.267,
      "learning_rate": 1.5133036843552378e-05,
      "loss": 0.5767,
      "r_loss": 0.0,
      "step": 2000,
      "steps": 2000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.4858222436073688e-05,
      "loss": 0.5704,
      "r_loss": 0.0,
      "step": 2100,
      "steps": 2100
    },
    {
      "epoch": 0.293,
      "learning_rate": 1.4583408028594996e-05,
      "loss": 0.5702,
      "r_loss": 0.0,
      "step": 2200,
      "steps": 2200
    },
    {
      "epoch": 0.307,
      "learning_rate": 1.4308593621116305e-05,
      "loss": 0.573,
      "r_loss": 0.0,
      "step": 2300,
      "steps": 2300
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.4033779213637613e-05,
      "loss": 0.5683,
      "r_loss": 0.0,
      "step": 2400,
      "steps": 2400
    },
    {
      "epoch": 0.333,
      "learning_rate": 1.3758964806158922e-05,
      "loss": 0.5654,
      "r_loss": 0.0,
      "step": 2500,
      "steps": 2500
    },
    {
      "epoch": 0.347,
      "learning_rate": 1.3484150398680231e-05,
      "loss": 0.5613,
      "r_loss": 0.0,
      "step": 2600,
      "steps": 2600
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.320933599120154e-05,
      "loss": 0.5604,
      "r_loss": 0.0,
      "step": 2700,
      "steps": 2700
    },
    {
      "epoch": 0.373,
      "learning_rate": 1.2934521583722849e-05,
      "loss": 0.5551,
      "r_loss": 0.0,
      "step": 2800,
      "steps": 2800
    },
    {
      "epoch": 0.387,
      "learning_rate": 1.2659707176244157e-05,
      "loss": 0.5551,
      "r_loss": 0.0,
      "step": 2900,
      "steps": 2900
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.2384892768765467e-05,
      "loss": 0.5574,
      "r_loss": 0.0,
      "step": 3000,
      "steps": 3000
    },
    {
      "epoch": 0.413,
      "learning_rate": 1.2110078361286775e-05,
      "loss": 0.5539,
      "r_loss": 0.0,
      "step": 3100,
      "steps": 3100
    },
    {
      "epoch": 0.427,
      "learning_rate": 1.1835263953808083e-05,
      "loss": 0.5525,
      "r_loss": 0.0,
      "step": 3200,
      "steps": 3200
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.1560449546329393e-05,
      "loss": 0.5503,
      "r_loss": 0.0,
      "step": 3300,
      "steps": 3300
    },
    {
      "epoch": 0.453,
      "learning_rate": 1.12856351388507e-05,
      "loss": 0.5506,
      "r_loss": 0.0,
      "step": 3400,
      "steps": 3400
    },
    {
      "epoch": 0.467,
      "learning_rate": 1.101082073137201e-05,
      "loss": 0.5422,
      "r_loss": 0.0,
      "step": 3500,
      "steps": 3500
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.0736006323893319e-05,
      "loss": 0.5443,
      "r_loss": 0.0,
      "step": 3600,
      "steps": 3600
    },
    {
      "epoch": 0.493,
      "learning_rate": 1.0461191916414627e-05,
      "loss": 0.5422,
      "r_loss": 0.0,
      "step": 3700,
      "steps": 3700
    },
    {
      "epoch": 0.507,
      "learning_rate": 1.0189125653010724e-05,
      "loss": 0.5378,
      "r_loss": 0.0,
      "step": 3800,
      "steps": 3800
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.914311245532032e-06,
      "loss": 0.5401,
      "r_loss": 0.0,
      "step": 3900,
      "steps": 3900
    },
    {
      "epoch": 0.533,
      "learning_rate": 9.63949683805334e-06,
      "loss": 0.538,
      "r_loss": 0.0,
      "step": 4000,
      "steps": 4000
    },
    {
      "epoch": 0.547,
      "learning_rate": 9.36468243057465e-06,
      "loss": 0.5384,
      "r_loss": 0.0,
      "step": 4100,
      "steps": 4100
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.08986802309596e-06,
      "loss": 0.5306,
      "r_loss": 0.0,
      "step": 4200,
      "steps": 4200
    },
    {
      "epoch": 0.573,
      "learning_rate": 8.815053615617266e-06,
      "loss": 0.5317,
      "r_loss": 0.0,
      "step": 4300,
      "steps": 4300
    },
    {
      "epoch": 0.587,
      "learning_rate": 8.540239208138575e-06,
      "loss": 0.5305,
      "r_loss": 0.0,
      "step": 4400,
      "steps": 4400
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.265424800659885e-06,
      "loss": 0.5313,
      "r_loss": 0.0,
      "step": 4500,
      "steps": 4500
    },
    {
      "epoch": 0.613,
      "learning_rate": 7.990610393181193e-06,
      "loss": 0.531,
      "r_loss": 0.0,
      "step": 4600,
      "steps": 4600
    },
    {
      "epoch": 0.627,
      "learning_rate": 7.715795985702503e-06,
      "loss": 0.5285,
      "r_loss": 0.0,
      "step": 4700,
      "steps": 4700
    },
    {
      "epoch": 0.64,
      "learning_rate": 7.440981578223811e-06,
      "loss": 0.5261,
      "r_loss": 0.0,
      "step": 4800,
      "steps": 4800
    },
    {
      "epoch": 0.653,
      "learning_rate": 7.166167170745119e-06,
      "loss": 0.5178,
      "r_loss": 0.0,
      "step": 4900,
      "steps": 4900
    },
    {
      "epoch": 0.667,
      "learning_rate": 6.891352763266428e-06,
      "loss": 0.5199,
      "r_loss": 0.0,
      "step": 5000,
      "steps": 5000
    },
    {
      "epoch": 0.68,
      "learning_rate": 6.616538355787737e-06,
      "loss": 0.5191,
      "r_loss": 0.0,
      "step": 5100,
      "steps": 5100
    },
    {
      "epoch": 0.693,
      "learning_rate": 6.341723948309046e-06,
      "loss": 0.5197,
      "r_loss": 0.0,
      "step": 5200,
      "steps": 5200
    },
    {
      "epoch": 0.707,
      "learning_rate": 6.066909540830355e-06,
      "loss": 0.5172,
      "r_loss": 0.0,
      "step": 5300,
      "steps": 5300
    },
    {
      "epoch": 0.72,
      "learning_rate": 5.792095133351663e-06,
      "loss": 0.5196,
      "r_loss": 0.0,
      "step": 5400,
      "steps": 5400
    },
    {
      "epoch": 0.733,
      "learning_rate": 5.517280725872972e-06,
      "loss": 0.5113,
      "r_loss": 0.0,
      "step": 5500,
      "steps": 5500
    },
    {
      "epoch": 0.747,
      "learning_rate": 5.245214462469068e-06,
      "loss": 0.5123,
      "r_loss": 0.0,
      "step": 5600,
      "steps": 5600
    },
    {
      "epoch": 0.76,
      "learning_rate": 4.973148199065163e-06,
      "loss": 0.513,
      "r_loss": 0.0,
      "step": 5700,
      "steps": 5700
    },
    {
      "epoch": 0.773,
      "learning_rate": 4.698333791586473e-06,
      "loss": 0.5123,
      "r_loss": 0.0,
      "step": 5800,
      "steps": 5800
    },
    {
      "epoch": 0.787,
      "learning_rate": 4.423519384107781e-06,
      "loss": 0.5103,
      "r_loss": 0.0,
      "step": 5900,
      "steps": 5900
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.14870497662909e-06,
      "loss": 0.5086,
      "r_loss": 0.0,
      "step": 6000,
      "steps": 6000
    },
    {
      "epoch": 0.813,
      "learning_rate": 3.876638713225185e-06,
      "loss": 0.5042,
      "r_loss": 0.0,
      "step": 6100,
      "steps": 6100
    },
    {
      "epoch": 0.827,
      "learning_rate": 3.6018243057464943e-06,
      "loss": 0.5042,
      "r_loss": 0.0,
      "step": 6200,
      "steps": 6200
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.3270098982678032e-06,
      "loss": 0.507,
      "r_loss": 0.0,
      "step": 6300,
      "steps": 6300
    },
    {
      "epoch": 0.853,
      "learning_rate": 3.0521954907891117e-06,
      "loss": 0.5014,
      "r_loss": 0.0,
      "step": 6400,
      "steps": 6400
    },
    {
      "epoch": 0.867,
      "learning_rate": 2.7773810833104206e-06,
      "loss": 0.5033,
      "r_loss": 0.0,
      "step": 6500,
      "steps": 6500
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.5025666758317295e-06,
      "loss": 0.4996,
      "r_loss": 0.0,
      "step": 6600,
      "steps": 6600
    },
    {
      "epoch": 0.893,
      "learning_rate": 2.227752268353038e-06,
      "loss": 0.5003,
      "r_loss": 0.0,
      "step": 6700,
      "steps": 6700
    },
    {
      "epoch": 0.907,
      "learning_rate": 1.952937860874347e-06,
      "loss": 0.5008,
      "r_loss": 0.0,
      "step": 6800,
      "steps": 6800
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.6781234533956558e-06,
      "loss": 0.5023,
      "r_loss": 0.0,
      "step": 6900,
      "steps": 6900
    },
    {
      "epoch": 0.933,
      "learning_rate": 1.4033090459169645e-06,
      "loss": 0.497,
      "r_loss": 0.0,
      "step": 7000,
      "steps": 7000
    },
    {
      "epoch": 0.947,
      "learning_rate": 1.1284946384382732e-06,
      "loss": 0.5002,
      "r_loss": 0.0,
      "step": 7100,
      "steps": 7100
    },
    {
      "epoch": 0.96,
      "learning_rate": 8.536802309595821e-07,
      "loss": 0.4972,
      "r_loss": 0.0,
      "step": 7200,
      "steps": 7200
    },
    {
      "epoch": 0.973,
      "learning_rate": 5.788658234808909e-07,
      "loss": 0.5017,
      "r_loss": 0.0,
      "step": 7300,
      "steps": 7300
    },
    {
      "epoch": 0.987,
      "learning_rate": 3.040514160021996e-07,
      "loss": 0.4991,
      "r_loss": 0.0,
      "step": 7400,
      "steps": 7400
    },
    {
      "epoch": 1.0,
      "step": 7499,
      "steps": 7499,
      "total_flos": 1222855706542080.0,
      "train_loss": 0.5490713825319938,
      "train_runtime": 153878.2498,
      "train_samples_per_second": 6.238,
      "train_steps_per_second": 0.049
    }
  ],
  "max_steps": 7499,
  "num_train_epochs": 1,
  "total_flos": 1222855706542080.0,
  "trial_name": null,
  "trial_params": null
}