{
  "best_metric": 69.66985602287733,
  "best_model_checkpoint": "/data/jcanete/all_results/tar/distillbeto/epochs_4_bs_16_lr_5e-5/checkpoint-16800",
  "epoch": 4.0,
  "global_step": 21932,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "eval_exact_match": 26.310312204351938,
      "eval_f1": 40.606556876208344,
      "step": 300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.886695239832209e-05,
      "loss": 3.3838,
      "step": 500
    },
    {
      "epoch": 0.11,
      "eval_exact_match": 35.042573320719015,
      "eval_f1": 51.1874637199927,
      "step": 600
    },
    {
      "epoch": 0.16,
      "eval_exact_match": 37.59697256385998,
      "eval_f1": 55.70379412517118,
      "step": 900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.772706547510487e-05,
      "loss": 2.6487,
      "step": 1000
    },
    {
      "epoch": 0.22,
      "eval_exact_match": 40.30274361400189,
      "eval_f1": 57.845372827145646,
      "step": 1200
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.658717855188766e-05,
      "loss": 2.4073,
      "step": 1500
    },
    {
      "epoch": 0.27,
      "eval_exact_match": 41.5042573320719,
      "eval_f1": 59.344242314747255,
      "step": 1500
    },
    {
      "epoch": 0.33,
      "eval_exact_match": 43.528855250709555,
      "eval_f1": 60.57412156026134,
      "step": 1800
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.544729162867044e-05,
      "loss": 2.3006,
      "step": 2000
    },
    {
      "epoch": 0.38,
      "eval_exact_match": 43.85998107852412,
      "eval_f1": 60.74218457977161,
      "step": 2100
    },
    {
      "epoch": 0.44,
      "eval_exact_match": 45.32639545884579,
      "eval_f1": 62.60100745933504,
      "step": 2400
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.4309684479299654e-05,
      "loss": 2.2202,
      "step": 2500
    },
    {
      "epoch": 0.49,
      "eval_exact_match": 45.87511825922422,
      "eval_f1": 62.66596281773377,
      "step": 2700
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.3169797556082435e-05,
      "loss": 2.154,
      "step": 3000
    },
    {
      "epoch": 0.55,
      "eval_exact_match": 45.53453169347209,
      "eval_f1": 62.82975014751129,
      "step": 3000
    },
    {
      "epoch": 0.6,
      "eval_exact_match": 46.395458845789975,
      "eval_f1": 63.47874761462123,
      "step": 3300
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.202991063286522e-05,
      "loss": 2.1242,
      "step": 3500
    },
    {
      "epoch": 0.66,
      "eval_exact_match": 47.44560075685904,
      "eval_f1": 64.43571994761247,
      "step": 3600
    },
    {
      "epoch": 0.71,
      "eval_exact_match": 47.62535477767266,
      "eval_f1": 65.01568330950501,
      "step": 3900
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.0890023709648004e-05,
      "loss": 2.0919,
      "step": 4000
    },
    {
      "epoch": 0.77,
      "eval_exact_match": 48.58088930936613,
      "eval_f1": 65.844186710796,
      "step": 4200
    },
    {
      "epoch": 0.82,
      "learning_rate": 3.9750136786430785e-05,
      "loss": 2.0242,
      "step": 4500
    },
    {
      "epoch": 0.82,
      "eval_exact_match": 48.618732261116364,
      "eval_f1": 65.93288102675687,
      "step": 4500
    },
    {
      "epoch": 0.88,
      "eval_exact_match": 47.62535477767266,
      "eval_f1": 65.07422958403791,
      "step": 4800
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.861252963706001e-05,
      "loss": 2.0193,
      "step": 5000
    },
    {
      "epoch": 0.93,
      "eval_exact_match": 49.09176915799432,
      "eval_f1": 66.2146996747605,
      "step": 5100
    },
    {
      "epoch": 0.98,
      "eval_exact_match": 49.26206244087039,
      "eval_f1": 67.13615087977189,
      "step": 5400
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.7474922487689224e-05,
      "loss": 1.9651,
      "step": 5500
    },
    {
      "epoch": 1.04,
      "eval_exact_match": 49.92431409649953,
      "eval_f1": 66.56942877421614,
      "step": 5700
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.633503556447201e-05,
      "loss": 1.6733,
      "step": 6000
    },
    {
      "epoch": 1.09,
      "eval_exact_match": 50.08514664143803,
      "eval_f1": 66.7344480216334,
      "step": 6000
    },
    {
      "epoch": 1.15,
      "eval_exact_match": 49.85808893093661,
      "eval_f1": 67.17831265861523,
      "step": 6300
    },
    {
      "epoch": 1.19,
      "learning_rate": 3.5195148641254786e-05,
      "loss": 1.7062,
      "step": 6500
    },
    {
      "epoch": 1.2,
      "eval_exact_match": 50.07568590350047,
      "eval_f1": 67.34676690188185,
      "step": 6600
    },
    {
      "epoch": 1.26,
      "eval_exact_match": 49.92431409649953,
      "eval_f1": 66.86831954211718,
      "step": 6900
    },
    {
      "epoch": 1.28,
      "learning_rate": 3.4055261718037574e-05,
      "loss": 1.6685,
      "step": 7000
    },
    {
      "epoch": 1.31,
      "eval_exact_match": 50.3689687795648,
      "eval_f1": 67.58117044489109,
      "step": 7200
    },
    {
      "epoch": 1.37,
      "learning_rate": 3.2915374794820355e-05,
      "loss": 1.683,
      "step": 7500
    },
    {
      "epoch": 1.37,
      "eval_exact_match": 51.116367076631974,
      "eval_f1": 67.65638675710437,
      "step": 7500
    },
    {
      "epoch": 1.42,
      "eval_exact_match": 50.879848628193,
      "eval_f1": 67.74322775380313,
      "step": 7800
    },
    {
      "epoch": 1.46,
      "learning_rate": 3.177776764544958e-05,
      "loss": 1.6271,
      "step": 8000
    },
    {
      "epoch": 1.48,
      "eval_exact_match": 51.14474929044466,
      "eval_f1": 67.41847561216078,
      "step": 8100
    },
    {
      "epoch": 1.53,
      "eval_exact_match": 50.90823084200568,
      "eval_f1": 67.83244584175503,
      "step": 8400
    },
    {
      "epoch": 1.55,
      "learning_rate": 3.063788072223235e-05,
      "loss": 1.6509,
      "step": 8500
    },
    {
      "epoch": 1.59,
      "eval_exact_match": 51.485335856196784,
      "eval_f1": 68.03877774930996,
      "step": 8700
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.949799379901514e-05,
      "loss": 1.6591,
      "step": 9000
    },
    {
      "epoch": 1.64,
      "eval_exact_match": 51.390728476821195,
      "eval_f1": 68.12396413437372,
      "step": 9000
    },
    {
      "epoch": 1.7,
      "eval_exact_match": 51.91106906338695,
      "eval_f1": 68.28274234884077,
      "step": 9300
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.8358106875797924e-05,
      "loss": 1.6597,
      "step": 9500
    },
    {
      "epoch": 1.75,
      "eval_exact_match": 52.147587511825925,
      "eval_f1": 68.44926769664524,
      "step": 9600
    },
    {
      "epoch": 1.81,
      "eval_exact_match": 51.920529801324506,
      "eval_f1": 68.25478495844789,
      "step": 9900
    },
    {
      "epoch": 1.82,
      "learning_rate": 2.7218219952580705e-05,
      "loss": 1.6526,
      "step": 10000
    },
    {
      "epoch": 1.86,
      "eval_exact_match": 51.88268684957427,
      "eval_f1": 68.59555473771334,
      "step": 10200
    },
    {
      "epoch": 1.92,
      "learning_rate": 2.607833302936349e-05,
      "loss": 1.6083,
      "step": 10500
    },
    {
      "epoch": 1.92,
      "eval_exact_match": 52.223273415326396,
      "eval_f1": 68.3894855383881,
      "step": 10500
    },
    {
      "epoch": 1.97,
      "eval_exact_match": 52.04351939451277,
      "eval_f1": 68.85931291757578,
      "step": 10800
    },
    {
      "epoch": 2.01,
      "learning_rate": 2.493844610614627e-05,
      "loss": 1.5584,
      "step": 11000
    },
    {
      "epoch": 2.02,
      "eval_exact_match": 51.41911069063387,
      "eval_f1": 68.45020620158613,
      "step": 11100
    },
    {
      "epoch": 2.08,
      "eval_exact_match": 51.42857142857143,
      "eval_f1": 68.42914280017077,
      "step": 11400
    },
    {
      "epoch": 2.1,
      "learning_rate": 2.3798559182929054e-05,
      "loss": 1.2913,
      "step": 11500
    },
    {
      "epoch": 2.13,
      "eval_exact_match": 51.788079470198674,
      "eval_f1": 68.62435982099952,
      "step": 11700
    },
    {
      "epoch": 2.19,
      "learning_rate": 2.265867225971184e-05,
      "loss": 1.2903,
      "step": 12000
    },
    {
      "epoch": 2.19,
      "eval_exact_match": 51.83538315988647,
      "eval_f1": 68.70784934153268,
      "step": 12000
    },
    {
      "epoch": 2.24,
      "eval_exact_match": 52.29895931882687,
      "eval_f1": 68.91609842633096,
      "step": 12300
    },
    {
      "epoch": 2.28,
      "learning_rate": 2.1521065110341055e-05,
      "loss": 1.3203,
      "step": 12500
    },
    {
      "epoch": 2.3,
      "eval_exact_match": 51.9678334910123,
      "eval_f1": 68.89606884437242,
      "step": 12600
    },
    {
      "epoch": 2.35,
      "eval_exact_match": 52.21381267738884,
      "eval_f1": 68.5166390817109,
      "step": 12900
    },
    {
      "epoch": 2.37,
      "learning_rate": 2.0383457960970274e-05,
      "loss": 1.2923,
      "step": 13000
    },
    {
      "epoch": 2.41,
      "eval_exact_match": 52.11920529801324,
      "eval_f1": 68.80154847964492,
      "step": 13200
    },
    {
      "epoch": 2.46,
      "learning_rate": 1.9243571037753055e-05,
      "loss": 1.3007,
      "step": 13500
    },
    {
      "epoch": 2.46,
      "eval_exact_match": 51.93945127719962,
      "eval_f1": 68.56070274331668,
      "step": 13500
    },
    {
      "epoch": 2.52,
      "eval_exact_match": 52.450331125827816,
      "eval_f1": 68.92160672281842,
      "step": 13800
    },
    {
      "epoch": 2.55,
      "learning_rate": 1.8103684114535836e-05,
      "loss": 1.3022,
      "step": 14000
    },
    {
      "epoch": 2.57,
      "eval_exact_match": 52.081362346263006,
      "eval_f1": 68.734639590144,
      "step": 14100
    },
    {
      "epoch": 2.63,
      "eval_exact_match": 52.56385998107852,
      "eval_f1": 69.14264904059371,
      "step": 14400
    },
    {
      "epoch": 2.64,
      "learning_rate": 1.6963797191318624e-05,
      "loss": 1.2735,
      "step": 14500
    },
    {
      "epoch": 2.68,
      "eval_exact_match": 52.65846736045412,
      "eval_f1": 69.27808981447744,
      "step": 14700
    },
    {
      "epoch": 2.74,
      "learning_rate": 1.5823910268101405e-05,
      "loss": 1.3057,
      "step": 15000
    },
    {
      "epoch": 2.74,
      "eval_exact_match": 52.37464522232734,
      "eval_f1": 69.28041873050952,
      "step": 15000
    },
    {
      "epoch": 2.79,
      "eval_exact_match": 52.639545884579,
      "eval_f1": 69.10864509041865,
      "step": 15300
    },
    {
      "epoch": 2.83,
      "learning_rate": 1.4684023344884188e-05,
      "loss": 1.2689,
      "step": 15500
    },
    {
      "epoch": 2.85,
      "eval_exact_match": 52.157048249763484,
      "eval_f1": 68.73617410296806,
      "step": 15600
    },
    {
      "epoch": 2.9,
      "eval_exact_match": 52.58278145695364,
      "eval_f1": 69.30018384216147,
      "step": 15900
    },
    {
      "epoch": 2.92,
      "learning_rate": 1.3544136421666972e-05,
      "loss": 1.2489,
      "step": 16000
    },
    {
      "epoch": 2.95,
      "eval_exact_match": 52.639545884579,
      "eval_f1": 69.01439480212419,
      "step": 16200
    },
    {
      "epoch": 3.01,
      "learning_rate": 1.2406529272296188e-05,
      "loss": 1.2788,
      "step": 16500
    },
    {
      "epoch": 3.01,
      "eval_exact_match": 52.61116367076632,
      "eval_f1": 69.30278936364587,
      "step": 16500
    },
    {
      "epoch": 3.06,
      "eval_exact_match": 53.074739829706715,
      "eval_f1": 69.66985602287733,
      "step": 16800
    },
    {
      "epoch": 3.1,
      "learning_rate": 1.1266642349078972e-05,
      "loss": 1.0115,
      "step": 17000
    },
    {
      "epoch": 3.12,
      "eval_exact_match": 52.12866603595081,
      "eval_f1": 68.93516419404564,
      "step": 17100
    },
    {
      "epoch": 3.17,
      "eval_exact_match": 52.544938505203405,
      "eval_f1": 68.96323841158566,
      "step": 17400
    },
    {
      "epoch": 3.19,
      "learning_rate": 1.0126755425861755e-05,
      "loss": 1.0276,
      "step": 17500
    },
    {
      "epoch": 3.23,
      "eval_exact_match": 52.49763481551561,
      "eval_f1": 69.07220845517458,
      "step": 17700
    },
    {
      "epoch": 3.28,
      "learning_rate": 8.986868502644538e-06,
      "loss": 1.0218,
      "step": 18000
    },
    {
      "epoch": 3.28,
      "eval_exact_match": 52.29895931882687,
      "eval_f1": 69.02811306482906,
      "step": 18000
    },
    {
      "epoch": 3.34,
      "eval_exact_match": 52.639545884579,
      "eval_f1": 69.2185250484871,
      "step": 18300
    },
    {
      "epoch": 3.37,
      "learning_rate": 7.846981579427322e-06,
      "loss": 1.0386,
      "step": 18500
    },
    {
      "epoch": 3.39,
      "eval_exact_match": 52.459791863765375,
      "eval_f1": 69.36180052659964,
      "step": 18600
    },
    {
      "epoch": 3.45,
      "eval_exact_match": 52.57332071901608,
      "eval_f1": 69.30319952546826,
      "step": 18900
    },
    {
      "epoch": 3.47,
      "learning_rate": 6.707094656210104e-06,
      "loss": 1.0086,
      "step": 19000
    },
    {
      "epoch": 3.5,
      "eval_exact_match": 52.47871333964049,
      "eval_f1": 69.30902776116545,
      "step": 19200
    },
    {
      "epoch": 3.56,
      "learning_rate": 5.5694875068393215e-06,
      "loss": 1.0143,
      "step": 19500
    },
    {
      "epoch": 3.56,
      "eval_exact_match": 52.28949858088931,
      "eval_f1": 69.21311662285441,
      "step": 19500
    },
    {
      "epoch": 3.61,
      "eval_exact_match": 52.61116367076632,
      "eval_f1": 69.32649279145429,
      "step": 19800
    },
    {
      "epoch": 3.65,
      "learning_rate": 4.429600583622105e-06,
      "loss": 1.0071,
      "step": 20000
    },
    {
      "epoch": 3.67,
      "eval_exact_match": 52.544938505203405,
      "eval_f1": 68.97807013082176,
      "step": 20100
    },
    {
      "epoch": 3.72,
      "eval_exact_match": 52.5922421948912,
      "eval_f1": 69.18859635059812,
      "step": 20400
    },
    {
      "epoch": 3.74,
      "learning_rate": 3.289713660404888e-06,
      "loss": 1.0236,
      "step": 20500
    },
    {
      "epoch": 3.78,
      "eval_exact_match": 52.55439924314096,
      "eval_f1": 69.08256750898333,
      "step": 20700
    },
    {
      "epoch": 3.83,
      "learning_rate": 2.149826737187671e-06,
      "loss": 1.0015,
      "step": 21000
    },
    {
      "epoch": 3.83,
      "eval_exact_match": 52.70577105014191,
      "eval_f1": 69.22107041576008,
      "step": 21000
    },
    {
      "epoch": 3.88,
      "eval_exact_match": 52.73415326395459,
      "eval_f1": 69.36938667664833,
      "step": 21300
    },
    {
      "epoch": 3.92,
      "learning_rate": 1.0099398139704543e-06,
      "loss": 1.0189,
      "step": 21500
    },
    {
      "epoch": 3.94,
      "eval_exact_match": 52.63008514664144,
      "eval_f1": 69.28051735265512,
      "step": 21600
    },
    {
      "epoch": 3.99,
      "eval_exact_match": 52.71523178807947,
      "eval_f1": 69.35772325822661,
      "step": 21900
    },
    {
      "epoch": 4.0,
      "step": 21932,
      "total_flos": 2.986644942216499e+16,
      "train_loss": 1.5664932693461966,
      "train_runtime": 16788.5182,
      "train_samples_per_second": 20.902,
      "train_steps_per_second": 1.306
    }
  ],
  "max_steps": 21932,
  "num_train_epochs": 4,
  "total_flos": 2.986644942216499e+16,
  "trial_name": null,
  "trial_params": null
}