{ "best_metric": 69.66985602287733, "best_model_checkpoint": "/data/jcanete/all_results/tar/distillbeto/epochs_4_bs_16_lr_5e-5/checkpoint-16800", "epoch": 4.0, "global_step": 21932, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_exact_match": 26.310312204351938, "eval_f1": 40.606556876208344, "step": 300 }, { "epoch": 0.09, "learning_rate": 4.886695239832209e-05, "loss": 3.3838, "step": 500 }, { "epoch": 0.11, "eval_exact_match": 35.042573320719015, "eval_f1": 51.1874637199927, "step": 600 }, { "epoch": 0.16, "eval_exact_match": 37.59697256385998, "eval_f1": 55.70379412517118, "step": 900 }, { "epoch": 0.18, "learning_rate": 4.772706547510487e-05, "loss": 2.6487, "step": 1000 }, { "epoch": 0.22, "eval_exact_match": 40.30274361400189, "eval_f1": 57.845372827145646, "step": 1200 }, { "epoch": 0.27, "learning_rate": 4.658717855188766e-05, "loss": 2.4073, "step": 1500 }, { "epoch": 0.27, "eval_exact_match": 41.5042573320719, "eval_f1": 59.344242314747255, "step": 1500 }, { "epoch": 0.33, "eval_exact_match": 43.528855250709555, "eval_f1": 60.57412156026134, "step": 1800 }, { "epoch": 0.36, "learning_rate": 4.544729162867044e-05, "loss": 2.3006, "step": 2000 }, { "epoch": 0.38, "eval_exact_match": 43.85998107852412, "eval_f1": 60.74218457977161, "step": 2100 }, { "epoch": 0.44, "eval_exact_match": 45.32639545884579, "eval_f1": 62.60100745933504, "step": 2400 }, { "epoch": 0.46, "learning_rate": 4.4309684479299654e-05, "loss": 2.2202, "step": 2500 }, { "epoch": 0.49, "eval_exact_match": 45.87511825922422, "eval_f1": 62.66596281773377, "step": 2700 }, { "epoch": 0.55, "learning_rate": 4.3169797556082435e-05, "loss": 2.154, "step": 3000 }, { "epoch": 0.55, "eval_exact_match": 45.53453169347209, "eval_f1": 62.82975014751129, "step": 3000 }, { "epoch": 0.6, "eval_exact_match": 46.395458845789975, "eval_f1": 63.47874761462123, "step": 3300 }, { "epoch": 0.64, "learning_rate": 4.202991063286522e-05, "loss": 2.1242, "step": 3500 }, { "epoch": 0.66, "eval_exact_match": 47.44560075685904, "eval_f1": 64.43571994761247, "step": 3600 }, { "epoch": 0.71, "eval_exact_match": 47.62535477767266, "eval_f1": 65.01568330950501, "step": 3900 }, { "epoch": 0.73, "learning_rate": 4.0890023709648004e-05, "loss": 2.0919, "step": 4000 }, { "epoch": 0.77, "eval_exact_match": 48.58088930936613, "eval_f1": 65.844186710796, "step": 4200 }, { "epoch": 0.82, "learning_rate": 3.9750136786430785e-05, "loss": 2.0242, "step": 4500 }, { "epoch": 0.82, "eval_exact_match": 48.618732261116364, "eval_f1": 65.93288102675687, "step": 4500 }, { "epoch": 0.88, "eval_exact_match": 47.62535477767266, "eval_f1": 65.07422958403791, "step": 4800 }, { "epoch": 0.91, "learning_rate": 3.861252963706001e-05, "loss": 2.0193, "step": 5000 }, { "epoch": 0.93, "eval_exact_match": 49.09176915799432, "eval_f1": 66.2146996747605, "step": 5100 }, { "epoch": 0.98, "eval_exact_match": 49.26206244087039, "eval_f1": 67.13615087977189, "step": 5400 }, { "epoch": 1.0, "learning_rate": 3.7474922487689224e-05, "loss": 1.9651, "step": 5500 }, { "epoch": 1.04, "eval_exact_match": 49.92431409649953, "eval_f1": 66.56942877421614, "step": 5700 }, { "epoch": 1.09, "learning_rate": 3.633503556447201e-05, "loss": 1.6733, "step": 6000 }, { "epoch": 1.09, "eval_exact_match": 50.08514664143803, "eval_f1": 66.7344480216334, "step": 6000 }, { "epoch": 1.15, "eval_exact_match": 49.85808893093661, "eval_f1": 67.17831265861523, "step": 6300 }, { "epoch": 1.19, "learning_rate": 3.5195148641254786e-05, "loss": 1.7062, "step": 6500 }, { "epoch": 1.2, "eval_exact_match": 50.07568590350047, "eval_f1": 67.34676690188185, "step": 6600 }, { "epoch": 1.26, "eval_exact_match": 49.92431409649953, "eval_f1": 66.86831954211718, "step": 6900 }, { "epoch": 1.28, "learning_rate": 3.4055261718037574e-05, "loss": 1.6685, "step": 7000 }, { "epoch": 1.31, "eval_exact_match": 50.3689687795648, "eval_f1": 67.58117044489109, "step": 7200 }, { "epoch": 1.37, "learning_rate": 3.2915374794820355e-05, "loss": 1.683, "step": 7500 }, { "epoch": 1.37, "eval_exact_match": 51.116367076631974, "eval_f1": 67.65638675710437, "step": 7500 }, { "epoch": 1.42, "eval_exact_match": 50.879848628193, "eval_f1": 67.74322775380313, "step": 7800 }, { "epoch": 1.46, "learning_rate": 3.177776764544958e-05, "loss": 1.6271, "step": 8000 }, { "epoch": 1.48, "eval_exact_match": 51.14474929044466, "eval_f1": 67.41847561216078, "step": 8100 }, { "epoch": 1.53, "eval_exact_match": 50.90823084200568, "eval_f1": 67.83244584175503, "step": 8400 }, { "epoch": 1.55, "learning_rate": 3.063788072223235e-05, "loss": 1.6509, "step": 8500 }, { "epoch": 1.59, "eval_exact_match": 51.485335856196784, "eval_f1": 68.03877774930996, "step": 8700 }, { "epoch": 1.64, "learning_rate": 2.949799379901514e-05, "loss": 1.6591, "step": 9000 }, { "epoch": 1.64, "eval_exact_match": 51.390728476821195, "eval_f1": 68.12396413437372, "step": 9000 }, { "epoch": 1.7, "eval_exact_match": 51.91106906338695, "eval_f1": 68.28274234884077, "step": 9300 }, { "epoch": 1.73, "learning_rate": 2.8358106875797924e-05, "loss": 1.6597, "step": 9500 }, { "epoch": 1.75, "eval_exact_match": 52.147587511825925, "eval_f1": 68.44926769664524, "step": 9600 }, { "epoch": 1.81, "eval_exact_match": 51.920529801324506, "eval_f1": 68.25478495844789, "step": 9900 }, { "epoch": 1.82, "learning_rate": 2.7218219952580705e-05, "loss": 1.6526, "step": 10000 }, { "epoch": 1.86, "eval_exact_match": 51.88268684957427, "eval_f1": 68.59555473771334, "step": 10200 }, { "epoch": 1.92, "learning_rate": 2.607833302936349e-05, "loss": 1.6083, "step": 10500 }, { "epoch": 1.92, "eval_exact_match": 52.223273415326396, "eval_f1": 68.3894855383881, "step": 10500 }, { "epoch": 1.97, "eval_exact_match": 52.04351939451277, "eval_f1": 68.85931291757578, "step": 10800 }, { "epoch": 2.01, "learning_rate": 2.493844610614627e-05, "loss": 1.5584, "step": 11000 }, { "epoch": 2.02, "eval_exact_match": 51.41911069063387, "eval_f1": 68.45020620158613, "step": 11100 }, { "epoch": 2.08, "eval_exact_match": 51.42857142857143, "eval_f1": 68.42914280017077, "step": 11400 }, { "epoch": 2.1, "learning_rate": 2.3798559182929054e-05, "loss": 1.2913, "step": 11500 }, { "epoch": 2.13, "eval_exact_match": 51.788079470198674, "eval_f1": 68.62435982099952, "step": 11700 }, { "epoch": 2.19, "learning_rate": 2.265867225971184e-05, "loss": 1.2903, "step": 12000 }, { "epoch": 2.19, "eval_exact_match": 51.83538315988647, "eval_f1": 68.70784934153268, "step": 12000 }, { "epoch": 2.24, "eval_exact_match": 52.29895931882687, "eval_f1": 68.91609842633096, "step": 12300 }, { "epoch": 2.28, "learning_rate": 2.1521065110341055e-05, "loss": 1.3203, "step": 12500 }, { "epoch": 2.3, "eval_exact_match": 51.9678334910123, "eval_f1": 68.89606884437242, "step": 12600 }, { "epoch": 2.35, "eval_exact_match": 52.21381267738884, "eval_f1": 68.5166390817109, "step": 12900 }, { "epoch": 2.37, "learning_rate": 2.0383457960970274e-05, "loss": 1.2923, "step": 13000 }, { "epoch": 2.41, "eval_exact_match": 52.11920529801324, "eval_f1": 68.80154847964492, "step": 13200 }, { "epoch": 2.46, "learning_rate": 1.9243571037753055e-05, "loss": 1.3007, "step": 13500 }, { "epoch": 2.46, "eval_exact_match": 51.93945127719962, "eval_f1": 68.56070274331668, "step": 13500 }, { "epoch": 2.52, "eval_exact_match": 52.450331125827816, "eval_f1": 68.92160672281842, "step": 13800 }, { "epoch": 2.55, "learning_rate": 1.8103684114535836e-05, "loss": 1.3022, "step": 14000 }, { "epoch": 2.57, "eval_exact_match": 52.081362346263006, "eval_f1": 68.734639590144, "step": 14100 }, { "epoch": 2.63, "eval_exact_match": 52.56385998107852, "eval_f1": 69.14264904059371, "step": 14400 }, { "epoch": 2.64, "learning_rate": 1.6963797191318624e-05, "loss": 1.2735, "step": 14500 }, { "epoch": 2.68, "eval_exact_match": 52.65846736045412, "eval_f1": 69.27808981447744, "step": 14700 }, { "epoch": 2.74, "learning_rate": 1.5823910268101405e-05, "loss": 1.3057, "step": 15000 }, { "epoch": 2.74, "eval_exact_match": 52.37464522232734, "eval_f1": 69.28041873050952, "step": 15000 }, { "epoch": 2.79, "eval_exact_match": 52.639545884579, "eval_f1": 69.10864509041865, "step": 15300 }, { "epoch": 2.83, "learning_rate": 1.4684023344884188e-05, "loss": 1.2689, "step": 15500 }, { "epoch": 2.85, "eval_exact_match": 52.157048249763484, "eval_f1": 68.73617410296806, "step": 15600 }, { "epoch": 2.9, "eval_exact_match": 52.58278145695364, "eval_f1": 69.30018384216147, "step": 15900 }, { "epoch": 2.92, "learning_rate": 1.3544136421666972e-05, "loss": 1.2489, "step": 16000 }, { "epoch": 2.95, "eval_exact_match": 52.639545884579, "eval_f1": 69.01439480212419, "step": 16200 }, { "epoch": 3.01, "learning_rate": 1.2406529272296188e-05, "loss": 1.2788, "step": 16500 }, { "epoch": 3.01, "eval_exact_match": 52.61116367076632, "eval_f1": 69.30278936364587, "step": 16500 }, { "epoch": 3.06, "eval_exact_match": 53.074739829706715, "eval_f1": 69.66985602287733, "step": 16800 }, { "epoch": 3.1, "learning_rate": 1.1266642349078972e-05, "loss": 1.0115, "step": 17000 }, { "epoch": 3.12, "eval_exact_match": 52.12866603595081, "eval_f1": 68.93516419404564, "step": 17100 }, { "epoch": 3.17, "eval_exact_match": 52.544938505203405, "eval_f1": 68.96323841158566, "step": 17400 }, { "epoch": 3.19, "learning_rate": 1.0126755425861755e-05, "loss": 1.0276, "step": 17500 }, { "epoch": 3.23, "eval_exact_match": 52.49763481551561, "eval_f1": 69.07220845517458, "step": 17700 }, { "epoch": 3.28, "learning_rate": 8.986868502644538e-06, "loss": 1.0218, "step": 18000 }, { "epoch": 3.28, "eval_exact_match": 52.29895931882687, "eval_f1": 69.02811306482906, "step": 18000 }, { "epoch": 3.34, "eval_exact_match": 52.639545884579, "eval_f1": 69.2185250484871, "step": 18300 }, { "epoch": 3.37, "learning_rate": 7.846981579427322e-06, "loss": 1.0386, "step": 18500 }, { "epoch": 3.39, "eval_exact_match": 52.459791863765375, "eval_f1": 69.36180052659964, "step": 18600 }, { "epoch": 3.45, "eval_exact_match": 52.57332071901608, "eval_f1": 69.30319952546826, "step": 18900 }, { "epoch": 3.47, "learning_rate": 6.707094656210104e-06, "loss": 1.0086, "step": 19000 }, { "epoch": 3.5, "eval_exact_match": 52.47871333964049, "eval_f1": 69.30902776116545, "step": 19200 }, { "epoch": 3.56, "learning_rate": 5.5694875068393215e-06, "loss": 1.0143, "step": 19500 }, { "epoch": 3.56, "eval_exact_match": 52.28949858088931, "eval_f1": 69.21311662285441, "step": 19500 }, { "epoch": 3.61, "eval_exact_match": 52.61116367076632, "eval_f1": 69.32649279145429, "step": 19800 }, { "epoch": 3.65, "learning_rate": 4.429600583622105e-06, "loss": 1.0071, "step": 20000 }, { "epoch": 3.67, "eval_exact_match": 52.544938505203405, "eval_f1": 68.97807013082176, "step": 20100 }, { "epoch": 3.72, "eval_exact_match": 52.5922421948912, "eval_f1": 69.18859635059812, "step": 20400 }, { "epoch": 3.74, "learning_rate": 3.289713660404888e-06, "loss": 1.0236, "step": 20500 }, { "epoch": 3.78, "eval_exact_match": 52.55439924314096, "eval_f1": 69.08256750898333, "step": 20700 }, { "epoch": 3.83, "learning_rate": 2.149826737187671e-06, "loss": 1.0015, "step": 21000 }, { "epoch": 3.83, "eval_exact_match": 52.70577105014191, "eval_f1": 69.22107041576008, "step": 21000 }, { "epoch": 3.88, "eval_exact_match": 52.73415326395459, "eval_f1": 69.36938667664833, "step": 21300 }, { "epoch": 3.92, "learning_rate": 1.0099398139704543e-06, "loss": 1.0189, "step": 21500 }, { "epoch": 3.94, "eval_exact_match": 52.63008514664144, "eval_f1": 69.28051735265512, "step": 21600 }, { "epoch": 3.99, "eval_exact_match": 52.71523178807947, "eval_f1": 69.35772325822661, "step": 21900 }, { "epoch": 4.0, "step": 21932, "total_flos": 2.986644942216499e+16, "train_loss": 1.5664932693461966, "train_runtime": 16788.5182, "train_samples_per_second": 20.902, "train_steps_per_second": 1.306 } ], "max_steps": 21932, "num_train_epochs": 4, "total_flos": 2.986644942216499e+16, "trial_name": null, "trial_params": null }