{ "best_metric": 70.44465468306528, "best_model_checkpoint": "/content/drive/MyDrive/TCC/final/neuralmind_bert-base-portuguese-cased/checkpoint-31000", "epoch": 2.7790228597041686, "eval_steps": 500, "global_step": 31000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.9257433139100554e-05, "loss": 2.3635, "step": 500 }, { "epoch": 0.04, "eval_exact_match": 55.714285714285715, "eval_f1": 69.59838660839185, "eval_runtime": 93.6252, "eval_samples_per_second": 116.603, "eval_steps_per_second": 14.579, "step": 500 }, { "epoch": 0.09, "learning_rate": 4.85103839832661e-05, "loss": 1.564, "step": 1000 }, { "epoch": 0.09, "eval_exact_match": 59.6310312204352, "eval_f1": 73.43689270301098, "eval_runtime": 94.3533, "eval_samples_per_second": 115.703, "eval_steps_per_second": 14.467, "step": 1000 }, { "epoch": 0.13, "learning_rate": 4.7763334827431646e-05, "loss": 1.4656, "step": 1500 }, { "epoch": 0.13, "eval_exact_match": 61.21097445600757, "eval_f1": 74.72296532794967, "eval_runtime": 94.6793, "eval_samples_per_second": 115.305, "eval_steps_per_second": 14.417, "step": 1500 }, { "epoch": 0.18, "learning_rate": 4.7016285671597196e-05, "loss": 1.3779, "step": 2000 }, { "epoch": 0.18, "eval_exact_match": 62.838221381267736, "eval_f1": 76.12650138006829, "eval_runtime": 94.5066, "eval_samples_per_second": 115.516, "eval_steps_per_second": 14.443, "step": 2000 }, { "epoch": 0.22, "learning_rate": 4.626923651576274e-05, "loss": 1.3188, "step": 2500 }, { "epoch": 0.22, "eval_exact_match": 63.39640491958373, "eval_f1": 77.11550249930004, "eval_runtime": 93.9534, "eval_samples_per_second": 116.196, "eval_steps_per_second": 14.528, "step": 2500 }, { "epoch": 0.27, "learning_rate": 4.552218735992829e-05, "loss": 1.2921, "step": 3000 }, { "epoch": 0.27, "eval_exact_match": 63.91674550614948, "eval_f1": 77.48352297901853, "eval_runtime": 93.6349, "eval_samples_per_second": 116.591, "eval_steps_per_second": 14.578, "step": 3000 }, { "epoch": 0.31, "learning_rate": 4.477513820409383e-05, "loss": 1.2934, "step": 3500 }, { "epoch": 0.31, "eval_exact_match": 64.61684011352885, "eval_f1": 77.4199421582579, "eval_runtime": 92.0676, "eval_samples_per_second": 118.576, "eval_steps_per_second": 14.826, "step": 3500 }, { "epoch": 0.36, "learning_rate": 4.4029583146571045e-05, "loss": 1.2225, "step": 4000 }, { "epoch": 0.36, "eval_exact_match": 65.61967833491012, "eval_f1": 78.46841197787658, "eval_runtime": 93.9916, "eval_samples_per_second": 116.149, "eval_steps_per_second": 14.523, "step": 4000 }, { "epoch": 0.4, "learning_rate": 4.328253399073659e-05, "loss": 1.2239, "step": 4500 }, { "epoch": 0.4, "eval_exact_match": 66.07379375591296, "eval_f1": 78.43551372280929, "eval_runtime": 93.5624, "eval_samples_per_second": 116.682, "eval_steps_per_second": 14.589, "step": 4500 }, { "epoch": 0.45, "learning_rate": 4.253548483490214e-05, "loss": 1.1797, "step": 5000 }, { "epoch": 0.45, "eval_exact_match": 65.88457899716178, "eval_f1": 78.82847243839159, "eval_runtime": 93.7538, "eval_samples_per_second": 116.443, "eval_steps_per_second": 14.559, "step": 5000 }, { "epoch": 0.49, "learning_rate": 4.178992977737936e-05, "loss": 1.1699, "step": 5500 }, { "epoch": 0.49, "eval_exact_match": 66.3197729422895, "eval_f1": 79.50291621832777, "eval_runtime": 94.2029, "eval_samples_per_second": 115.888, "eval_steps_per_second": 14.49, "step": 5500 }, { "epoch": 0.54, "learning_rate": 4.10428806215449e-05, "loss": 1.1824, "step": 6000 }, { "epoch": 0.54, "eval_exact_match": 66.97256385998108, "eval_f1": 79.54163598754296, "eval_runtime": 93.6716, "eval_samples_per_second": 116.545, "eval_steps_per_second": 14.572, "step": 6000 }, { "epoch": 0.58, "learning_rate": 4.0295831465710444e-05, "loss": 1.1543, "step": 6500 }, { "epoch": 0.58, "eval_exact_match": 67.00094607379376, "eval_f1": 79.70190311251304, "eval_runtime": 94.0564, "eval_samples_per_second": 116.069, "eval_steps_per_second": 14.513, "step": 6500 }, { "epoch": 0.63, "learning_rate": 3.954878230987599e-05, "loss": 1.1569, "step": 7000 }, { "epoch": 0.63, "eval_exact_match": 67.41721854304636, "eval_f1": 80.15789605227158, "eval_runtime": 94.421, "eval_samples_per_second": 115.62, "eval_steps_per_second": 14.457, "step": 7000 }, { "epoch": 0.67, "learning_rate": 3.880322725235321e-05, "loss": 1.121, "step": 7500 }, { "epoch": 0.67, "eval_exact_match": 68.04162724692526, "eval_f1": 80.5276353718758, "eval_runtime": 93.7353, "eval_samples_per_second": 116.466, "eval_steps_per_second": 14.562, "step": 7500 }, { "epoch": 0.72, "learning_rate": 3.805617809651875e-05, "loss": 1.0796, "step": 8000 }, { "epoch": 0.72, "eval_exact_match": 67.65373699148533, "eval_f1": 80.30398381876631, "eval_runtime": 93.674, "eval_samples_per_second": 116.543, "eval_steps_per_second": 14.572, "step": 8000 }, { "epoch": 0.76, "learning_rate": 3.73091289406843e-05, "loss": 1.1176, "step": 8500 }, { "epoch": 0.76, "eval_exact_match": 68.09839167455061, "eval_f1": 80.53785711864037, "eval_runtime": 94.6643, "eval_samples_per_second": 115.323, "eval_steps_per_second": 14.419, "step": 8500 }, { "epoch": 0.81, "learning_rate": 3.656207978484984e-05, "loss": 1.0626, "step": 9000 }, { "epoch": 0.81, "eval_exact_match": 68.61873226111636, "eval_f1": 80.81646879779186, "eval_runtime": 93.9801, "eval_samples_per_second": 116.163, "eval_steps_per_second": 14.524, "step": 9000 }, { "epoch": 0.85, "learning_rate": 3.581503062901539e-05, "loss": 1.1484, "step": 9500 }, { "epoch": 0.85, "eval_exact_match": 68.6092715231788, "eval_f1": 81.03614759103229, "eval_runtime": 93.7103, "eval_samples_per_second": 116.497, "eval_steps_per_second": 14.566, "step": 9500 }, { "epoch": 0.9, "learning_rate": 3.5069475571492606e-05, "loss": 1.0847, "step": 10000 }, { "epoch": 0.9, "eval_exact_match": 68.48628192999054, "eval_f1": 81.00822592040217, "eval_runtime": 94.5526, "eval_samples_per_second": 115.46, "eval_steps_per_second": 14.436, "step": 10000 }, { "epoch": 0.94, "learning_rate": 3.4322426415658156e-05, "loss": 1.069, "step": 10500 }, { "epoch": 0.94, "eval_exact_match": 68.22138126773888, "eval_f1": 80.91059554693693, "eval_runtime": 94.5852, "eval_samples_per_second": 115.42, "eval_steps_per_second": 14.431, "step": 10500 }, { "epoch": 0.99, "learning_rate": 3.35753772598237e-05, "loss": 1.0829, "step": 11000 }, { "epoch": 0.99, "eval_exact_match": 68.87417218543047, "eval_f1": 81.37580051217576, "eval_runtime": 94.0691, "eval_samples_per_second": 116.053, "eval_steps_per_second": 14.511, "step": 11000 }, { "epoch": 1.03, "learning_rate": 3.282832810398924e-05, "loss": 0.9091, "step": 11500 }, { "epoch": 1.03, "eval_exact_match": 69.31882686849575, "eval_f1": 81.69135446904038, "eval_runtime": 93.8852, "eval_samples_per_second": 116.28, "eval_steps_per_second": 14.539, "step": 11500 }, { "epoch": 1.08, "learning_rate": 3.208127894815479e-05, "loss": 0.8182, "step": 12000 }, { "epoch": 1.08, "eval_exact_match": 68.85525070955535, "eval_f1": 81.35681188242668, "eval_runtime": 94.5168, "eval_samples_per_second": 115.503, "eval_steps_per_second": 14.442, "step": 12000 }, { "epoch": 1.12, "learning_rate": 3.1335723890632005e-05, "loss": 0.8036, "step": 12500 }, { "epoch": 1.12, "eval_exact_match": 68.9120151371807, "eval_f1": 81.42398358953557, "eval_runtime": 94.298, "eval_samples_per_second": 115.771, "eval_steps_per_second": 14.475, "step": 12500 }, { "epoch": 1.17, "learning_rate": 3.0588674734797554e-05, "loss": 0.7934, "step": 13000 }, { "epoch": 1.17, "eval_exact_match": 69.22421948912014, "eval_f1": 81.60610451768409, "eval_runtime": 94.1794, "eval_samples_per_second": 115.917, "eval_steps_per_second": 14.494, "step": 13000 }, { "epoch": 1.21, "learning_rate": 2.9841625578963094e-05, "loss": 0.8257, "step": 13500 }, { "epoch": 1.21, "eval_exact_match": 69.24314096499526, "eval_f1": 81.31246415271767, "eval_runtime": 94.1457, "eval_samples_per_second": 115.959, "eval_steps_per_second": 14.499, "step": 13500 }, { "epoch": 1.26, "learning_rate": 2.9097564619751984e-05, "loss": 0.7647, "step": 14000 }, { "epoch": 1.26, "eval_exact_match": 69.54588457899716, "eval_f1": 81.54865605083789, "eval_runtime": 97.2493, "eval_samples_per_second": 112.258, "eval_steps_per_second": 14.036, "step": 14000 }, { "epoch": 1.3, "learning_rate": 2.8350515463917526e-05, "loss": 0.8159, "step": 14500 }, { "epoch": 1.3, "eval_exact_match": 69.75402081362347, "eval_f1": 81.80799852605104, "eval_runtime": 97.2885, "eval_samples_per_second": 112.213, "eval_steps_per_second": 14.03, "step": 14500 }, { "epoch": 1.34, "learning_rate": 2.760496040639474e-05, "loss": 0.7891, "step": 15000 }, { "epoch": 1.34, "eval_exact_match": 69.40397350993378, "eval_f1": 81.66765811080356, "eval_runtime": 97.7124, "eval_samples_per_second": 111.726, "eval_steps_per_second": 13.97, "step": 15000 }, { "epoch": 1.39, "learning_rate": 2.6857911250560287e-05, "loss": 0.7682, "step": 15500 }, { "epoch": 1.39, "eval_exact_match": 69.80132450331126, "eval_f1": 81.76733572870543, "eval_runtime": 97.5471, "eval_samples_per_second": 111.915, "eval_steps_per_second": 13.993, "step": 15500 }, { "epoch": 1.43, "learning_rate": 2.6110862094725836e-05, "loss": 0.8344, "step": 16000 }, { "epoch": 1.43, "eval_exact_match": 69.80132450331126, "eval_f1": 82.02027802692976, "eval_runtime": 97.8822, "eval_samples_per_second": 111.532, "eval_steps_per_second": 13.945, "step": 16000 }, { "epoch": 1.48, "learning_rate": 2.536381293889138e-05, "loss": 0.7962, "step": 16500 }, { "epoch": 1.48, "eval_exact_match": 69.5837275307474, "eval_f1": 81.83447343491943, "eval_runtime": 97.4639, "eval_samples_per_second": 112.011, "eval_steps_per_second": 14.005, "step": 16500 }, { "epoch": 1.52, "learning_rate": 2.4616763783056925e-05, "loss": 0.791, "step": 17000 }, { "epoch": 1.52, "eval_exact_match": 69.24314096499526, "eval_f1": 81.74385590401585, "eval_runtime": 97.6389, "eval_samples_per_second": 111.81, "eval_steps_per_second": 13.98, "step": 17000 }, { "epoch": 1.57, "learning_rate": 2.3871208725534143e-05, "loss": 0.7899, "step": 17500 }, { "epoch": 1.57, "eval_exact_match": 70.17029328287606, "eval_f1": 82.07855269370206, "eval_runtime": 97.9053, "eval_samples_per_second": 111.506, "eval_steps_per_second": 13.942, "step": 17500 }, { "epoch": 1.61, "learning_rate": 2.312415956969969e-05, "loss": 0.7818, "step": 18000 }, { "epoch": 1.61, "eval_exact_match": 70.18921475875118, "eval_f1": 82.17592999499101, "eval_runtime": 97.839, "eval_samples_per_second": 111.581, "eval_steps_per_second": 13.951, "step": 18000 }, { "epoch": 1.66, "learning_rate": 2.237711041386523e-05, "loss": 0.7788, "step": 18500 }, { "epoch": 1.66, "eval_exact_match": 70.1135288552507, "eval_f1": 81.98266726601958, "eval_runtime": 97.5383, "eval_samples_per_second": 111.925, "eval_steps_per_second": 13.995, "step": 18500 }, { "epoch": 1.7, "learning_rate": 2.1630061258030777e-05, "loss": 0.7978, "step": 19000 }, { "epoch": 1.7, "eval_exact_match": 70.27436140018922, "eval_f1": 82.13962330348271, "eval_runtime": 97.6123, "eval_samples_per_second": 111.84, "eval_steps_per_second": 13.984, "step": 19000 }, { "epoch": 1.75, "learning_rate": 2.0884506200507995e-05, "loss": 0.7564, "step": 19500 }, { "epoch": 1.75, "eval_exact_match": 70.2554399243141, "eval_f1": 82.27552433766321, "eval_runtime": 97.699, "eval_samples_per_second": 111.741, "eval_steps_per_second": 13.971, "step": 19500 }, { "epoch": 1.79, "learning_rate": 2.013895114298521e-05, "loss": 0.7745, "step": 20000 }, { "epoch": 1.79, "eval_exact_match": 69.51750236518448, "eval_f1": 81.77959198176083, "eval_runtime": 97.2216, "eval_samples_per_second": 112.29, "eval_steps_per_second": 14.04, "step": 20000 }, { "epoch": 1.84, "learning_rate": 1.9391901987150756e-05, "loss": 0.7765, "step": 20500 }, { "epoch": 1.84, "eval_exact_match": 70.01892147587512, "eval_f1": 82.14097214992692, "eval_runtime": 97.7071, "eval_samples_per_second": 111.732, "eval_steps_per_second": 13.97, "step": 20500 }, { "epoch": 1.88, "learning_rate": 1.8644852831316302e-05, "loss": 0.7733, "step": 21000 }, { "epoch": 1.88, "eval_exact_match": 70.08514664143803, "eval_f1": 82.06505358181253, "eval_runtime": 97.8163, "eval_samples_per_second": 111.607, "eval_steps_per_second": 13.955, "step": 21000 }, { "epoch": 1.93, "learning_rate": 1.7897803675481848e-05, "loss": 0.7246, "step": 21500 }, { "epoch": 1.93, "eval_exact_match": 70.24597918637653, "eval_f1": 82.25827890698926, "eval_runtime": 97.5446, "eval_samples_per_second": 111.918, "eval_steps_per_second": 13.994, "step": 21500 }, { "epoch": 1.97, "learning_rate": 1.7150754519647394e-05, "loss": 0.7547, "step": 22000 }, { "epoch": 1.97, "eval_exact_match": 70.34058656575213, "eval_f1": 82.56582310303766, "eval_runtime": 97.629, "eval_samples_per_second": 111.821, "eval_steps_per_second": 13.982, "step": 22000 }, { "epoch": 2.02, "learning_rate": 1.640370536381294e-05, "loss": 0.6578, "step": 22500 }, { "epoch": 2.02, "eval_exact_match": 69.9526963103122, "eval_f1": 82.2037062819399, "eval_runtime": 97.6847, "eval_samples_per_second": 111.757, "eval_steps_per_second": 13.974, "step": 22500 }, { "epoch": 2.06, "learning_rate": 1.5656656207978486e-05, "loss": 0.5617, "step": 23000 }, { "epoch": 2.06, "eval_exact_match": 69.81078524124882, "eval_f1": 82.22378250212931, "eval_runtime": 98.723, "eval_samples_per_second": 110.582, "eval_steps_per_second": 13.827, "step": 23000 }, { "epoch": 2.11, "learning_rate": 1.490960705214403e-05, "loss": 0.5364, "step": 23500 }, { "epoch": 2.11, "eval_exact_match": 69.27152317880795, "eval_f1": 81.65647111542785, "eval_runtime": 97.2755, "eval_samples_per_second": 112.228, "eval_steps_per_second": 14.032, "step": 23500 }, { "epoch": 2.15, "learning_rate": 1.4162557896309578e-05, "loss": 0.563, "step": 24000 }, { "epoch": 2.15, "eval_exact_match": 69.60264900662251, "eval_f1": 81.87189877766106, "eval_runtime": 96.991, "eval_samples_per_second": 112.557, "eval_steps_per_second": 14.073, "step": 24000 }, { "epoch": 2.2, "learning_rate": 1.3415508740475124e-05, "loss": 0.5428, "step": 24500 }, { "epoch": 2.2, "eval_exact_match": 69.99053926206244, "eval_f1": 82.09975966355653, "eval_runtime": 97.2843, "eval_samples_per_second": 112.218, "eval_steps_per_second": 14.031, "step": 24500 }, { "epoch": 2.24, "learning_rate": 1.2668459584640672e-05, "loss": 0.5422, "step": 25000 }, { "epoch": 2.24, "eval_exact_match": 69.96215704824976, "eval_f1": 82.10145580749625, "eval_runtime": 97.9452, "eval_samples_per_second": 111.46, "eval_steps_per_second": 13.936, "step": 25000 }, { "epoch": 2.29, "learning_rate": 1.1921410428806216e-05, "loss": 0.53, "step": 25500 }, { "epoch": 2.29, "eval_exact_match": 70.0473036896878, "eval_f1": 82.2514724984976, "eval_runtime": 97.3143, "eval_samples_per_second": 112.183, "eval_steps_per_second": 14.027, "step": 25500 }, { "epoch": 2.33, "learning_rate": 1.1174361272971762e-05, "loss": 0.5413, "step": 26000 }, { "epoch": 2.33, "eval_exact_match": 69.64049195837275, "eval_f1": 82.09628194591657, "eval_runtime": 97.5703, "eval_samples_per_second": 111.889, "eval_steps_per_second": 13.99, "step": 26000 }, { "epoch": 2.38, "learning_rate": 1.0427312117137308e-05, "loss": 0.5577, "step": 26500 }, { "epoch": 2.38, "eval_exact_match": 69.73509933774834, "eval_f1": 81.98747499732926, "eval_runtime": 97.6595, "eval_samples_per_second": 111.786, "eval_steps_per_second": 13.977, "step": 26500 }, { "epoch": 2.42, "learning_rate": 9.681757059614523e-06, "loss": 0.5425, "step": 27000 }, { "epoch": 2.42, "eval_exact_match": 70.22705771050141, "eval_f1": 82.25772829273936, "eval_runtime": 97.2026, "eval_samples_per_second": 112.312, "eval_steps_per_second": 14.043, "step": 27000 }, { "epoch": 2.47, "learning_rate": 8.934707903780069e-06, "loss": 0.5559, "step": 27500 }, { "epoch": 2.47, "eval_exact_match": 70.1608325449385, "eval_f1": 82.01041983836598, "eval_runtime": 97.2719, "eval_samples_per_second": 112.232, "eval_steps_per_second": 14.033, "step": 27500 }, { "epoch": 2.51, "learning_rate": 8.189152846257283e-06, "loss": 0.5303, "step": 28000 }, { "epoch": 2.51, "eval_exact_match": 70.03784295175024, "eval_f1": 82.07818470354358, "eval_runtime": 97.4923, "eval_samples_per_second": 111.978, "eval_steps_per_second": 14.001, "step": 28000 }, { "epoch": 2.55, "learning_rate": 7.44210369042283e-06, "loss": 0.5316, "step": 28500 }, { "epoch": 2.55, "eval_exact_match": 69.81078524124882, "eval_f1": 81.89062416049632, "eval_runtime": 97.1987, "eval_samples_per_second": 112.316, "eval_steps_per_second": 14.043, "step": 28500 }, { "epoch": 2.6, "learning_rate": 6.6950545345883754e-06, "loss": 0.5394, "step": 29000 }, { "epoch": 2.6, "eval_exact_match": 69.8864711447493, "eval_f1": 82.1053760243633, "eval_runtime": 97.175, "eval_samples_per_second": 112.344, "eval_steps_per_second": 14.047, "step": 29000 }, { "epoch": 2.64, "learning_rate": 5.948005378753922e-06, "loss": 0.5347, "step": 29500 }, { "epoch": 2.64, "eval_exact_match": 69.82970671712394, "eval_f1": 81.99062166024035, "eval_runtime": 97.2892, "eval_samples_per_second": 112.212, "eval_steps_per_second": 14.03, "step": 29500 }, { "epoch": 2.69, "learning_rate": 5.200956222919468e-06, "loss": 0.5495, "step": 30000 }, { "epoch": 2.69, "eval_exact_match": 70.10406811731315, "eval_f1": 82.12375117526787, "eval_runtime": 97.1805, "eval_samples_per_second": 112.337, "eval_steps_per_second": 14.046, "step": 30000 }, { "epoch": 2.73, "learning_rate": 4.453907067085014e-06, "loss": 0.5392, "step": 30500 }, { "epoch": 2.73, "eval_exact_match": 70.35950804162725, "eval_f1": 82.23211986521385, "eval_runtime": 97.2401, "eval_samples_per_second": 112.269, "eval_steps_per_second": 14.037, "step": 30500 }, { "epoch": 2.78, "learning_rate": 3.7068579112505607e-06, "loss": 0.5443, "step": 31000 }, { "epoch": 2.78, "eval_exact_match": 70.44465468306528, "eval_f1": 82.2552836496006, "eval_runtime": 97.3462, "eval_samples_per_second": 112.146, "eval_steps_per_second": 14.022, "step": 31000 } ], "logging_steps": 500, "max_steps": 33465, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 4.86008048077609e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }