{ "best_metric": 0.49082762002944946, "best_model_checkpoint": "models/toxic-bert-mbert/checkpoint-380", "epoch": 0.41170097508125675, "eval_steps": 10, "global_step": 380, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.968e-05, "loss": 1.4487, "step": 10 }, { "epoch": 0.01, "eval_f1": 0.48475671310316976, "eval_loss": 1.1357542276382446, "eval_runtime": 20.5083, "eval_samples_per_second": 270.037, "eval_steps_per_second": 11.264, "step": 10 }, { "epoch": 0.02, "learning_rate": 1.9280000000000002e-05, "loss": 1.1897, "step": 20 }, { "epoch": 0.02, "eval_f1": 0.48475671310316976, "eval_loss": 1.0628113746643066, "eval_runtime": 20.8063, "eval_samples_per_second": 266.169, "eval_steps_per_second": 11.102, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.8880000000000002e-05, "loss": 1.0351, "step": 30 }, { "epoch": 0.03, "eval_f1": 0.48475671310316976, "eval_loss": 1.0026295185089111, "eval_runtime": 21.4302, "eval_samples_per_second": 258.42, "eval_steps_per_second": 10.779, "step": 30 }, { "epoch": 0.04, "learning_rate": 1.8480000000000003e-05, "loss": 0.9274, "step": 40 }, { "epoch": 0.04, "eval_f1": 0.602134407148032, "eval_loss": 0.8651727437973022, "eval_runtime": 21.7252, "eval_samples_per_second": 254.911, "eval_steps_per_second": 10.633, "step": 40 }, { "epoch": 0.05, "learning_rate": 1.8080000000000003e-05, "loss": 0.8717, "step": 50 }, { "epoch": 0.05, "eval_f1": 0.5878712152620588, "eval_loss": 0.8609752058982849, "eval_runtime": 21.4208, "eval_samples_per_second": 258.534, "eval_steps_per_second": 10.784, "step": 50 }, { "epoch": 0.07, "learning_rate": 1.768e-05, "loss": 0.8187, "step": 60 }, { "epoch": 0.07, "eval_f1": 0.6764570688570977, "eval_loss": 0.7394715547561646, "eval_runtime": 21.8392, "eval_samples_per_second": 253.58, "eval_steps_per_second": 10.577, "step": 60 }, { "epoch": 0.08, "learning_rate": 1.732e-05, "loss": 0.8706, "step": 70 }, { "epoch": 0.08, "eval_f1": 0.6850705705176812, "eval_loss": 0.7013543844223022, "eval_runtime": 21.7795, "eval_samples_per_second": 254.276, "eval_steps_per_second": 10.606, "step": 70 }, { "epoch": 0.09, "learning_rate": 1.692e-05, "loss": 0.7463, "step": 80 }, { "epoch": 0.09, "eval_f1": 0.7283246330984933, "eval_loss": 0.665144681930542, "eval_runtime": 21.4734, "eval_samples_per_second": 257.901, "eval_steps_per_second": 10.758, "step": 80 }, { "epoch": 0.1, "learning_rate": 1.652e-05, "loss": 0.7677, "step": 90 }, { "epoch": 0.1, "eval_f1": 0.6548131465387051, "eval_loss": 0.718267560005188, "eval_runtime": 21.871, "eval_samples_per_second": 253.212, "eval_steps_per_second": 10.562, "step": 90 }, { "epoch": 0.11, "learning_rate": 1.612e-05, "loss": 0.6402, "step": 100 }, { "epoch": 0.11, "eval_f1": 0.7153883418869857, "eval_loss": 0.6134028434753418, "eval_runtime": 21.3946, "eval_samples_per_second": 258.85, "eval_steps_per_second": 10.797, "step": 100 }, { "epoch": 0.12, "learning_rate": 1.5720000000000002e-05, "loss": 0.6408, "step": 110 }, { "epoch": 0.12, "eval_f1": 0.7200057070545418, "eval_loss": 0.6316511034965515, "eval_runtime": 21.4218, "eval_samples_per_second": 258.521, "eval_steps_per_second": 10.783, "step": 110 }, { "epoch": 0.13, "learning_rate": 1.5320000000000002e-05, "loss": 0.6293, "step": 120 }, { "epoch": 0.13, "eval_f1": 0.7252309612107771, "eval_loss": 0.6177955865859985, "eval_runtime": 22.0787, "eval_samples_per_second": 250.831, "eval_steps_per_second": 10.463, "step": 120 }, { "epoch": 0.14, "learning_rate": 1.4920000000000001e-05, "loss": 0.5921, "step": 130 }, { "epoch": 0.14, "eval_f1": 0.718310903510847, "eval_loss": 0.6382821798324585, "eval_runtime": 21.4528, "eval_samples_per_second": 258.148, "eval_steps_per_second": 10.768, "step": 130 }, { "epoch": 0.15, "learning_rate": 1.4520000000000002e-05, "loss": 0.6829, "step": 140 }, { "epoch": 0.15, "eval_f1": 0.7104650126557948, "eval_loss": 0.6063101291656494, "eval_runtime": 21.6996, "eval_samples_per_second": 255.212, "eval_steps_per_second": 10.645, "step": 140 }, { "epoch": 0.16, "learning_rate": 1.412e-05, "loss": 0.6528, "step": 150 }, { "epoch": 0.16, "eval_f1": 0.7266460816131931, "eval_loss": 0.5720272064208984, "eval_runtime": 21.3758, "eval_samples_per_second": 259.078, "eval_steps_per_second": 10.807, "step": 150 }, { "epoch": 0.17, "learning_rate": 1.3720000000000002e-05, "loss": 0.5472, "step": 160 }, { "epoch": 0.17, "eval_f1": 0.7174077954335052, "eval_loss": 0.6016837358474731, "eval_runtime": 21.9003, "eval_samples_per_second": 252.873, "eval_steps_per_second": 10.548, "step": 160 }, { "epoch": 0.18, "learning_rate": 1.3320000000000001e-05, "loss": 0.6625, "step": 170 }, { "epoch": 0.18, "eval_f1": 0.7238055756700145, "eval_loss": 0.5748048424720764, "eval_runtime": 21.4241, "eval_samples_per_second": 258.494, "eval_steps_per_second": 10.782, "step": 170 }, { "epoch": 0.2, "learning_rate": 1.2920000000000002e-05, "loss": 0.551, "step": 180 }, { "epoch": 0.2, "eval_f1": 0.7216203790293768, "eval_loss": 0.5944197177886963, "eval_runtime": 21.7253, "eval_samples_per_second": 254.91, "eval_steps_per_second": 10.633, "step": 180 }, { "epoch": 0.21, "learning_rate": 1.252e-05, "loss": 0.5633, "step": 190 }, { "epoch": 0.21, "eval_f1": 0.7591239466384562, "eval_loss": 0.5621122717857361, "eval_runtime": 21.1932, "eval_samples_per_second": 261.31, "eval_steps_per_second": 10.9, "step": 190 }, { "epoch": 0.22, "learning_rate": 1.2120000000000001e-05, "loss": 0.5372, "step": 200 }, { "epoch": 0.22, "eval_f1": 0.75760072154562, "eval_loss": 0.5480858087539673, "eval_runtime": 21.2836, "eval_samples_per_second": 260.201, "eval_steps_per_second": 10.853, "step": 200 }, { "epoch": 0.23, "learning_rate": 1.172e-05, "loss": 0.6353, "step": 210 }, { "epoch": 0.23, "eval_f1": 0.7511938652247329, "eval_loss": 0.5706632137298584, "eval_runtime": 21.8422, "eval_samples_per_second": 253.545, "eval_steps_per_second": 10.576, "step": 210 }, { "epoch": 0.24, "learning_rate": 1.132e-05, "loss": 0.6332, "step": 220 }, { "epoch": 0.24, "eval_f1": 0.7614490903091703, "eval_loss": 0.5543012022972107, "eval_runtime": 21.5696, "eval_samples_per_second": 256.75, "eval_steps_per_second": 10.71, "step": 220 }, { "epoch": 0.25, "learning_rate": 1.0920000000000002e-05, "loss": 0.5311, "step": 230 }, { "epoch": 0.25, "eval_f1": 0.7432260016931946, "eval_loss": 0.5397886633872986, "eval_runtime": 21.3122, "eval_samples_per_second": 259.851, "eval_steps_per_second": 10.839, "step": 230 }, { "epoch": 0.26, "learning_rate": 1.0520000000000001e-05, "loss": 0.5791, "step": 240 }, { "epoch": 0.26, "eval_f1": 0.7434033096243912, "eval_loss": 0.5391152501106262, "eval_runtime": 21.3796, "eval_samples_per_second": 259.032, "eval_steps_per_second": 10.805, "step": 240 }, { "epoch": 0.27, "learning_rate": 1.0120000000000001e-05, "loss": 0.5831, "step": 250 }, { "epoch": 0.27, "eval_f1": 0.763099957359349, "eval_loss": 0.5244932174682617, "eval_runtime": 21.3101, "eval_samples_per_second": 259.877, "eval_steps_per_second": 10.84, "step": 250 }, { "epoch": 0.28, "learning_rate": 9.72e-06, "loss": 0.5453, "step": 260 }, { "epoch": 0.28, "eval_f1": 0.7585661401268046, "eval_loss": 0.5211306214332581, "eval_runtime": 21.2639, "eval_samples_per_second": 260.442, "eval_steps_per_second": 10.863, "step": 260 }, { "epoch": 0.29, "learning_rate": 9.32e-06, "loss": 0.5087, "step": 270 }, { "epoch": 0.29, "eval_f1": 0.7549183270549422, "eval_loss": 0.5206575989723206, "eval_runtime": 21.5568, "eval_samples_per_second": 256.902, "eval_steps_per_second": 10.716, "step": 270 }, { "epoch": 0.3, "learning_rate": 8.920000000000001e-06, "loss": 0.539, "step": 280 }, { "epoch": 0.3, "eval_f1": 0.7483575758659107, "eval_loss": 0.5601561665534973, "eval_runtime": 21.7276, "eval_samples_per_second": 254.883, "eval_steps_per_second": 10.632, "step": 280 }, { "epoch": 0.31, "learning_rate": 8.52e-06, "loss": 0.502, "step": 290 }, { "epoch": 0.31, "eval_f1": 0.7497368207624416, "eval_loss": 0.5269237160682678, "eval_runtime": 21.3645, "eval_samples_per_second": 259.215, "eval_steps_per_second": 10.812, "step": 290 }, { "epoch": 0.33, "learning_rate": 8.120000000000002e-06, "loss": 0.5656, "step": 300 }, { "epoch": 0.33, "eval_f1": 0.7490172830029811, "eval_loss": 0.5704778432846069, "eval_runtime": 21.3309, "eval_samples_per_second": 259.623, "eval_steps_per_second": 10.829, "step": 300 }, { "epoch": 0.34, "learning_rate": 7.72e-06, "loss": 0.6157, "step": 310 }, { "epoch": 0.34, "eval_f1": 0.7610313116302819, "eval_loss": 0.5527724027633667, "eval_runtime": 21.5886, "eval_samples_per_second": 256.524, "eval_steps_per_second": 10.7, "step": 310 }, { "epoch": 0.35, "learning_rate": 7.32e-06, "loss": 0.5262, "step": 320 }, { "epoch": 0.35, "eval_f1": 0.7693956225207979, "eval_loss": 0.5064041614532471, "eval_runtime": 21.4519, "eval_samples_per_second": 258.159, "eval_steps_per_second": 10.768, "step": 320 }, { "epoch": 0.36, "learning_rate": 6.92e-06, "loss": 0.5032, "step": 330 }, { "epoch": 0.36, "eval_f1": 0.757594480871035, "eval_loss": 0.5091240406036377, "eval_runtime": 21.5576, "eval_samples_per_second": 256.894, "eval_steps_per_second": 10.716, "step": 330 }, { "epoch": 0.37, "learning_rate": 6.520000000000001e-06, "loss": 0.4859, "step": 340 }, { "epoch": 0.37, "eval_f1": 0.751697533021681, "eval_loss": 0.5241729021072388, "eval_runtime": 21.6466, "eval_samples_per_second": 255.837, "eval_steps_per_second": 10.671, "step": 340 }, { "epoch": 0.38, "learning_rate": 6.120000000000001e-06, "loss": 0.6227, "step": 350 }, { "epoch": 0.38, "eval_f1": 0.7821857624888272, "eval_loss": 0.4922301769256592, "eval_runtime": 21.5823, "eval_samples_per_second": 256.599, "eval_steps_per_second": 10.703, "step": 350 }, { "epoch": 0.39, "learning_rate": 5.72e-06, "loss": 0.4927, "step": 360 }, { "epoch": 0.39, "eval_f1": 0.778530969617547, "eval_loss": 0.4920683801174164, "eval_runtime": 21.173, "eval_samples_per_second": 261.559, "eval_steps_per_second": 10.91, "step": 360 }, { "epoch": 0.4, "learning_rate": 5.320000000000001e-06, "loss": 0.596, "step": 370 }, { "epoch": 0.4, "eval_f1": 0.7737080595193158, "eval_loss": 0.509250283241272, "eval_runtime": 21.9207, "eval_samples_per_second": 252.638, "eval_steps_per_second": 10.538, "step": 370 }, { "epoch": 0.41, "learning_rate": 4.92e-06, "loss": 0.5932, "step": 380 }, { "epoch": 0.41, "eval_f1": 0.7900070254555186, "eval_loss": 0.49082762002944946, "eval_runtime": 21.7381, "eval_samples_per_second": 254.76, "eval_steps_per_second": 10.627, "step": 380 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "total_flos": 547397542755648.0, "train_batch_size": 24, "trial_name": null, "trial_params": null }