{ "best_metric": 0.5012531328320803, "best_model_checkpoint": "/content/our_data/checkpoint-9000", "epoch": 10.0, "eval_steps": 500, "global_step": 12410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 1.91941982272361e-05, "loss": 1.9177, "step": 500 }, { "epoch": 0.4, "eval_accuracy": 0.6004288777698356, "eval_f1": 0.0379746835443038, "eval_loss": 1.6839170455932617, "eval_precision": 0.06, "eval_recall": 0.027777777777777776, "eval_runtime": 3.4052, "eval_samples_per_second": 89.275, "eval_steps_per_second": 44.637, "step": 500 }, { "epoch": 0.81, "learning_rate": 1.83883964544722e-05, "loss": 1.4976, "step": 1000 }, { "epoch": 0.81, "eval_accuracy": 0.63128425065523, "eval_f1": 0.24557116676847893, "eval_loss": 1.4935959577560425, "eval_precision": 0.22814982973893302, "eval_recall": 0.26587301587301587, "eval_runtime": 2.3845, "eval_samples_per_second": 127.488, "eval_steps_per_second": 63.744, "step": 1000 }, { "epoch": 1.21, "learning_rate": 1.75825946817083e-05, "loss": 1.2309, "step": 1500 }, { "epoch": 1.21, "eval_accuracy": 0.6657136049559209, "eval_f1": 0.2877871825876663, "eval_loss": 1.2914698123931885, "eval_precision": 0.2650334075723831, "eval_recall": 0.3148148148148148, "eval_runtime": 3.4851, "eval_samples_per_second": 87.229, "eval_steps_per_second": 43.615, "step": 1500 }, { "epoch": 1.61, "learning_rate": 1.67767929089444e-05, "loss": 1.0546, "step": 2000 }, { "epoch": 1.61, "eval_accuracy": 0.6803669287586371, "eval_f1": 0.33198380566801616, "eval_loss": 1.2454315423965454, "eval_precision": 0.2949640287769784, "eval_recall": 0.37962962962962965, "eval_runtime": 2.4542, "eval_samples_per_second": 123.867, "eval_steps_per_second": 61.934, "step": 2000 }, { "epoch": 2.01, "learning_rate": 1.59709911361805e-05, "loss": 0.9405, "step": 2500 }, { "epoch": 2.01, "eval_accuracy": 0.6915654038599, "eval_f1": 0.35719063545150503, "eval_loss": 1.2377290725708008, "eval_precision": 0.36129905277401897, "eval_recall": 0.3531746031746032, "eval_runtime": 3.3956, "eval_samples_per_second": 89.527, "eval_steps_per_second": 44.763, "step": 2500 }, { "epoch": 2.42, "learning_rate": 1.5165189363416601e-05, "loss": 0.7501, "step": 3000 }, { "epoch": 2.42, "eval_accuracy": 0.7170598046223493, "eval_f1": 0.3872549019607843, "eval_loss": 1.1723062992095947, "eval_precision": 0.3607305936073059, "eval_recall": 0.41798941798941797, "eval_runtime": 2.4666, "eval_samples_per_second": 123.248, "eval_steps_per_second": 61.624, "step": 3000 }, { "epoch": 2.82, "learning_rate": 1.4359387590652701e-05, "loss": 0.7133, "step": 3500 }, { "epoch": 2.82, "eval_accuracy": 0.7159876101977604, "eval_f1": 0.39976204640095175, "eval_loss": 1.1583572626113892, "eval_precision": 0.36324324324324325, "eval_recall": 0.4444444444444444, "eval_runtime": 2.4767, "eval_samples_per_second": 122.743, "eval_steps_per_second": 61.371, "step": 3500 }, { "epoch": 3.22, "learning_rate": 1.35535858178888e-05, "loss": 0.5896, "step": 4000 }, { "epoch": 3.22, "eval_accuracy": 0.7306409340004766, "eval_f1": 0.42666666666666664, "eval_loss": 1.2287709712982178, "eval_precision": 0.41025641025641024, "eval_recall": 0.4444444444444444, "eval_runtime": 3.2475, "eval_samples_per_second": 93.609, "eval_steps_per_second": 46.805, "step": 4000 }, { "epoch": 3.63, "learning_rate": 1.27477840451249e-05, "loss": 0.5353, "step": 4500 }, { "epoch": 3.63, "eval_accuracy": 0.7253990945913747, "eval_f1": 0.4356672651107121, "eval_loss": 1.2319059371948242, "eval_precision": 0.3978142076502732, "eval_recall": 0.48148148148148145, "eval_runtime": 2.3963, "eval_samples_per_second": 126.861, "eval_steps_per_second": 63.431, "step": 4500 }, { "epoch": 4.03, "learning_rate": 1.1941982272361e-05, "loss": 0.5432, "step": 5000 }, { "epoch": 4.03, "eval_accuracy": 0.7306409340004766, "eval_f1": 0.4548825710754017, "eval_loss": 1.2172613143920898, "eval_precision": 0.42691415313225056, "eval_recall": 0.48677248677248675, "eval_runtime": 3.3895, "eval_samples_per_second": 89.689, "eval_steps_per_second": 44.845, "step": 5000 }, { "epoch": 4.43, "learning_rate": 1.11361804995971e-05, "loss": 0.4062, "step": 5500 }, { "epoch": 4.43, "eval_accuracy": 0.7271860852990231, "eval_f1": 0.4691358024691359, "eval_loss": 1.283239722251892, "eval_precision": 0.4398148148148148, "eval_recall": 0.5026455026455027, "eval_runtime": 2.4143, "eval_samples_per_second": 125.916, "eval_steps_per_second": 62.958, "step": 5500 }, { "epoch": 4.83, "learning_rate": 1.0330378726833199e-05, "loss": 0.4485, "step": 6000 }, { "epoch": 4.83, "eval_accuracy": 0.7412437455325233, "eval_f1": 0.4610778443113772, "eval_loss": 1.2196030616760254, "eval_precision": 0.4212253829321663, "eval_recall": 0.5092592592592593, "eval_runtime": 2.7872, "eval_samples_per_second": 109.069, "eval_steps_per_second": 54.534, "step": 6000 }, { "epoch": 5.24, "learning_rate": 9.5245769540693e-06, "loss": 0.3614, "step": 6500 }, { "epoch": 5.24, "eval_accuracy": 0.732547057421968, "eval_f1": 0.46210720887245843, "eval_loss": 1.3155299425125122, "eval_precision": 0.43252595155709345, "eval_recall": 0.49603174603174605, "eval_runtime": 3.3893, "eval_samples_per_second": 89.694, "eval_steps_per_second": 44.847, "step": 6500 }, { "epoch": 5.64, "learning_rate": 8.7187751813054e-06, "loss": 0.3308, "step": 7000 }, { "epoch": 5.64, "eval_accuracy": 0.7354062425542054, "eval_f1": 0.4604402141582391, "eval_loss": 1.3501168489456177, "eval_precision": 0.4183783783783784, "eval_recall": 0.5119047619047619, "eval_runtime": 2.4288, "eval_samples_per_second": 125.165, "eval_steps_per_second": 62.583, "step": 7000 }, { "epoch": 6.04, "learning_rate": 7.9129734085415e-06, "loss": 0.3645, "step": 7500 }, { "epoch": 6.04, "eval_accuracy": 0.7365975696926376, "eval_f1": 0.4730792498487599, "eval_loss": 1.3390766382217407, "eval_precision": 0.4358974358974359, "eval_recall": 0.5171957671957672, "eval_runtime": 2.391, "eval_samples_per_second": 127.141, "eval_steps_per_second": 63.571, "step": 7500 }, { "epoch": 6.45, "learning_rate": 7.107171635777599e-06, "loss": 0.2982, "step": 8000 }, { "epoch": 6.45, "eval_accuracy": 0.7314748629973791, "eval_f1": 0.4590354445090064, "eval_loss": 1.3889434337615967, "eval_precision": 0.40932642487046633, "eval_recall": 0.5224867724867724, "eval_runtime": 3.0054, "eval_samples_per_second": 101.151, "eval_steps_per_second": 50.576, "step": 8000 }, { "epoch": 6.85, "learning_rate": 6.301369863013699e-06, "loss": 0.2845, "step": 8500 }, { "epoch": 6.85, "eval_accuracy": 0.7376697641172266, "eval_f1": 0.47794117647058826, "eval_loss": 1.4109262228012085, "eval_precision": 0.4452054794520548, "eval_recall": 0.5158730158730159, "eval_runtime": 2.4417, "eval_samples_per_second": 124.505, "eval_steps_per_second": 62.253, "step": 8500 }, { "epoch": 7.25, "learning_rate": 5.495568090249799e-06, "loss": 0.2482, "step": 9000 }, { "epoch": 7.25, "eval_accuracy": 0.7375506314033834, "eval_f1": 0.5012531328320803, "eval_loss": 1.4667584896087646, "eval_precision": 0.47619047619047616, "eval_recall": 0.5291005291005291, "eval_runtime": 2.4653, "eval_samples_per_second": 123.313, "eval_steps_per_second": 61.657, "step": 9000 }, { "epoch": 7.66, "learning_rate": 4.689766317485899e-06, "loss": 0.2636, "step": 9500 }, { "epoch": 7.66, "eval_accuracy": 0.73409578270193, "eval_f1": 0.49150485436893204, "eval_loss": 1.4925192594528198, "eval_precision": 0.45403587443946186, "eval_recall": 0.5357142857142857, "eval_runtime": 3.4322, "eval_samples_per_second": 88.572, "eval_steps_per_second": 44.286, "step": 9500 }, { "epoch": 8.06, "learning_rate": 3.883964544721999e-06, "loss": 0.2605, "step": 10000 }, { "epoch": 8.06, "eval_accuracy": 0.7405289492494639, "eval_f1": 0.49358582773365917, "eval_loss": 1.4916423559188843, "eval_precision": 0.4585698070374574, "eval_recall": 0.5343915343915344, "eval_runtime": 2.4755, "eval_samples_per_second": 122.805, "eval_steps_per_second": 61.403, "step": 10000 }, { "epoch": 8.46, "learning_rate": 3.0781627719580986e-06, "loss": 0.1989, "step": 10500 }, { "epoch": 8.46, "eval_accuracy": 0.7387419585418156, "eval_f1": 0.4990780577750461, "eval_loss": 1.5096321105957031, "eval_precision": 0.4661308840413318, "eval_recall": 0.5370370370370371, "eval_runtime": 3.3245, "eval_samples_per_second": 91.444, "eval_steps_per_second": 45.722, "step": 10500 }, { "epoch": 8.86, "learning_rate": 2.2723609991941985e-06, "loss": 0.2415, "step": 11000 }, { "epoch": 8.86, "eval_accuracy": 0.744341196092447, "eval_f1": 0.4990914597213809, "eval_loss": 1.4698182344436646, "eval_precision": 0.46033519553072627, "eval_recall": 0.544973544973545, "eval_runtime": 2.5811, "eval_samples_per_second": 117.779, "eval_steps_per_second": 58.889, "step": 11000 }, { "epoch": 9.27, "learning_rate": 1.4665592264302982e-06, "loss": 0.2488, "step": 11500 }, { "epoch": 9.27, "eval_accuracy": 0.7455325232308792, "eval_f1": 0.491421568627451, "eval_loss": 1.4736005067825317, "eval_precision": 0.4577625570776256, "eval_recall": 0.5304232804232805, "eval_runtime": 3.3981, "eval_samples_per_second": 89.462, "eval_steps_per_second": 44.731, "step": 11500 }, { "epoch": 9.67, "learning_rate": 6.607574536663981e-07, "loss": 0.2129, "step": 12000 }, { "epoch": 9.67, "eval_accuracy": 0.7438646652370741, "eval_f1": 0.5012165450121654, "eval_loss": 1.506749153137207, "eval_precision": 0.46396396396396394, "eval_recall": 0.544973544973545, "eval_runtime": 2.854, "eval_samples_per_second": 106.517, "eval_steps_per_second": 53.258, "step": 12000 }, { "epoch": 10.0, "step": 12410, "total_flos": 503702005049490.0, "train_loss": 0.5683070008357046, "train_runtime": 1353.7746, "train_samples_per_second": 18.327, "train_steps_per_second": 9.167 } ], "logging_steps": 500, "max_steps": 12410, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 503702005049490.0, "trial_name": null, "trial_params": null }