{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.0, "eval_steps": 500, "global_step": 21008, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38, "learning_rate": 0.0009873064229499874, "loss": 2.3443, "step": 500 }, { "epoch": 0.76, "learning_rate": 0.0009746128458999747, "loss": 1.6863, "step": 1000 }, { "epoch": 1.0, "eval_accuracy": 0.575725640796642, "eval_loss": 1.955936312675476, "eval_runtime": 2249.8295, "eval_samples_per_second": 74.652, "eval_steps_per_second": 0.146, "step": 1313 }, { "epoch": 1.14, "learning_rate": 0.0009619192688499619, "loss": 1.4821, "step": 1500 }, { "epoch": 1.52, "learning_rate": 0.0009492256917999492, "loss": 1.3492, "step": 2000 }, { "epoch": 1.9, "learning_rate": 0.0009365321147499366, "loss": 1.3275, "step": 2500 }, { "epoch": 2.0, "eval_accuracy": 0.606090917209967, "eval_loss": 1.8276444673538208, "eval_runtime": 1553.5911, "eval_samples_per_second": 108.108, "eval_steps_per_second": 0.212, "step": 2626 }, { "epoch": 2.28, "learning_rate": 0.0009238385376999238, "loss": 1.1767, "step": 3000 }, { "epoch": 2.67, "learning_rate": 0.0009111449606499112, "loss": 1.151, "step": 3500 }, { "epoch": 3.0, "eval_accuracy": 0.6130332529546605, "eval_loss": 1.885665774345398, "eval_runtime": 1611.9735, "eval_samples_per_second": 104.192, "eval_steps_per_second": 0.204, "step": 3939 }, { "epoch": 3.05, "learning_rate": 0.0008984513835998985, "loss": 1.1393, "step": 4000 }, { "epoch": 3.43, "learning_rate": 0.0008857578065498858, "loss": 1.0116, "step": 4500 }, { "epoch": 3.81, "learning_rate": 0.0008730642294998731, "loss": 1.0336, "step": 5000 }, { "epoch": 4.0, "eval_accuracy": 0.6322288708284958, "eval_loss": 1.8160221576690674, "eval_runtime": 1568.2238, "eval_samples_per_second": 107.099, "eval_steps_per_second": 0.21, "step": 5252 }, { "epoch": 4.19, "learning_rate": 0.0008603706524498604, "loss": 0.9722, "step": 5500 }, { "epoch": 4.57, "learning_rate": 0.0008476770753998477, "loss": 0.9309, "step": 6000 }, { "epoch": 4.95, "learning_rate": 0.000834983498349835, "loss": 0.947, "step": 6500 }, { "epoch": 5.0, "eval_accuracy": 0.6317465987913429, "eval_loss": 1.8050605058670044, "eval_runtime": 1542.405, "eval_samples_per_second": 108.892, "eval_steps_per_second": 0.213, "step": 6565 }, { "epoch": 5.33, "learning_rate": 0.0008222899212998223, "loss": 0.8427, "step": 7000 }, { "epoch": 5.71, "learning_rate": 0.0008095963442498096, "loss": 0.8595, "step": 7500 }, { "epoch": 6.0, "eval_accuracy": 0.6442737638057813, "eval_loss": 1.7996306419372559, "eval_runtime": 1561.8382, "eval_samples_per_second": 107.537, "eval_steps_per_second": 0.211, "step": 7878 }, { "epoch": 6.09, "learning_rate": 0.0007969027671997969, "loss": 0.8393, "step": 8000 }, { "epoch": 6.47, "learning_rate": 0.0007842091901497842, "loss": 0.7719, "step": 8500 }, { "epoch": 6.85, "learning_rate": 0.0007715156130997715, "loss": 0.801, "step": 9000 }, { "epoch": 7.0, "eval_accuracy": 0.6533952546813134, "eval_loss": 1.7987133264541626, "eval_runtime": 1549.5067, "eval_samples_per_second": 108.393, "eval_steps_per_second": 0.212, "step": 9191 }, { "epoch": 7.24, "learning_rate": 0.0007588220360497588, "loss": 0.7443, "step": 9500 }, { "epoch": 7.62, "learning_rate": 0.0007461284589997462, "loss": 0.7296, "step": 10000 }, { "epoch": 8.0, "learning_rate": 0.0007334348819497334, "loss": 0.7508, "step": 10500 }, { "epoch": 8.0, "eval_accuracy": 0.6521925515763151, "eval_loss": 1.7864270210266113, "eval_runtime": 1567.4501, "eval_samples_per_second": 107.152, "eval_steps_per_second": 0.21, "step": 10504 }, { "epoch": 8.38, "learning_rate": 0.0007207413048997208, "loss": 0.6659, "step": 11000 }, { "epoch": 8.76, "learning_rate": 0.0007080477278497081, "loss": 0.694, "step": 11500 }, { "epoch": 9.0, "eval_accuracy": 0.6525795600011908, "eval_loss": 1.8871492147445679, "eval_runtime": 1630.371, "eval_samples_per_second": 103.016, "eval_steps_per_second": 0.202, "step": 11817 }, { "epoch": 9.14, "learning_rate": 0.0006953541507996953, "loss": 0.6611, "step": 12000 }, { "epoch": 9.52, "learning_rate": 0.0006826605737496827, "loss": 0.6326, "step": 12500 }, { "epoch": 9.9, "learning_rate": 0.00066996699669967, "loss": 0.6523, "step": 13000 }, { "epoch": 10.0, "eval_accuracy": 0.6648149802030305, "eval_loss": 1.805672287940979, "eval_runtime": 1557.784, "eval_samples_per_second": 107.817, "eval_steps_per_second": 0.211, "step": 13130 }, { "epoch": 10.28, "learning_rate": 0.0006572734196496572, "loss": 0.602, "step": 13500 }, { "epoch": 10.66, "learning_rate": 0.0006445798425996446, "loss": 0.5976, "step": 14000 }, { "epoch": 11.0, "eval_accuracy": 0.6706736923580721, "eval_loss": 1.8513981103897095, "eval_runtime": 1548.9015, "eval_samples_per_second": 108.435, "eval_steps_per_second": 0.212, "step": 14443 }, { "epoch": 11.04, "learning_rate": 0.0006318862655496319, "loss": 0.6049, "step": 14500 }, { "epoch": 11.42, "learning_rate": 0.0006191926884996192, "loss": 0.5476, "step": 15000 }, { "epoch": 11.81, "learning_rate": 0.0006064991114496065, "loss": 0.5743, "step": 15500 }, { "epoch": 12.0, "eval_accuracy": 0.6628501681998155, "eval_loss": 1.927115559577942, "eval_runtime": 1525.7141, "eval_samples_per_second": 110.083, "eval_steps_per_second": 0.216, "step": 15756 }, { "epoch": 12.19, "learning_rate": 0.0005938055343995938, "loss": 0.5441, "step": 16000 }, { "epoch": 12.57, "learning_rate": 0.0005811119573495812, "loss": 0.5232, "step": 16500 }, { "epoch": 12.95, "learning_rate": 0.0005684183802995684, "loss": 0.5426, "step": 17000 }, { "epoch": 13.0, "eval_accuracy": 0.6691911523920098, "eval_loss": 1.9220695495605469, "eval_runtime": 1530.5139, "eval_samples_per_second": 109.738, "eval_steps_per_second": 0.215, "step": 17069 }, { "epoch": 13.33, "learning_rate": 0.0005557248032495558, "loss": 0.5004, "step": 17500 }, { "epoch": 13.71, "learning_rate": 0.0005430312261995431, "loss": 0.5092, "step": 18000 }, { "epoch": 14.0, "eval_accuracy": 0.6751987139412342, "eval_loss": 1.9163544178009033, "eval_runtime": 1663.4966, "eval_samples_per_second": 100.965, "eval_steps_per_second": 0.198, "step": 18382 }, { "epoch": 14.09, "learning_rate": 0.0005303376491495303, "loss": 0.5017, "step": 18500 }, { "epoch": 14.47, "learning_rate": 0.0005176440720995177, "loss": 0.4729, "step": 19000 }, { "epoch": 14.85, "learning_rate": 0.000504950495049505, "loss": 0.4808, "step": 19500 }, { "epoch": 15.0, "eval_accuracy": 0.6743115715519038, "eval_loss": 1.925864815711975, "eval_runtime": 1533.1814, "eval_samples_per_second": 109.547, "eval_steps_per_second": 0.215, "step": 19695 }, { "epoch": 15.23, "learning_rate": 0.0004922569179994923, "loss": 0.4595, "step": 20000 }, { "epoch": 15.61, "learning_rate": 0.00047956334094947955, "loss": 0.4489, "step": 20500 }, { "epoch": 15.99, "learning_rate": 0.00046686976389946687, "loss": 0.4611, "step": 21000 }, { "epoch": 16.0, "eval_accuracy": 0.6784972165163288, "eval_loss": 1.9857261180877686, "eval_runtime": 1537.6389, "eval_samples_per_second": 109.229, "eval_steps_per_second": 0.214, "step": 21008 }, { "epoch": 16.0, "step": 21008, "total_flos": 7.331020593786202e+20, "train_loss": 0.0, "train_runtime": 372.6046, "train_samples_per_second": 27045.44, "train_steps_per_second": 52.858 } ], "logging_steps": 500, "max_steps": 19695, "num_train_epochs": 15, "save_steps": 500, "total_flos": 7.331020593786202e+20, "trial_name": null, "trial_params": null }