|
{ |
|
"best_metric": 0.781834372217275, |
|
"best_model_checkpoint": "../saved_model/cino-small-v2_tusa/checkpoint-500", |
|
"epoch": 40.0, |
|
"eval_steps": 500, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.759, |
|
"eval_f1": 0.7449735449735448, |
|
"eval_loss": 0.538531482219696, |
|
"eval_precision": 0.7910112359550562, |
|
"eval_recall": 0.704, |
|
"eval_runtime": 3.843, |
|
"eval_samples_per_second": 260.21, |
|
"eval_steps_per_second": 8.327, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4062, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.755, |
|
"eval_f1": 0.781834372217275, |
|
"eval_loss": 0.5724849700927734, |
|
"eval_precision": 0.7046548956661316, |
|
"eval_recall": 0.878, |
|
"eval_runtime": 3.8652, |
|
"eval_samples_per_second": 258.722, |
|
"eval_steps_per_second": 8.279, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.763, |
|
"eval_f1": 0.7718960538979788, |
|
"eval_loss": 0.6156216263771057, |
|
"eval_precision": 0.7439703153988868, |
|
"eval_recall": 0.802, |
|
"eval_runtime": 3.8674, |
|
"eval_samples_per_second": 258.57, |
|
"eval_steps_per_second": 8.274, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1767, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.763, |
|
"eval_f1": 0.7351955307262571, |
|
"eval_loss": 0.7199212908744812, |
|
"eval_precision": 0.8329113924050633, |
|
"eval_recall": 0.658, |
|
"eval_runtime": 3.8486, |
|
"eval_samples_per_second": 259.833, |
|
"eval_steps_per_second": 8.315, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.7188755020080321, |
|
"eval_loss": 1.1636953353881836, |
|
"eval_precision": 0.7217741935483871, |
|
"eval_recall": 0.716, |
|
"eval_runtime": 3.8679, |
|
"eval_samples_per_second": 258.536, |
|
"eval_steps_per_second": 8.273, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.0851, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.668, |
|
"eval_f1": 0.7167235494880547, |
|
"eval_loss": 1.6249794960021973, |
|
"eval_precision": 0.625, |
|
"eval_recall": 0.84, |
|
"eval_runtime": 3.8509, |
|
"eval_samples_per_second": 259.682, |
|
"eval_steps_per_second": 8.31, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.66, |
|
"eval_f1": 0.6903460837887068, |
|
"eval_loss": 1.97330904006958, |
|
"eval_precision": 0.6337792642140468, |
|
"eval_recall": 0.758, |
|
"eval_runtime": 3.8611, |
|
"eval_samples_per_second": 258.997, |
|
"eval_steps_per_second": 8.288, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.0374, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.672, |
|
"eval_f1": 0.6371681415929205, |
|
"eval_loss": 2.0722925662994385, |
|
"eval_precision": 0.7128712871287128, |
|
"eval_recall": 0.576, |
|
"eval_runtime": 3.8504, |
|
"eval_samples_per_second": 259.712, |
|
"eval_steps_per_second": 8.311, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.712, |
|
"eval_f1": 0.7037037037037037, |
|
"eval_loss": 2.0254833698272705, |
|
"eval_precision": 0.7245762711864406, |
|
"eval_recall": 0.684, |
|
"eval_runtime": 3.8436, |
|
"eval_samples_per_second": 260.171, |
|
"eval_steps_per_second": 8.325, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.0196, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.7421731123388583, |
|
"eval_loss": 2.1418700218200684, |
|
"eval_precision": 0.6877133105802048, |
|
"eval_recall": 0.806, |
|
"eval_runtime": 3.8516, |
|
"eval_samples_per_second": 259.63, |
|
"eval_steps_per_second": 8.308, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.732, |
|
"eval_f1": 0.7709401709401711, |
|
"eval_loss": 2.037372350692749, |
|
"eval_precision": 0.673134328358209, |
|
"eval_recall": 0.902, |
|
"eval_runtime": 3.8641, |
|
"eval_samples_per_second": 258.791, |
|
"eval_steps_per_second": 8.281, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.0178, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.734, |
|
"eval_f1": 0.7387033398821218, |
|
"eval_loss": 2.172621488571167, |
|
"eval_precision": 0.7258687258687259, |
|
"eval_recall": 0.752, |
|
"eval_runtime": 3.853, |
|
"eval_samples_per_second": 259.541, |
|
"eval_steps_per_second": 8.305, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.694, |
|
"eval_f1": 0.66, |
|
"eval_loss": 2.3637471199035645, |
|
"eval_precision": 0.7425, |
|
"eval_recall": 0.594, |
|
"eval_runtime": 3.8612, |
|
"eval_samples_per_second": 258.984, |
|
"eval_steps_per_second": 8.288, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.0166, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.679, |
|
"eval_f1": 0.664576802507837, |
|
"eval_loss": 2.3070590496063232, |
|
"eval_precision": 0.6958424507658644, |
|
"eval_recall": 0.636, |
|
"eval_runtime": 3.8463, |
|
"eval_samples_per_second": 259.991, |
|
"eval_steps_per_second": 8.32, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.7265625, |
|
"eval_loss": 2.2964775562286377, |
|
"eval_precision": 0.7099236641221374, |
|
"eval_recall": 0.744, |
|
"eval_runtime": 3.8729, |
|
"eval_samples_per_second": 258.207, |
|
"eval_steps_per_second": 8.263, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0074, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.708, |
|
"eval_f1": 0.7176015473887815, |
|
"eval_loss": 2.449232339859009, |
|
"eval_precision": 0.6947565543071161, |
|
"eval_recall": 0.742, |
|
"eval_runtime": 3.861, |
|
"eval_samples_per_second": 258.998, |
|
"eval_steps_per_second": 8.288, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.726, |
|
"eval_f1": 0.727634194831014, |
|
"eval_loss": 2.2294154167175293, |
|
"eval_precision": 0.7233201581027668, |
|
"eval_recall": 0.732, |
|
"eval_runtime": 3.8559, |
|
"eval_samples_per_second": 259.344, |
|
"eval_steps_per_second": 8.299, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.0127, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.736, |
|
"eval_f1": 0.7333333333333334, |
|
"eval_loss": 2.354280471801758, |
|
"eval_precision": 0.7408163265306122, |
|
"eval_recall": 0.726, |
|
"eval_runtime": 3.8511, |
|
"eval_samples_per_second": 259.668, |
|
"eval_steps_per_second": 8.309, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.72, |
|
"eval_f1": 0.688195991091314, |
|
"eval_loss": 2.646545886993408, |
|
"eval_precision": 0.7763819095477387, |
|
"eval_recall": 0.618, |
|
"eval_runtime": 3.8559, |
|
"eval_samples_per_second": 259.341, |
|
"eval_steps_per_second": 8.299, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0071, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.728, |
|
"eval_f1": 0.7190082644628099, |
|
"eval_loss": 2.392427682876587, |
|
"eval_precision": 0.7435897435897436, |
|
"eval_recall": 0.696, |
|
"eval_runtime": 3.841, |
|
"eval_samples_per_second": 260.348, |
|
"eval_steps_per_second": 8.331, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.712, |
|
"eval_f1": 0.7181996086105676, |
|
"eval_loss": 2.35839581489563, |
|
"eval_precision": 0.7030651340996169, |
|
"eval_recall": 0.734, |
|
"eval_runtime": 3.8576, |
|
"eval_samples_per_second": 259.231, |
|
"eval_steps_per_second": 8.295, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0085, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.739, |
|
"eval_f1": 0.7116022099447514, |
|
"eval_loss": 1.9896154403686523, |
|
"eval_precision": 0.7950617283950617, |
|
"eval_recall": 0.644, |
|
"eval_runtime": 3.8513, |
|
"eval_samples_per_second": 259.655, |
|
"eval_steps_per_second": 8.309, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.751, |
|
"eval_f1": 0.7278688524590164, |
|
"eval_loss": 2.147411584854126, |
|
"eval_precision": 0.8024096385542169, |
|
"eval_recall": 0.666, |
|
"eval_runtime": 3.9041, |
|
"eval_samples_per_second": 256.144, |
|
"eval_steps_per_second": 8.197, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0069, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.75, |
|
"eval_f1": 0.7306034482758621, |
|
"eval_loss": 2.172492265701294, |
|
"eval_precision": 0.7920560747663551, |
|
"eval_recall": 0.678, |
|
"eval_runtime": 3.8555, |
|
"eval_samples_per_second": 259.368, |
|
"eval_steps_per_second": 8.3, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.732, |
|
"eval_f1": 0.7112068965517241, |
|
"eval_loss": 2.3147952556610107, |
|
"eval_precision": 0.7710280373831776, |
|
"eval_recall": 0.66, |
|
"eval_runtime": 3.8522, |
|
"eval_samples_per_second": 259.59, |
|
"eval_steps_per_second": 8.307, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.0067, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.726, |
|
"eval_f1": 0.7385496183206107, |
|
"eval_loss": 2.2775561809539795, |
|
"eval_precision": 0.7062043795620438, |
|
"eval_recall": 0.774, |
|
"eval_runtime": 3.8474, |
|
"eval_samples_per_second": 259.914, |
|
"eval_steps_per_second": 8.317, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.718, |
|
"eval_f1": 0.699360341151386, |
|
"eval_loss": 2.492180109024048, |
|
"eval_precision": 0.7488584474885844, |
|
"eval_recall": 0.656, |
|
"eval_runtime": 3.8591, |
|
"eval_samples_per_second": 259.131, |
|
"eval_steps_per_second": 8.292, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0032, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.726, |
|
"eval_f1": 0.7133891213389122, |
|
"eval_loss": 2.4056484699249268, |
|
"eval_precision": 0.7478070175438597, |
|
"eval_recall": 0.682, |
|
"eval_runtime": 3.8454, |
|
"eval_samples_per_second": 260.05, |
|
"eval_steps_per_second": 8.322, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.724, |
|
"eval_f1": 0.7177914110429447, |
|
"eval_loss": 2.5566771030426025, |
|
"eval_precision": 0.7343096234309623, |
|
"eval_recall": 0.702, |
|
"eval_runtime": 3.8506, |
|
"eval_samples_per_second": 259.7, |
|
"eval_steps_per_second": 8.31, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.001, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.733, |
|
"eval_f1": 0.7516279069767442, |
|
"eval_loss": 2.586094617843628, |
|
"eval_precision": 0.7026086956521739, |
|
"eval_recall": 0.808, |
|
"eval_runtime": 3.8404, |
|
"eval_samples_per_second": 260.391, |
|
"eval_steps_per_second": 8.333, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.738, |
|
"eval_f1": 0.7282157676348547, |
|
"eval_loss": 2.566093683242798, |
|
"eval_precision": 0.7564655172413793, |
|
"eval_recall": 0.702, |
|
"eval_runtime": 3.8517, |
|
"eval_samples_per_second": 259.629, |
|
"eval_steps_per_second": 8.308, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0016, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.74, |
|
"eval_f1": 0.7192224622030238, |
|
"eval_loss": 2.6535303592681885, |
|
"eval_precision": 0.7816901408450704, |
|
"eval_recall": 0.666, |
|
"eval_runtime": 3.9723, |
|
"eval_samples_per_second": 251.74, |
|
"eval_steps_per_second": 8.056, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.74, |
|
"eval_f1": 0.717391304347826, |
|
"eval_loss": 2.7205991744995117, |
|
"eval_precision": 0.7857142857142857, |
|
"eval_recall": 0.66, |
|
"eval_runtime": 3.9432, |
|
"eval_samples_per_second": 253.602, |
|
"eval_steps_per_second": 8.115, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0014, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.725, |
|
"eval_f1": 0.7422680412371134, |
|
"eval_loss": 2.6660046577453613, |
|
"eval_precision": 0.6984126984126984, |
|
"eval_recall": 0.792, |
|
"eval_runtime": 3.9996, |
|
"eval_samples_per_second": 250.027, |
|
"eval_steps_per_second": 8.001, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.742, |
|
"eval_f1": 0.7361963190184049, |
|
"eval_loss": 2.5981457233428955, |
|
"eval_precision": 0.7531380753138075, |
|
"eval_recall": 0.72, |
|
"eval_runtime": 3.9964, |
|
"eval_samples_per_second": 250.226, |
|
"eval_steps_per_second": 8.007, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0003, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.745, |
|
"eval_f1": 0.7384615384615385, |
|
"eval_loss": 2.6297309398651123, |
|
"eval_precision": 0.7578947368421053, |
|
"eval_recall": 0.72, |
|
"eval_runtime": 3.9747, |
|
"eval_samples_per_second": 251.591, |
|
"eval_steps_per_second": 8.051, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.738, |
|
"eval_f1": 0.7358870967741935, |
|
"eval_loss": 2.6337215900421143, |
|
"eval_precision": 0.741869918699187, |
|
"eval_recall": 0.73, |
|
"eval_runtime": 3.9752, |
|
"eval_samples_per_second": 251.558, |
|
"eval_steps_per_second": 8.05, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.0007, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.739, |
|
"eval_f1": 0.7300930713547054, |
|
"eval_loss": 2.645658493041992, |
|
"eval_precision": 0.7558886509635975, |
|
"eval_recall": 0.706, |
|
"eval_runtime": 3.9919, |
|
"eval_samples_per_second": 250.508, |
|
"eval_steps_per_second": 8.016, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.741, |
|
"eval_f1": 0.7304890738813736, |
|
"eval_loss": 2.6524713039398193, |
|
"eval_precision": 0.7613882863340564, |
|
"eval_recall": 0.702, |
|
"eval_runtime": 3.9627, |
|
"eval_samples_per_second": 252.356, |
|
"eval_steps_per_second": 8.075, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.742, |
|
"eval_f1": 0.734020618556701, |
|
"eval_loss": 2.6484949588775635, |
|
"eval_precision": 0.7574468085106383, |
|
"eval_recall": 0.712, |
|
"eval_runtime": 3.9449, |
|
"eval_samples_per_second": 253.495, |
|
"eval_steps_per_second": 8.112, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 40, |
|
"save_steps": 500, |
|
"total_flos": 4.238956756992e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|