{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.588235294117647, "eval_steps": 500, "global_step": 220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 0.00029999428845962564, "loss": 1.4179, "step": 1 }, { "epoch": 0.2, "learning_rate": 0.00029985723323727866, "loss": 1.3836, "step": 5 }, { "epoch": 0.41, "learning_rate": 0.0002994292047137618, "loss": 1.2291, "step": 10 }, { "epoch": 0.57, "eval_loss": 1.1107903718948364, "eval_runtime": 1318.1616, "eval_samples_per_second": 17.71, "eval_steps_per_second": 0.554, "step": 14 }, { "epoch": 1.01, "learning_rate": 0.00029871672920607153, "loss": 1.171, "step": 15 }, { "epoch": 1.22, "learning_rate": 0.0002977211629518312, "loss": 1.1376, "step": 20 }, { "epoch": 1.42, "learning_rate": 0.00029644440106799, "loss": 1.1237, "step": 25 }, { "epoch": 1.59, "eval_loss": 1.0651090145111084, "eval_runtime": 1320.1229, "eval_samples_per_second": 17.683, "eval_steps_per_second": 0.553, "step": 29 }, { "epoch": 2.03, "learning_rate": 0.0002948888739433602, "loss": 1.1078, "step": 30 }, { "epoch": 2.23, "learning_rate": 0.000293057542612234, "loss": 1.0947, "step": 35 }, { "epoch": 2.43, "learning_rate": 0.0002909538931178862, "loss": 1.0918, "step": 40 }, { "epoch": 2.6, "eval_loss": 1.0472216606140137, "eval_runtime": 1319.747, "eval_samples_per_second": 17.688, "eval_steps_per_second": 0.553, "step": 44 }, { "epoch": 3.04, "learning_rate": 0.000288581929876693, "loss": 1.0832, "step": 45 }, { "epoch": 3.24, "learning_rate": 0.0002859461680554975, "loss": 1.0705, "step": 50 }, { "epoch": 3.45, "learning_rate": 0.0002830516249767332, "loss": 1.0711, "step": 55 }, { "epoch": 3.57, "eval_loss": 1.0370612144470215, "eval_runtime": 1319.0792, "eval_samples_per_second": 17.697, "eval_steps_per_second": 0.553, "step": 58 }, { "epoch": 4.05, "learning_rate": 0.0002799038105676658, "loss": 1.0589, "step": 60 }, { "epoch": 4.26, "learning_rate": 0.0002765087168719328, "loss": 1.0489, "step": 65 }, { "epoch": 4.46, "learning_rate": 0.00027287280664334875, "loss": 1.0498, "step": 70 }, { "epoch": 4.58, "eval_loss": 1.0298787355422974, "eval_runtime": 1319.3478, "eval_samples_per_second": 17.694, "eval_steps_per_second": 0.553, "step": 73 }, { "epoch": 5.06, "learning_rate": 0.00026900300104368524, "loss": 1.0358, "step": 75 }, { "epoch": 5.27, "learning_rate": 0.00026490666646784665, "loss": 1.0261, "step": 80 }, { "epoch": 5.47, "learning_rate": 0.0002605916005215186, "loss": 1.0255, "step": 85 }, { "epoch": 5.6, "eval_loss": 1.0246919393539429, "eval_runtime": 1320.0654, "eval_samples_per_second": 17.684, "eval_steps_per_second": 0.553, "step": 88 }, { "epoch": 6.08, "learning_rate": 0.00025606601717798207, "loss": 1.0165, "step": 90 }, { "epoch": 6.28, "learning_rate": 0.00025133853114234905, "loss": 1.0119, "step": 95 }, { "epoch": 6.49, "learning_rate": 0.0002464181414529809, "loss": 1.0131, "step": 100 }, { "epoch": 6.57, "eval_loss": 1.0210436582565308, "eval_runtime": 1320.0018, "eval_samples_per_second": 17.685, "eval_steps_per_second": 0.553, "step": 102 }, { "epoch": 7.09, "learning_rate": 0.00024131421435130807, "loss": 1.0053, "step": 105 }, { "epoch": 7.29, "learning_rate": 0.00023603646545265687, "loss": 1.006, "step": 110 }, { "epoch": 7.5, "learning_rate": 0.00023059494125202357, "loss": 1.0047, "step": 115 }, { "epoch": 7.58, "eval_loss": 1.0181236267089844, "eval_runtime": 1318.7669, "eval_samples_per_second": 17.701, "eval_steps_per_second": 0.554, "step": 117 }, { "epoch": 8.1, "learning_rate": 0.000225, "loss": 1.0, "step": 120 }, { "epoch": 8.31, "learning_rate": 0.0002192622919852551, "loss": 0.9948, "step": 125 }, { "epoch": 8.51, "learning_rate": 0.0002133927392611049, "loss": 1.004, "step": 130 }, { "epoch": 8.59, "eval_loss": 1.0159988403320312, "eval_runtime": 1320.6395, "eval_samples_per_second": 17.676, "eval_steps_per_second": 0.553, "step": 132 }, { "epoch": 9.12, "learning_rate": 0.00020740251485476345, "loss": 0.9934, "step": 135 }, { "epoch": 9.32, "learning_rate": 0.00020130302149885031, "loss": 0.9901, "step": 140 }, { "epoch": 9.52, "learning_rate": 0.00019510586992564093, "loss": 1.0007, "step": 145 }, { "epoch": 9.57, "eval_loss": 1.01445734500885, "eval_runtime": 1320.1156, "eval_samples_per_second": 17.683, "eval_steps_per_second": 0.553, "step": 146 }, { "epoch": 10.13, "learning_rate": 0.0001888228567653781, "loss": 0.989, "step": 150 }, { "epoch": 10.33, "learning_rate": 0.0001824659420907154, "loss": 0.9889, "step": 155 }, { "epoch": 10.54, "learning_rate": 0.00017604722665003956, "loss": 0.9938, "step": 160 }, { "epoch": 10.58, "eval_loss": 1.0132454633712769, "eval_runtime": 1321.3534, "eval_samples_per_second": 17.667, "eval_steps_per_second": 0.552, "step": 161 }, { "epoch": 11.14, "learning_rate": 0.00016957892883300775, "loss": 0.9875, "step": 165 }, { "epoch": 11.35, "learning_rate": 0.00016307336141214873, "loss": 0.9864, "step": 170 }, { "epoch": 11.55, "learning_rate": 0.00015654290810480042, "loss": 0.9916, "step": 175 }, { "epoch": 11.59, "eval_loss": 1.012216567993164, "eval_runtime": 1320.9896, "eval_samples_per_second": 17.672, "eval_steps_per_second": 0.553, "step": 176 }, { "epoch": 12.15, "learning_rate": 0.00015, "loss": 0.9837, "step": 180 }, { "epoch": 12.36, "learning_rate": 0.0001434570918951996, "loss": 0.984, "step": 185 }, { "epoch": 12.56, "learning_rate": 0.00013692663858785124, "loss": 0.9884, "step": 190 }, { "epoch": 12.56, "eval_loss": 1.0114786624908447, "eval_runtime": 1322.1026, "eval_samples_per_second": 17.657, "eval_steps_per_second": 0.552, "step": 190 }, { "epoch": 13.17, "learning_rate": 0.00013042107116699228, "loss": 0.981, "step": 195 }, { "epoch": 13.37, "learning_rate": 0.00012395277334996044, "loss": 0.9821, "step": 200 }, { "epoch": 13.58, "learning_rate": 0.00011753405790928456, "loss": 0.9881, "step": 205 }, { "epoch": 13.58, "eval_loss": 1.0108779668807983, "eval_runtime": 1322.1583, "eval_samples_per_second": 17.656, "eval_steps_per_second": 0.552, "step": 205 }, { "epoch": 14.18, "learning_rate": 0.00011117714323462186, "loss": 0.9778, "step": 210 }, { "epoch": 14.38, "learning_rate": 0.00010489413007435904, "loss": 0.982, "step": 215 }, { "epoch": 14.59, "learning_rate": 9.869697850114969e-05, "loss": 0.9856, "step": 220 }, { "epoch": 14.59, "eval_loss": 1.010445237159729, "eval_runtime": 1322.8082, "eval_samples_per_second": 17.647, "eval_steps_per_second": 0.552, "step": 220 }, { "epoch": 14.59, "step": 220, "total_flos": 6.664936558166016e+16, "train_loss": 1.0388530253009363, "train_runtime": 47681.956, "train_samples_per_second": 3.929, "train_steps_per_second": 0.008 } ], "logging_steps": 5, "max_steps": 360, "num_train_epochs": 15, "save_steps": 500, "total_flos": 6.664936558166016e+16, "trial_name": null, "trial_params": null }