{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.588235294117647,
  "eval_steps": 500,
  "global_step": 220,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 0.00029999428845962564,
      "loss": 1.4179,
      "step": 1
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.00029985723323727866,
      "loss": 1.3836,
      "step": 5
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0002994292047137618,
      "loss": 1.2291,
      "step": 10
    },
    {
      "epoch": 0.57,
      "eval_loss": 1.1107903718948364,
      "eval_runtime": 1318.1616,
      "eval_samples_per_second": 17.71,
      "eval_steps_per_second": 0.554,
      "step": 14
    },
    {
      "epoch": 1.01,
      "learning_rate": 0.00029871672920607153,
      "loss": 1.171,
      "step": 15
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.0002977211629518312,
      "loss": 1.1376,
      "step": 20
    },
    {
      "epoch": 1.42,
      "learning_rate": 0.00029644440106799,
      "loss": 1.1237,
      "step": 25
    },
    {
      "epoch": 1.59,
      "eval_loss": 1.0651090145111084,
      "eval_runtime": 1320.1229,
      "eval_samples_per_second": 17.683,
      "eval_steps_per_second": 0.553,
      "step": 29
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.0002948888739433602,
      "loss": 1.1078,
      "step": 30
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.000293057542612234,
      "loss": 1.0947,
      "step": 35
    },
    {
      "epoch": 2.43,
      "learning_rate": 0.0002909538931178862,
      "loss": 1.0918,
      "step": 40
    },
    {
      "epoch": 2.6,
      "eval_loss": 1.0472216606140137,
      "eval_runtime": 1319.747,
      "eval_samples_per_second": 17.688,
      "eval_steps_per_second": 0.553,
      "step": 44
    },
    {
      "epoch": 3.04,
      "learning_rate": 0.000288581929876693,
      "loss": 1.0832,
      "step": 45
    },
    {
      "epoch": 3.24,
      "learning_rate": 0.0002859461680554975,
      "loss": 1.0705,
      "step": 50
    },
    {
      "epoch": 3.45,
      "learning_rate": 0.0002830516249767332,
      "loss": 1.0711,
      "step": 55
    },
    {
      "epoch": 3.57,
      "eval_loss": 1.0370612144470215,
      "eval_runtime": 1319.0792,
      "eval_samples_per_second": 17.697,
      "eval_steps_per_second": 0.553,
      "step": 58
    },
    {
      "epoch": 4.05,
      "learning_rate": 0.0002799038105676658,
      "loss": 1.0589,
      "step": 60
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.0002765087168719328,
      "loss": 1.0489,
      "step": 65
    },
    {
      "epoch": 4.46,
      "learning_rate": 0.00027287280664334875,
      "loss": 1.0498,
      "step": 70
    },
    {
      "epoch": 4.58,
      "eval_loss": 1.0298787355422974,
      "eval_runtime": 1319.3478,
      "eval_samples_per_second": 17.694,
      "eval_steps_per_second": 0.553,
      "step": 73
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.00026900300104368524,
      "loss": 1.0358,
      "step": 75
    },
    {
      "epoch": 5.27,
      "learning_rate": 0.00026490666646784665,
      "loss": 1.0261,
      "step": 80
    },
    {
      "epoch": 5.47,
      "learning_rate": 0.0002605916005215186,
      "loss": 1.0255,
      "step": 85
    },
    {
      "epoch": 5.6,
      "eval_loss": 1.0246919393539429,
      "eval_runtime": 1320.0654,
      "eval_samples_per_second": 17.684,
      "eval_steps_per_second": 0.553,
      "step": 88
    },
    {
      "epoch": 6.08,
      "learning_rate": 0.00025606601717798207,
      "loss": 1.0165,
      "step": 90
    },
    {
      "epoch": 6.28,
      "learning_rate": 0.00025133853114234905,
      "loss": 1.0119,
      "step": 95
    },
    {
      "epoch": 6.49,
      "learning_rate": 0.0002464181414529809,
      "loss": 1.0131,
      "step": 100
    },
    {
      "epoch": 6.57,
      "eval_loss": 1.0210436582565308,
      "eval_runtime": 1320.0018,
      "eval_samples_per_second": 17.685,
      "eval_steps_per_second": 0.553,
      "step": 102
    },
    {
      "epoch": 7.09,
      "learning_rate": 0.00024131421435130807,
      "loss": 1.0053,
      "step": 105
    },
    {
      "epoch": 7.29,
      "learning_rate": 0.00023603646545265687,
      "loss": 1.006,
      "step": 110
    },
    {
      "epoch": 7.5,
      "learning_rate": 0.00023059494125202357,
      "loss": 1.0047,
      "step": 115
    },
    {
      "epoch": 7.58,
      "eval_loss": 1.0181236267089844,
      "eval_runtime": 1318.7669,
      "eval_samples_per_second": 17.701,
      "eval_steps_per_second": 0.554,
      "step": 117
    },
    {
      "epoch": 8.1,
      "learning_rate": 0.000225,
      "loss": 1.0,
      "step": 120
    },
    {
      "epoch": 8.31,
      "learning_rate": 0.0002192622919852551,
      "loss": 0.9948,
      "step": 125
    },
    {
      "epoch": 8.51,
      "learning_rate": 0.0002133927392611049,
      "loss": 1.004,
      "step": 130
    },
    {
      "epoch": 8.59,
      "eval_loss": 1.0159988403320312,
      "eval_runtime": 1320.6395,
      "eval_samples_per_second": 17.676,
      "eval_steps_per_second": 0.553,
      "step": 132
    },
    {
      "epoch": 9.12,
      "learning_rate": 0.00020740251485476345,
      "loss": 0.9934,
      "step": 135
    },
    {
      "epoch": 9.32,
      "learning_rate": 0.00020130302149885031,
      "loss": 0.9901,
      "step": 140
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.00019510586992564093,
      "loss": 1.0007,
      "step": 145
    },
    {
      "epoch": 9.57,
      "eval_loss": 1.01445734500885,
      "eval_runtime": 1320.1156,
      "eval_samples_per_second": 17.683,
      "eval_steps_per_second": 0.553,
      "step": 146
    },
    {
      "epoch": 10.13,
      "learning_rate": 0.0001888228567653781,
      "loss": 0.989,
      "step": 150
    },
    {
      "epoch": 10.33,
      "learning_rate": 0.0001824659420907154,
      "loss": 0.9889,
      "step": 155
    },
    {
      "epoch": 10.54,
      "learning_rate": 0.00017604722665003956,
      "loss": 0.9938,
      "step": 160
    },
    {
      "epoch": 10.58,
      "eval_loss": 1.0132454633712769,
      "eval_runtime": 1321.3534,
      "eval_samples_per_second": 17.667,
      "eval_steps_per_second": 0.552,
      "step": 161
    },
    {
      "epoch": 11.14,
      "learning_rate": 0.00016957892883300775,
      "loss": 0.9875,
      "step": 165
    },
    {
      "epoch": 11.35,
      "learning_rate": 0.00016307336141214873,
      "loss": 0.9864,
      "step": 170
    },
    {
      "epoch": 11.55,
      "learning_rate": 0.00015654290810480042,
      "loss": 0.9916,
      "step": 175
    },
    {
      "epoch": 11.59,
      "eval_loss": 1.012216567993164,
      "eval_runtime": 1320.9896,
      "eval_samples_per_second": 17.672,
      "eval_steps_per_second": 0.553,
      "step": 176
    },
    {
      "epoch": 12.15,
      "learning_rate": 0.00015,
      "loss": 0.9837,
      "step": 180
    },
    {
      "epoch": 12.36,
      "learning_rate": 0.0001434570918951996,
      "loss": 0.984,
      "step": 185
    },
    {
      "epoch": 12.56,
      "learning_rate": 0.00013692663858785124,
      "loss": 0.9884,
      "step": 190
    },
    {
      "epoch": 12.56,
      "eval_loss": 1.0114786624908447,
      "eval_runtime": 1322.1026,
      "eval_samples_per_second": 17.657,
      "eval_steps_per_second": 0.552,
      "step": 190
    },
    {
      "epoch": 13.17,
      "learning_rate": 0.00013042107116699228,
      "loss": 0.981,
      "step": 195
    },
    {
      "epoch": 13.37,
      "learning_rate": 0.00012395277334996044,
      "loss": 0.9821,
      "step": 200
    },
    {
      "epoch": 13.58,
      "learning_rate": 0.00011753405790928456,
      "loss": 0.9881,
      "step": 205
    },
    {
      "epoch": 13.58,
      "eval_loss": 1.0108779668807983,
      "eval_runtime": 1322.1583,
      "eval_samples_per_second": 17.656,
      "eval_steps_per_second": 0.552,
      "step": 205
    },
    {
      "epoch": 14.18,
      "learning_rate": 0.00011117714323462186,
      "loss": 0.9778,
      "step": 210
    },
    {
      "epoch": 14.38,
      "learning_rate": 0.00010489413007435904,
      "loss": 0.982,
      "step": 215
    },
    {
      "epoch": 14.59,
      "learning_rate": 9.869697850114969e-05,
      "loss": 0.9856,
      "step": 220
    },
    {
      "epoch": 14.59,
      "eval_loss": 1.010445237159729,
      "eval_runtime": 1322.8082,
      "eval_samples_per_second": 17.647,
      "eval_steps_per_second": 0.552,
      "step": 220
    },
    {
      "epoch": 14.59,
      "step": 220,
      "total_flos": 6.664936558166016e+16,
      "train_loss": 1.0388530253009363,
      "train_runtime": 47681.956,
      "train_samples_per_second": 3.929,
      "train_steps_per_second": 0.008
    }
  ],
  "logging_steps": 5,
  "max_steps": 360,
  "num_train_epochs": 15,
  "save_steps": 500,
  "total_flos": 6.664936558166016e+16,
  "trial_name": null,
  "trial_params": null
}