diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5619 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9064422143088378, + "eval_steps": 100, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": "2.5641e-07", + "loss": 0.5987, + "slid_loss": 0.5987, + "step": 1, + "time": 416.96 + }, + { + "epoch": 0.0, + "learning_rate": "5.1282e-07", + "loss": 0.6236, + "slid_loss": 0.6112, + "step": 2, + "time": 308.68 + }, + { + "epoch": 0.0, + "learning_rate": "7.6923e-07", + "loss": 0.61, + "slid_loss": 0.6108, + "step": 3, + "time": 303.89 + }, + { + "epoch": 0.01, + "learning_rate": "1.0256e-06", + "loss": 0.6193, + "slid_loss": 0.6129, + "step": 4, + "time": 300.55 + }, + { + "epoch": 0.01, + "learning_rate": "1.2821e-06", + "loss": 0.62, + "slid_loss": 0.6143, + "step": 5, + "time": 301.85 + }, + { + "epoch": 0.01, + "learning_rate": "1.5385e-06", + "loss": 0.6036, + "slid_loss": 0.6125, + "step": 6, + "time": 303.95 + }, + { + "epoch": 0.01, + "learning_rate": "1.7949e-06", + "loss": 0.6116, + "slid_loss": 0.6124, + "step": 7, + "time": 301.83 + }, + { + "epoch": 0.01, + "learning_rate": "2.0513e-06", + "loss": 0.5796, + "slid_loss": 0.6083, + "step": 8, + "time": 304.86 + }, + { + "epoch": 0.01, + "learning_rate": "2.3077e-06", + "loss": 0.6255, + "slid_loss": 0.6102, + "step": 9, + "time": 299.19 + }, + { + "epoch": 0.01, + "learning_rate": "2.5641e-06", + "loss": 0.6125, + "slid_loss": 0.6104, + "step": 10, + "time": 303.97 + }, + { + "epoch": 0.01, + "learning_rate": "2.8205e-06", + "loss": 0.6187, + "slid_loss": 0.6112, + "step": 11, + "time": 299.93 + }, + { + "epoch": 0.02, + "learning_rate": "3.0769e-06", + "loss": 0.6229, + "slid_loss": 0.6122, + "step": 12, + "time": 299.47 + }, + { + "epoch": 0.02, + "learning_rate": "3.3333e-06", + "loss": 0.6, + "slid_loss": 0.6112, + "step": 13, + "time": 297.05 + }, + { + "epoch": 0.02, + "learning_rate": "3.5897e-06", + "loss": 0.6079, + "slid_loss": 0.611, + "step": 14, + "time": 300.11 + }, + { + "epoch": 0.02, + "learning_rate": "3.8462e-06", + "loss": 0.5676, + "slid_loss": 0.6081, + "step": 15, + "time": 301.79 + }, + { + "epoch": 0.02, + "learning_rate": "4.1026e-06", + "loss": 0.5754, + "slid_loss": 0.606, + "step": 16, + "time": 294.41 + }, + { + "epoch": 0.02, + "learning_rate": "4.3590e-06", + "loss": 0.5977, + "slid_loss": 0.6056, + "step": 17, + "time": 299.96 + }, + { + "epoch": 0.02, + "learning_rate": "4.6154e-06", + "loss": 0.5934, + "slid_loss": 0.6049, + "step": 18, + "time": 300.94 + }, + { + "epoch": 0.02, + "learning_rate": "4.8718e-06", + "loss": 0.6201, + "slid_loss": 0.6057, + "step": 19, + "time": 299.48 + }, + { + "epoch": 0.03, + "learning_rate": "5.1282e-06", + "loss": 0.6358, + "slid_loss": 0.6072, + "step": 20, + "time": 303.69 + }, + { + "epoch": 0.03, + "learning_rate": "5.3846e-06", + "loss": 0.6163, + "slid_loss": 0.6076, + "step": 21, + "time": 302.21 + }, + { + "epoch": 0.03, + "learning_rate": "5.6410e-06", + "loss": 0.5778, + "slid_loss": 0.6063, + "step": 22, + "time": 299.39 + }, + { + "epoch": 0.03, + "learning_rate": "5.8974e-06", + "loss": 0.6006, + "slid_loss": 0.606, + "step": 23, + "time": 303.27 + }, + { + "epoch": 0.03, + "learning_rate": "6.1538e-06", + "loss": 0.5979, + "slid_loss": 0.6057, + "step": 24, + "time": 301.63 + }, + { + "epoch": 0.03, + "learning_rate": "6.4103e-06", + "loss": 0.5862, + "slid_loss": 0.6049, + "step": 25, + "time": 298.76 + }, + { + "epoch": 0.03, + "learning_rate": "6.6667e-06", + "loss": 0.5932, + "slid_loss": 0.6045, + "step": 26, + "time": 301.27 + }, + { + "epoch": 0.03, + "learning_rate": "6.9231e-06", + "loss": 0.6079, + "slid_loss": 0.6046, + "step": 27, + "time": 301.95 + }, + { + "epoch": 0.04, + "learning_rate": "7.1795e-06", + "loss": 0.6066, + "slid_loss": 0.6047, + "step": 28, + "time": 306.01 + }, + { + "epoch": 0.04, + "learning_rate": "7.4359e-06", + "loss": 0.6065, + "slid_loss": 0.6047, + "step": 29, + "time": 305.69 + }, + { + "epoch": 0.04, + "learning_rate": "7.6923e-06", + "loss": 0.6172, + "slid_loss": 0.6051, + "step": 30, + "time": 307.86 + }, + { + "epoch": 0.04, + "learning_rate": "7.9487e-06", + "loss": 0.5885, + "slid_loss": 0.6046, + "step": 31, + "time": 302.34 + }, + { + "epoch": 0.04, + "learning_rate": "8.2051e-06", + "loss": 0.616, + "slid_loss": 0.605, + "step": 32, + "time": 297.77 + }, + { + "epoch": 0.04, + "learning_rate": "8.4615e-06", + "loss": 0.5881, + "slid_loss": 0.6044, + "step": 33, + "time": 303.91 + }, + { + "epoch": 0.04, + "learning_rate": "8.7179e-06", + "loss": 0.6276, + "slid_loss": 0.6051, + "step": 34, + "time": 300.64 + }, + { + "epoch": 0.05, + "learning_rate": "8.9744e-06", + "loss": 0.6095, + "slid_loss": 0.6052, + "step": 35, + "time": 293.11 + }, + { + "epoch": 0.05, + "learning_rate": "9.2308e-06", + "loss": 0.6124, + "slid_loss": 0.6054, + "step": 36, + "time": 302.12 + }, + { + "epoch": 0.05, + "learning_rate": "9.4872e-06", + "loss": 0.5959, + "slid_loss": 0.6052, + "step": 37, + "time": 300.08 + }, + { + "epoch": 0.05, + "learning_rate": "9.7436e-06", + "loss": 0.6022, + "slid_loss": 0.6051, + "step": 38, + "time": 301.67 + }, + { + "epoch": 0.05, + "learning_rate": "1.0000e-05", + "loss": 0.5919, + "slid_loss": 0.6048, + "step": 39, + "time": 300.9 + }, + { + "epoch": 0.05, + "learning_rate": "1.0000e-05", + "loss": 0.5992, + "slid_loss": 0.6046, + "step": 40, + "time": 303.93 + }, + { + "epoch": 0.05, + "learning_rate": "1.0000e-05", + "loss": 0.581, + "slid_loss": 0.6041, + "step": 41, + "time": 303.43 + }, + { + "epoch": 0.05, + "learning_rate": "1.0000e-05", + "loss": 0.5893, + "slid_loss": 0.6037, + "step": 42, + "time": 296.02 + }, + { + "epoch": 0.06, + "learning_rate": "1.0000e-05", + "loss": 0.6011, + "slid_loss": 0.6036, + "step": 43, + "time": 296.37 + }, + { + "epoch": 0.06, + "learning_rate": "1.0000e-05", + "loss": 0.619, + "slid_loss": 0.604, + "step": 44, + "time": 332.28 + }, + { + "epoch": 0.06, + "learning_rate": "1.0000e-05", + "loss": 0.5861, + "slid_loss": 0.6036, + "step": 45, + "time": 299.29 + }, + { + "epoch": 0.06, + "learning_rate": "1.0000e-05", + "loss": 0.5954, + "slid_loss": 0.6034, + "step": 46, + "time": 303.55 + }, + { + "epoch": 0.06, + "learning_rate": "9.9999e-06", + "loss": 0.6072, + "slid_loss": 0.6035, + "step": 47, + "time": 304.38 + }, + { + "epoch": 0.06, + "learning_rate": "9.9999e-06", + "loss": 0.5787, + "slid_loss": 0.603, + "step": 48, + "time": 304.24 + }, + { + "epoch": 0.06, + "learning_rate": "9.9999e-06", + "loss": 0.5957, + "slid_loss": 0.6028, + "step": 49, + "time": 298.5 + }, + { + "epoch": 0.06, + "learning_rate": "9.9999e-06", + "loss": 0.6328, + "slid_loss": 0.6034, + "step": 50, + "time": 303.53 + }, + { + "epoch": 0.07, + "learning_rate": "9.9999e-06", + "loss": 0.581, + "slid_loss": 0.603, + "step": 51, + "time": 302.2 + }, + { + "epoch": 0.07, + "learning_rate": "9.9999e-06", + "loss": 0.6062, + "slid_loss": 0.6031, + "step": 52, + "time": 301.68 + }, + { + "epoch": 0.07, + "learning_rate": "9.9998e-06", + "loss": 0.6003, + "slid_loss": 0.603, + "step": 53, + "time": 296.0 + }, + { + "epoch": 0.07, + "learning_rate": "9.9998e-06", + "loss": 0.6157, + "slid_loss": 0.6032, + "step": 54, + "time": 299.54 + }, + { + "epoch": 0.07, + "learning_rate": "9.9998e-06", + "loss": 0.604, + "slid_loss": 0.6032, + "step": 55, + "time": 299.1 + }, + { + "epoch": 0.07, + "learning_rate": "9.9998e-06", + "loss": 0.5752, + "slid_loss": 0.6027, + "step": 56, + "time": 294.14 + }, + { + "epoch": 0.07, + "learning_rate": "9.9997e-06", + "loss": 0.5931, + "slid_loss": 0.6026, + "step": 57, + "time": 381.96 + }, + { + "epoch": 0.08, + "learning_rate": "9.9997e-06", + "loss": 0.5937, + "slid_loss": 0.6024, + "step": 58, + "time": 293.92 + }, + { + "epoch": 0.08, + "learning_rate": "9.9997e-06", + "loss": 0.6232, + "slid_loss": 0.6028, + "step": 59, + "time": 304.51 + }, + { + "epoch": 0.08, + "learning_rate": "9.9996e-06", + "loss": 0.6031, + "slid_loss": 0.6028, + "step": 60, + "time": 306.02 + }, + { + "epoch": 0.08, + "learning_rate": "9.9996e-06", + "loss": 0.601, + "slid_loss": 0.6028, + "step": 61, + "time": 301.96 + }, + { + "epoch": 0.08, + "learning_rate": "9.9996e-06", + "loss": 0.6268, + "slid_loss": 0.6031, + "step": 62, + "time": 302.5 + }, + { + "epoch": 0.08, + "learning_rate": "9.9995e-06", + "loss": 0.6078, + "slid_loss": 0.6032, + "step": 63, + "time": 302.51 + }, + { + "epoch": 0.08, + "learning_rate": "9.9995e-06", + "loss": 0.6252, + "slid_loss": 0.6036, + "step": 64, + "time": 297.93 + }, + { + "epoch": 0.08, + "learning_rate": "9.9994e-06", + "loss": 0.6007, + "slid_loss": 0.6035, + "step": 65, + "time": 302.92 + }, + { + "epoch": 0.09, + "learning_rate": "9.9994e-06", + "loss": 0.5985, + "slid_loss": 0.6034, + "step": 66, + "time": 298.35 + }, + { + "epoch": 0.09, + "learning_rate": "9.9993e-06", + "loss": 0.5966, + "slid_loss": 0.6033, + "step": 67, + "time": 300.99 + }, + { + "epoch": 0.09, + "learning_rate": "9.9993e-06", + "loss": 0.5678, + "slid_loss": 0.6028, + "step": 68, + "time": 296.29 + }, + { + "epoch": 0.09, + "learning_rate": "9.9992e-06", + "loss": 0.5818, + "slid_loss": 0.6025, + "step": 69, + "time": 294.95 + }, + { + "epoch": 0.09, + "learning_rate": "9.9992e-06", + "loss": 0.6138, + "slid_loss": 0.6027, + "step": 70, + "time": 298.16 + }, + { + "epoch": 0.09, + "learning_rate": "9.9991e-06", + "loss": 0.6071, + "slid_loss": 0.6027, + "step": 71, + "time": 302.87 + }, + { + "epoch": 0.09, + "learning_rate": "9.9991e-06", + "loss": 0.6126, + "slid_loss": 0.6029, + "step": 72, + "time": 302.98 + }, + { + "epoch": 0.09, + "learning_rate": "9.9990e-06", + "loss": 0.5945, + "slid_loss": 0.6028, + "step": 73, + "time": 429.67 + }, + { + "epoch": 0.1, + "learning_rate": "9.9990e-06", + "loss": 0.5973, + "slid_loss": 0.6027, + "step": 74, + "time": 303.2 + }, + { + "epoch": 0.1, + "learning_rate": "9.9989e-06", + "loss": 0.6045, + "slid_loss": 0.6027, + "step": 75, + "time": 298.66 + }, + { + "epoch": 0.1, + "learning_rate": "9.9988e-06", + "loss": 0.5884, + "slid_loss": 0.6025, + "step": 76, + "time": 305.59 + }, + { + "epoch": 0.1, + "learning_rate": "9.9988e-06", + "loss": 0.6009, + "slid_loss": 0.6025, + "step": 77, + "time": 300.33 + }, + { + "epoch": 0.1, + "learning_rate": "9.9987e-06", + "loss": 0.6005, + "slid_loss": 0.6025, + "step": 78, + "time": 300.6 + }, + { + "epoch": 0.1, + "learning_rate": "9.9986e-06", + "loss": 0.5811, + "slid_loss": 0.6022, + "step": 79, + "time": 304.71 + }, + { + "epoch": 0.1, + "learning_rate": "9.9986e-06", + "loss": 0.5954, + "slid_loss": 0.6021, + "step": 80, + "time": 295.84 + }, + { + "epoch": 0.1, + "learning_rate": "9.9985e-06", + "loss": 0.5864, + "slid_loss": 0.6019, + "step": 81, + "time": 307.04 + }, + { + "epoch": 0.11, + "learning_rate": "9.9984e-06", + "loss": 0.6053, + "slid_loss": 0.602, + "step": 82, + "time": 301.52 + }, + { + "epoch": 0.11, + "learning_rate": "9.9984e-06", + "loss": 0.5725, + "slid_loss": 0.6016, + "step": 83, + "time": 297.7 + }, + { + "epoch": 0.11, + "learning_rate": "9.9983e-06", + "loss": 0.6035, + "slid_loss": 0.6016, + "step": 84, + "time": 304.19 + }, + { + "epoch": 0.11, + "learning_rate": "9.9982e-06", + "loss": 0.6073, + "slid_loss": 0.6017, + "step": 85, + "time": 338.5 + }, + { + "epoch": 0.11, + "learning_rate": "9.9981e-06", + "loss": 0.6044, + "slid_loss": 0.6017, + "step": 86, + "time": 296.47 + }, + { + "epoch": 0.11, + "learning_rate": "9.9981e-06", + "loss": 0.6035, + "slid_loss": 0.6017, + "step": 87, + "time": 300.44 + }, + { + "epoch": 0.11, + "learning_rate": "9.9980e-06", + "loss": 0.6143, + "slid_loss": 0.6019, + "step": 88, + "time": 299.11 + }, + { + "epoch": 0.12, + "learning_rate": "9.9979e-06", + "loss": 0.6097, + "slid_loss": 0.602, + "step": 89, + "time": 299.75 + }, + { + "epoch": 0.12, + "learning_rate": "9.9978e-06", + "loss": 0.5806, + "slid_loss": 0.6017, + "step": 90, + "time": 301.43 + }, + { + "epoch": 0.12, + "learning_rate": "9.9977e-06", + "loss": 0.5961, + "slid_loss": 0.6017, + "step": 91, + "time": 304.29 + }, + { + "epoch": 0.12, + "learning_rate": "9.9976e-06", + "loss": 0.6096, + "slid_loss": 0.6018, + "step": 92, + "time": 303.86 + }, + { + "epoch": 0.12, + "learning_rate": "9.9975e-06", + "loss": 0.625, + "slid_loss": 0.602, + "step": 93, + "time": 302.51 + }, + { + "epoch": 0.12, + "learning_rate": "9.9974e-06", + "loss": 0.5965, + "slid_loss": 0.602, + "step": 94, + "time": 301.0 + }, + { + "epoch": 0.12, + "learning_rate": "9.9974e-06", + "loss": 0.6001, + "slid_loss": 0.6019, + "step": 95, + "time": 299.4 + }, + { + "epoch": 0.12, + "learning_rate": "9.9973e-06", + "loss": 0.6154, + "slid_loss": 0.6021, + "step": 96, + "time": 301.42 + }, + { + "epoch": 0.13, + "learning_rate": "9.9972e-06", + "loss": 0.6055, + "slid_loss": 0.6021, + "step": 97, + "time": 304.69 + }, + { + "epoch": 0.13, + "learning_rate": "9.9971e-06", + "loss": 0.5986, + "slid_loss": 0.6021, + "step": 98, + "time": 301.12 + }, + { + "epoch": 0.13, + "learning_rate": "9.9970e-06", + "loss": 0.6033, + "slid_loss": 0.6021, + "step": 99, + "time": 302.85 + }, + { + "epoch": 0.13, + "learning_rate": "9.9969e-06", + "loss": 0.5926, + "slid_loss": 0.602, + "step": 100, + "time": 298.85 + }, + { + "epoch": 0.13, + "learning_rate": "9.9968e-06", + "loss": 0.5972, + "slid_loss": 0.602, + "step": 101, + "time": 2115.7 + }, + { + "epoch": 0.13, + "learning_rate": "9.9966e-06", + "loss": 0.6335, + "slid_loss": 0.6021, + "step": 102, + "time": 297.71 + }, + { + "epoch": 0.13, + "learning_rate": "9.9965e-06", + "loss": 0.6178, + "slid_loss": 0.6022, + "step": 103, + "time": 296.19 + }, + { + "epoch": 0.13, + "learning_rate": "9.9964e-06", + "loss": 0.5811, + "slid_loss": 0.6018, + "step": 104, + "time": 297.39 + }, + { + "epoch": 0.14, + "learning_rate": "9.9963e-06", + "loss": 0.6264, + "slid_loss": 0.6018, + "step": 105, + "time": 302.43 + }, + { + "epoch": 0.14, + "learning_rate": "9.9962e-06", + "loss": 0.5883, + "slid_loss": 0.6017, + "step": 106, + "time": 297.74 + }, + { + "epoch": 0.14, + "learning_rate": "9.9961e-06", + "loss": 0.5763, + "slid_loss": 0.6013, + "step": 107, + "time": 295.11 + }, + { + "epoch": 0.14, + "learning_rate": "9.9960e-06", + "loss": 0.5981, + "slid_loss": 0.6015, + "step": 108, + "time": 297.78 + }, + { + "epoch": 0.14, + "learning_rate": "9.9959e-06", + "loss": 0.5965, + "slid_loss": 0.6012, + "step": 109, + "time": 303.32 + }, + { + "epoch": 0.14, + "learning_rate": "9.9957e-06", + "loss": 0.5904, + "slid_loss": 0.601, + "step": 110, + "time": 305.55 + }, + { + "epoch": 0.14, + "learning_rate": "9.9956e-06", + "loss": 0.6257, + "slid_loss": 0.6011, + "step": 111, + "time": 309.15 + }, + { + "epoch": 0.15, + "learning_rate": "9.9955e-06", + "loss": 0.596, + "slid_loss": 0.6008, + "step": 112, + "time": 295.49 + }, + { + "epoch": 0.15, + "learning_rate": "9.9954e-06", + "loss": 0.6059, + "slid_loss": 0.6009, + "step": 113, + "time": 297.05 + }, + { + "epoch": 0.15, + "learning_rate": "9.9952e-06", + "loss": 0.5955, + "slid_loss": 0.6007, + "step": 114, + "time": 298.43 + }, + { + "epoch": 0.15, + "learning_rate": "9.9951e-06", + "loss": 0.5853, + "slid_loss": 0.6009, + "step": 115, + "time": 298.88 + }, + { + "epoch": 0.15, + "learning_rate": "9.9950e-06", + "loss": 0.5772, + "slid_loss": 0.6009, + "step": 116, + "time": 303.47 + }, + { + "epoch": 0.15, + "learning_rate": "9.9949e-06", + "loss": 0.6111, + "slid_loss": 0.6011, + "step": 117, + "time": 297.58 + }, + { + "epoch": 0.15, + "learning_rate": "9.9947e-06", + "loss": 0.5853, + "slid_loss": 0.601, + "step": 118, + "time": 303.99 + }, + { + "epoch": 0.15, + "learning_rate": "9.9946e-06", + "loss": 0.5996, + "slid_loss": 0.6008, + "step": 119, + "time": 298.4 + }, + { + "epoch": 0.16, + "learning_rate": "9.9945e-06", + "loss": 0.6205, + "slid_loss": 0.6006, + "step": 120, + "time": 344.13 + }, + { + "epoch": 0.16, + "learning_rate": "9.9943e-06", + "loss": 0.5966, + "slid_loss": 0.6004, + "step": 121, + "time": 634.45 + }, + { + "epoch": 0.16, + "learning_rate": "9.9942e-06", + "loss": 0.5996, + "slid_loss": 0.6007, + "step": 122, + "time": 298.46 + }, + { + "epoch": 0.16, + "learning_rate": "9.9940e-06", + "loss": 0.6048, + "slid_loss": 0.6007, + "step": 123, + "time": 305.47 + }, + { + "epoch": 0.16, + "learning_rate": "9.9939e-06", + "loss": 0.6087, + "slid_loss": 0.6008, + "step": 124, + "time": 298.35 + }, + { + "epoch": 0.16, + "learning_rate": "9.9938e-06", + "loss": 0.5717, + "slid_loss": 0.6007, + "step": 125, + "time": 304.29 + }, + { + "epoch": 0.16, + "learning_rate": "9.9936e-06", + "loss": 0.587, + "slid_loss": 0.6006, + "step": 126, + "time": 299.88 + }, + { + "epoch": 0.16, + "learning_rate": "9.9935e-06", + "loss": 0.5759, + "slid_loss": 0.6003, + "step": 127, + "time": 298.54 + }, + { + "epoch": 0.17, + "learning_rate": "9.9933e-06", + "loss": 0.5657, + "slid_loss": 0.5999, + "step": 128, + "time": 297.77 + }, + { + "epoch": 0.17, + "learning_rate": "9.9932e-06", + "loss": 0.6105, + "slid_loss": 0.5999, + "step": 129, + "time": 295.44 + }, + { + "epoch": 0.17, + "learning_rate": "9.9930e-06", + "loss": 0.5847, + "slid_loss": 0.5996, + "step": 130, + "time": 304.95 + }, + { + "epoch": 0.17, + "learning_rate": "9.9929e-06", + "loss": 0.58, + "slid_loss": 0.5995, + "step": 131, + "time": 301.05 + }, + { + "epoch": 0.17, + "learning_rate": "9.9927e-06", + "loss": 0.5847, + "slid_loss": 0.5992, + "step": 132, + "time": 302.39 + }, + { + "epoch": 0.17, + "learning_rate": "9.9925e-06", + "loss": 0.6222, + "slid_loss": 0.5995, + "step": 133, + "time": 306.34 + }, + { + "epoch": 0.17, + "learning_rate": "9.9924e-06", + "loss": 0.6114, + "slid_loss": 0.5994, + "step": 134, + "time": 299.73 + }, + { + "epoch": 0.17, + "learning_rate": "9.9922e-06", + "loss": 0.5823, + "slid_loss": 0.5991, + "step": 135, + "time": 340.32 + }, + { + "epoch": 0.18, + "learning_rate": "9.9921e-06", + "loss": 0.5989, + "slid_loss": 0.599, + "step": 136, + "time": 302.77 + }, + { + "epoch": 0.18, + "learning_rate": "9.9919e-06", + "loss": 0.5885, + "slid_loss": 0.5989, + "step": 137, + "time": 300.49 + }, + { + "epoch": 0.18, + "learning_rate": "9.9917e-06", + "loss": 0.6046, + "slid_loss": 0.5989, + "step": 138, + "time": 300.88 + }, + { + "epoch": 0.18, + "learning_rate": "9.9916e-06", + "loss": 0.6067, + "slid_loss": 0.5991, + "step": 139, + "time": 302.46 + }, + { + "epoch": 0.18, + "learning_rate": "9.9914e-06", + "loss": 0.5841, + "slid_loss": 0.5989, + "step": 140, + "time": 299.23 + }, + { + "epoch": 0.18, + "learning_rate": "9.9912e-06", + "loss": 0.5965, + "slid_loss": 0.5991, + "step": 141, + "time": 304.79 + }, + { + "epoch": 0.18, + "learning_rate": "9.9910e-06", + "loss": 0.6078, + "slid_loss": 0.5992, + "step": 142, + "time": 303.42 + }, + { + "epoch": 0.19, + "learning_rate": "9.9909e-06", + "loss": 0.5876, + "slid_loss": 0.5991, + "step": 143, + "time": 300.34 + }, + { + "epoch": 0.19, + "learning_rate": "9.9907e-06", + "loss": 0.5699, + "slid_loss": 0.5986, + "step": 144, + "time": 301.76 + }, + { + "epoch": 0.19, + "learning_rate": "9.9905e-06", + "loss": 0.594, + "slid_loss": 0.5987, + "step": 145, + "time": 301.5 + }, + { + "epoch": 0.19, + "learning_rate": "9.9903e-06", + "loss": 0.5813, + "slid_loss": 0.5986, + "step": 146, + "time": 305.53 + }, + { + "epoch": 0.19, + "learning_rate": "9.9902e-06", + "loss": 0.6037, + "slid_loss": 0.5985, + "step": 147, + "time": 299.21 + }, + { + "epoch": 0.19, + "learning_rate": "9.9900e-06", + "loss": 0.6152, + "slid_loss": 0.5989, + "step": 148, + "time": 302.99 + }, + { + "epoch": 0.19, + "learning_rate": "9.9898e-06", + "loss": 0.5807, + "slid_loss": 0.5987, + "step": 149, + "time": 300.63 + }, + { + "epoch": 0.19, + "learning_rate": "9.9896e-06", + "loss": 0.6118, + "slid_loss": 0.5985, + "step": 150, + "time": 300.98 + }, + { + "epoch": 0.2, + "learning_rate": "9.9894e-06", + "loss": 0.6146, + "slid_loss": 0.5989, + "step": 151, + "time": 302.41 + }, + { + "epoch": 0.2, + "learning_rate": "9.9892e-06", + "loss": 0.5905, + "slid_loss": 0.5987, + "step": 152, + "time": 302.49 + }, + { + "epoch": 0.2, + "learning_rate": "9.9890e-06", + "loss": 0.5948, + "slid_loss": 0.5987, + "step": 153, + "time": 298.31 + }, + { + "epoch": 0.2, + "learning_rate": "9.9888e-06", + "loss": 0.5909, + "slid_loss": 0.5984, + "step": 154, + "time": 303.58 + }, + { + "epoch": 0.2, + "learning_rate": "9.9886e-06", + "loss": 0.5935, + "slid_loss": 0.5983, + "step": 155, + "time": 300.33 + }, + { + "epoch": 0.2, + "learning_rate": "9.9884e-06", + "loss": 0.5986, + "slid_loss": 0.5985, + "step": 156, + "time": 302.57 + }, + { + "epoch": 0.2, + "learning_rate": "9.9882e-06", + "loss": 0.5854, + "slid_loss": 0.5985, + "step": 157, + "time": 300.71 + }, + { + "epoch": 0.2, + "learning_rate": "9.9880e-06", + "loss": 0.5969, + "slid_loss": 0.5985, + "step": 158, + "time": 303.74 + }, + { + "epoch": 0.21, + "learning_rate": "9.9878e-06", + "loss": 0.5823, + "slid_loss": 0.5981, + "step": 159, + "time": 297.54 + }, + { + "epoch": 0.21, + "learning_rate": "9.9876e-06", + "loss": 0.6209, + "slid_loss": 0.5983, + "step": 160, + "time": 301.04 + }, + { + "epoch": 0.21, + "learning_rate": "9.9874e-06", + "loss": 0.6104, + "slid_loss": 0.5984, + "step": 161, + "time": 299.43 + }, + { + "epoch": 0.21, + "learning_rate": "9.9872e-06", + "loss": 0.6082, + "slid_loss": 0.5982, + "step": 162, + "time": 303.51 + }, + { + "epoch": 0.21, + "learning_rate": "9.9870e-06", + "loss": 0.594, + "slid_loss": 0.598, + "step": 163, + "time": 299.33 + }, + { + "epoch": 0.21, + "learning_rate": "9.9868e-06", + "loss": 0.5984, + "slid_loss": 0.5978, + "step": 164, + "time": 300.6 + }, + { + "epoch": 0.21, + "learning_rate": "9.9866e-06", + "loss": 0.5662, + "slid_loss": 0.5974, + "step": 165, + "time": 300.92 + }, + { + "epoch": 0.21, + "learning_rate": "9.9864e-06", + "loss": 0.5915, + "slid_loss": 0.5973, + "step": 166, + "time": 302.81 + }, + { + "epoch": 0.22, + "learning_rate": "9.9862e-06", + "loss": 0.5758, + "slid_loss": 0.5971, + "step": 167, + "time": 297.08 + }, + { + "epoch": 0.22, + "learning_rate": "9.9860e-06", + "loss": 0.5816, + "slid_loss": 0.5973, + "step": 168, + "time": 299.58 + }, + { + "epoch": 0.22, + "learning_rate": "9.9857e-06", + "loss": 0.6093, + "slid_loss": 0.5976, + "step": 169, + "time": 298.08 + }, + { + "epoch": 0.22, + "learning_rate": "9.9855e-06", + "loss": 0.5916, + "slid_loss": 0.5973, + "step": 170, + "time": 299.47 + }, + { + "epoch": 0.22, + "learning_rate": "9.9853e-06", + "loss": 0.6068, + "slid_loss": 0.5973, + "step": 171, + "time": 301.28 + }, + { + "epoch": 0.22, + "learning_rate": "9.9851e-06", + "loss": 0.5746, + "slid_loss": 0.5969, + "step": 172, + "time": 298.82 + }, + { + "epoch": 0.22, + "learning_rate": "9.9848e-06", + "loss": 0.5839, + "slid_loss": 0.5968, + "step": 173, + "time": 297.42 + }, + { + "epoch": 0.23, + "learning_rate": "9.9846e-06", + "loss": 0.606, + "slid_loss": 0.5969, + "step": 174, + "time": 300.27 + }, + { + "epoch": 0.23, + "learning_rate": "9.9844e-06", + "loss": 0.5745, + "slid_loss": 0.5966, + "step": 175, + "time": 299.0 + }, + { + "epoch": 0.23, + "learning_rate": "9.9842e-06", + "loss": 0.5781, + "slid_loss": 0.5965, + "step": 176, + "time": 301.89 + }, + { + "epoch": 0.23, + "learning_rate": "9.9839e-06", + "loss": 0.6075, + "slid_loss": 0.5966, + "step": 177, + "time": 302.45 + }, + { + "epoch": 0.23, + "learning_rate": "9.9837e-06", + "loss": 0.5993, + "slid_loss": 0.5966, + "step": 178, + "time": 303.55 + }, + { + "epoch": 0.23, + "learning_rate": "9.9835e-06", + "loss": 0.5941, + "slid_loss": 0.5967, + "step": 179, + "time": 300.89 + }, + { + "epoch": 0.23, + "learning_rate": "9.9832e-06", + "loss": 0.5923, + "slid_loss": 0.5967, + "step": 180, + "time": 296.88 + }, + { + "epoch": 0.23, + "learning_rate": "9.9830e-06", + "loss": 0.6073, + "slid_loss": 0.5969, + "step": 181, + "time": 297.73 + }, + { + "epoch": 0.24, + "learning_rate": "9.9827e-06", + "loss": 0.5899, + "slid_loss": 0.5967, + "step": 182, + "time": 304.98 + }, + { + "epoch": 0.24, + "learning_rate": "9.9825e-06", + "loss": 0.6225, + "slid_loss": 0.5972, + "step": 183, + "time": 299.1 + }, + { + "epoch": 0.24, + "learning_rate": "9.9823e-06", + "loss": 0.5727, + "slid_loss": 0.5969, + "step": 184, + "time": 299.82 + }, + { + "epoch": 0.24, + "learning_rate": "9.9820e-06", + "loss": 0.5644, + "slid_loss": 0.5965, + "step": 185, + "time": 302.65 + }, + { + "epoch": 0.24, + "learning_rate": "9.9818e-06", + "loss": 0.6012, + "slid_loss": 0.5965, + "step": 186, + "time": 294.28 + }, + { + "epoch": 0.24, + "learning_rate": "9.9815e-06", + "loss": 0.583, + "slid_loss": 0.5963, + "step": 187, + "time": 300.57 + }, + { + "epoch": 0.24, + "learning_rate": "9.9813e-06", + "loss": 0.6199, + "slid_loss": 0.5963, + "step": 188, + "time": 300.21 + }, + { + "epoch": 0.24, + "learning_rate": "9.9810e-06", + "loss": 0.5737, + "slid_loss": 0.596, + "step": 189, + "time": 298.53 + }, + { + "epoch": 0.25, + "learning_rate": "9.9808e-06", + "loss": 0.6084, + "slid_loss": 0.5962, + "step": 190, + "time": 303.74 + }, + { + "epoch": 0.25, + "learning_rate": "9.9805e-06", + "loss": 0.587, + "slid_loss": 0.5961, + "step": 191, + "time": 303.28 + }, + { + "epoch": 0.25, + "learning_rate": "9.9802e-06", + "loss": 0.5778, + "slid_loss": 0.5958, + "step": 192, + "time": 296.74 + }, + { + "epoch": 0.25, + "learning_rate": "9.9800e-06", + "loss": 0.5752, + "slid_loss": 0.5953, + "step": 193, + "time": 301.16 + }, + { + "epoch": 0.25, + "learning_rate": "9.9797e-06", + "loss": 0.6127, + "slid_loss": 0.5955, + "step": 194, + "time": 296.39 + }, + { + "epoch": 0.25, + "learning_rate": "9.9795e-06", + "loss": 0.5864, + "slid_loss": 0.5953, + "step": 195, + "time": 298.05 + }, + { + "epoch": 0.25, + "learning_rate": "9.9792e-06", + "loss": 0.5788, + "slid_loss": 0.595, + "step": 196, + "time": 297.51 + }, + { + "epoch": 0.26, + "learning_rate": "9.9789e-06", + "loss": 0.5977, + "slid_loss": 0.5949, + "step": 197, + "time": 299.48 + }, + { + "epoch": 0.26, + "learning_rate": "9.9787e-06", + "loss": 0.568, + "slid_loss": 0.5946, + "step": 198, + "time": 303.2 + }, + { + "epoch": 0.26, + "learning_rate": "9.9784e-06", + "loss": 0.5954, + "slid_loss": 0.5945, + "step": 199, + "time": 298.52 + }, + { + "epoch": 0.26, + "learning_rate": "9.9781e-06", + "loss": 0.618, + "slid_loss": 0.5948, + "step": 200, + "time": 297.2 + }, + { + "epoch": 0.26, + "learning_rate": "9.9779e-06", + "loss": 0.568, + "slid_loss": 0.5945, + "step": 201, + "time": 2159.56 + }, + { + "epoch": 0.26, + "learning_rate": "9.9776e-06", + "loss": 0.5864, + "slid_loss": 0.594, + "step": 202, + "time": 304.84 + }, + { + "epoch": 0.26, + "learning_rate": "9.9773e-06", + "loss": 0.5866, + "slid_loss": 0.5937, + "step": 203, + "time": 301.84 + }, + { + "epoch": 0.26, + "learning_rate": "9.9770e-06", + "loss": 0.633, + "slid_loss": 0.5942, + "step": 204, + "time": 300.37 + }, + { + "epoch": 0.27, + "learning_rate": "9.9768e-06", + "loss": 0.5984, + "slid_loss": 0.5939, + "step": 205, + "time": 298.58 + }, + { + "epoch": 0.27, + "learning_rate": "9.9765e-06", + "loss": 0.5859, + "slid_loss": 0.5939, + "step": 206, + "time": 298.88 + }, + { + "epoch": 0.27, + "learning_rate": "9.9762e-06", + "loss": 0.5895, + "slid_loss": 0.594, + "step": 207, + "time": 302.29 + }, + { + "epoch": 0.27, + "learning_rate": "9.9759e-06", + "loss": 0.5931, + "slid_loss": 0.594, + "step": 208, + "time": 295.71 + }, + { + "epoch": 0.27, + "learning_rate": "9.9756e-06", + "loss": 0.586, + "slid_loss": 0.5939, + "step": 209, + "time": 297.38 + }, + { + "epoch": 0.27, + "learning_rate": "9.9753e-06", + "loss": 0.5702, + "slid_loss": 0.5937, + "step": 210, + "time": 292.88 + }, + { + "epoch": 0.27, + "learning_rate": "9.9750e-06", + "loss": 0.5841, + "slid_loss": 0.5933, + "step": 211, + "time": 302.19 + }, + { + "epoch": 0.27, + "learning_rate": "9.9748e-06", + "loss": 0.5839, + "slid_loss": 0.5932, + "step": 212, + "time": 295.71 + }, + { + "epoch": 0.28, + "learning_rate": "9.9745e-06", + "loss": 0.6085, + "slid_loss": 0.5932, + "step": 213, + "time": 301.35 + }, + { + "epoch": 0.28, + "learning_rate": "9.9742e-06", + "loss": 0.5931, + "slid_loss": 0.5932, + "step": 214, + "time": 297.9 + }, + { + "epoch": 0.28, + "learning_rate": "9.9739e-06", + "loss": 0.6003, + "slid_loss": 0.5933, + "step": 215, + "time": 305.13 + }, + { + "epoch": 0.28, + "learning_rate": "9.9736e-06", + "loss": 0.5904, + "slid_loss": 0.5934, + "step": 216, + "time": 299.72 + }, + { + "epoch": 0.28, + "learning_rate": "9.9733e-06", + "loss": 0.5789, + "slid_loss": 0.5931, + "step": 217, + "time": 294.25 + }, + { + "epoch": 0.28, + "learning_rate": "9.9730e-06", + "loss": 0.5975, + "slid_loss": 0.5932, + "step": 218, + "time": 300.49 + }, + { + "epoch": 0.28, + "learning_rate": "9.9727e-06", + "loss": 0.5814, + "slid_loss": 0.5931, + "step": 219, + "time": 300.88 + }, + { + "epoch": 0.28, + "learning_rate": "9.9724e-06", + "loss": 0.6156, + "slid_loss": 0.593, + "step": 220, + "time": 298.38 + }, + { + "epoch": 0.29, + "learning_rate": "9.9721e-06", + "loss": 0.5958, + "slid_loss": 0.593, + "step": 221, + "time": 295.82 + }, + { + "epoch": 0.29, + "learning_rate": "9.9718e-06", + "loss": 0.5821, + "slid_loss": 0.5928, + "step": 222, + "time": 297.92 + }, + { + "epoch": 0.29, + "learning_rate": "9.9714e-06", + "loss": 0.6157, + "slid_loss": 0.5929, + "step": 223, + "time": 301.05 + }, + { + "epoch": 0.29, + "learning_rate": "9.9711e-06", + "loss": 0.5596, + "slid_loss": 0.5924, + "step": 224, + "time": 300.03 + }, + { + "epoch": 0.29, + "learning_rate": "9.9708e-06", + "loss": 0.6077, + "slid_loss": 0.5928, + "step": 225, + "time": 302.72 + }, + { + "epoch": 0.29, + "learning_rate": "9.9705e-06", + "loss": 0.6011, + "slid_loss": 0.5929, + "step": 226, + "time": 300.68 + }, + { + "epoch": 0.29, + "learning_rate": "9.9702e-06", + "loss": 0.5792, + "slid_loss": 0.593, + "step": 227, + "time": 301.7 + }, + { + "epoch": 0.3, + "learning_rate": "9.9699e-06", + "loss": 0.5819, + "slid_loss": 0.5931, + "step": 228, + "time": 307.53 + }, + { + "epoch": 0.3, + "learning_rate": "9.9696e-06", + "loss": 0.5668, + "slid_loss": 0.5927, + "step": 229, + "time": 298.86 + }, + { + "epoch": 0.3, + "learning_rate": "9.9692e-06", + "loss": 0.5833, + "slid_loss": 0.5927, + "step": 230, + "time": 301.51 + }, + { + "epoch": 0.3, + "learning_rate": "9.9689e-06", + "loss": 0.5911, + "slid_loss": 0.5928, + "step": 231, + "time": 301.86 + }, + { + "epoch": 0.3, + "learning_rate": "9.9686e-06", + "loss": 0.6039, + "slid_loss": 0.593, + "step": 232, + "time": 301.91 + }, + { + "epoch": 0.3, + "learning_rate": "9.9683e-06", + "loss": 0.6113, + "slid_loss": 0.5929, + "step": 233, + "time": 299.99 + }, + { + "epoch": 0.3, + "learning_rate": "9.9679e-06", + "loss": 0.5821, + "slid_loss": 0.5926, + "step": 234, + "time": 300.63 + }, + { + "epoch": 0.3, + "learning_rate": "9.9676e-06", + "loss": 0.584, + "slid_loss": 0.5926, + "step": 235, + "time": 294.85 + }, + { + "epoch": 0.31, + "learning_rate": "9.9673e-06", + "loss": 0.593, + "slid_loss": 0.5925, + "step": 236, + "time": 300.63 + }, + { + "epoch": 0.31, + "learning_rate": "9.9669e-06", + "loss": 0.5875, + "slid_loss": 0.5925, + "step": 237, + "time": 301.49 + }, + { + "epoch": 0.31, + "learning_rate": "9.9666e-06", + "loss": 0.6002, + "slid_loss": 0.5925, + "step": 238, + "time": 300.88 + }, + { + "epoch": 0.31, + "learning_rate": "9.9663e-06", + "loss": 0.5786, + "slid_loss": 0.5922, + "step": 239, + "time": 300.01 + }, + { + "epoch": 0.31, + "learning_rate": "9.9659e-06", + "loss": 0.596, + "slid_loss": 0.5923, + "step": 240, + "time": 299.06 + }, + { + "epoch": 0.31, + "learning_rate": "9.9656e-06", + "loss": 0.5881, + "slid_loss": 0.5922, + "step": 241, + "time": 398.92 + }, + { + "epoch": 0.31, + "learning_rate": "9.9653e-06", + "loss": 0.5988, + "slid_loss": 0.5921, + "step": 242, + "time": 512.79 + }, + { + "epoch": 0.31, + "learning_rate": "9.9649e-06", + "loss": 0.5969, + "slid_loss": 0.5922, + "step": 243, + "time": 333.47 + }, + { + "epoch": 0.32, + "learning_rate": "9.9646e-06", + "loss": 0.58, + "slid_loss": 0.5923, + "step": 244, + "time": 303.21 + }, + { + "epoch": 0.32, + "learning_rate": "9.9642e-06", + "loss": 0.5971, + "slid_loss": 0.5924, + "step": 245, + "time": 296.87 + }, + { + "epoch": 0.32, + "learning_rate": "9.9639e-06", + "loss": 0.5809, + "slid_loss": 0.5924, + "step": 246, + "time": 303.53 + }, + { + "epoch": 0.32, + "learning_rate": "9.9635e-06", + "loss": 0.6039, + "slid_loss": 0.5924, + "step": 247, + "time": 304.38 + }, + { + "epoch": 0.32, + "learning_rate": "9.9632e-06", + "loss": 0.6062, + "slid_loss": 0.5923, + "step": 248, + "time": 299.2 + }, + { + "epoch": 0.32, + "learning_rate": "9.9628e-06", + "loss": 0.5842, + "slid_loss": 0.5923, + "step": 249, + "time": 295.64 + }, + { + "epoch": 0.32, + "learning_rate": "9.9625e-06", + "loss": 0.6137, + "slid_loss": 0.5923, + "step": 250, + "time": 300.97 + }, + { + "epoch": 0.33, + "learning_rate": "9.9621e-06", + "loss": 0.5899, + "slid_loss": 0.5921, + "step": 251, + "time": 304.22 + }, + { + "epoch": 0.33, + "learning_rate": "9.9618e-06", + "loss": 0.5839, + "slid_loss": 0.592, + "step": 252, + "time": 300.56 + }, + { + "epoch": 0.33, + "learning_rate": "9.9614e-06", + "loss": 0.6023, + "slid_loss": 0.5921, + "step": 253, + "time": 302.46 + }, + { + "epoch": 0.33, + "learning_rate": "9.9610e-06", + "loss": 0.6089, + "slid_loss": 0.5923, + "step": 254, + "time": 300.0 + }, + { + "epoch": 0.33, + "learning_rate": "9.9607e-06", + "loss": 0.6055, + "slid_loss": 0.5924, + "step": 255, + "time": 300.65 + }, + { + "epoch": 0.33, + "learning_rate": "9.9603e-06", + "loss": 0.5927, + "slid_loss": 0.5923, + "step": 256, + "time": 296.14 + }, + { + "epoch": 0.33, + "learning_rate": "9.9599e-06", + "loss": 0.5967, + "slid_loss": 0.5925, + "step": 257, + "time": 294.8 + }, + { + "epoch": 0.33, + "learning_rate": "9.9596e-06", + "loss": 0.5717, + "slid_loss": 0.5922, + "step": 258, + "time": 303.38 + }, + { + "epoch": 0.34, + "learning_rate": "9.9592e-06", + "loss": 0.5853, + "slid_loss": 0.5922, + "step": 259, + "time": 301.95 + }, + { + "epoch": 0.34, + "learning_rate": "9.9588e-06", + "loss": 0.5944, + "slid_loss": 0.592, + "step": 260, + "time": 300.31 + }, + { + "epoch": 0.34, + "learning_rate": "9.9585e-06", + "loss": 0.6069, + "slid_loss": 0.5919, + "step": 261, + "time": 303.26 + }, + { + "epoch": 0.34, + "learning_rate": "9.9581e-06", + "loss": 0.5901, + "slid_loss": 0.5917, + "step": 262, + "time": 301.92 + }, + { + "epoch": 0.34, + "learning_rate": "9.9577e-06", + "loss": 0.604, + "slid_loss": 0.5918, + "step": 263, + "time": 302.96 + }, + { + "epoch": 0.34, + "learning_rate": "9.9573e-06", + "loss": 0.5833, + "slid_loss": 0.5917, + "step": 264, + "time": 301.58 + }, + { + "epoch": 0.34, + "learning_rate": "9.9570e-06", + "loss": 0.5949, + "slid_loss": 0.592, + "step": 265, + "time": 301.0 + }, + { + "epoch": 0.34, + "learning_rate": "9.9566e-06", + "loss": 0.5695, + "slid_loss": 0.5918, + "step": 266, + "time": 300.87 + }, + { + "epoch": 0.35, + "learning_rate": "9.9562e-06", + "loss": 0.5978, + "slid_loss": 0.592, + "step": 267, + "time": 299.95 + }, + { + "epoch": 0.35, + "learning_rate": "9.9558e-06", + "loss": 0.5796, + "slid_loss": 0.592, + "step": 268, + "time": 293.55 + }, + { + "epoch": 0.35, + "learning_rate": "9.9554e-06", + "loss": 0.6032, + "slid_loss": 0.5919, + "step": 269, + "time": 302.36 + }, + { + "epoch": 0.35, + "learning_rate": "9.9550e-06", + "loss": 0.6002, + "slid_loss": 0.592, + "step": 270, + "time": 303.01 + }, + { + "epoch": 0.35, + "learning_rate": "9.9547e-06", + "loss": 0.6025, + "slid_loss": 0.5919, + "step": 271, + "time": 303.25 + }, + { + "epoch": 0.35, + "learning_rate": "9.9543e-06", + "loss": 0.6, + "slid_loss": 0.5922, + "step": 272, + "time": 303.51 + }, + { + "epoch": 0.35, + "learning_rate": "9.9539e-06", + "loss": 0.5961, + "slid_loss": 0.5923, + "step": 273, + "time": 300.67 + }, + { + "epoch": 0.35, + "learning_rate": "9.9535e-06", + "loss": 0.5879, + "slid_loss": 0.5921, + "step": 274, + "time": 298.22 + }, + { + "epoch": 0.36, + "learning_rate": "9.9531e-06", + "loss": 0.5915, + "slid_loss": 0.5923, + "step": 275, + "time": 294.33 + }, + { + "epoch": 0.36, + "learning_rate": "9.9527e-06", + "loss": 0.5785, + "slid_loss": 0.5923, + "step": 276, + "time": 303.84 + }, + { + "epoch": 0.36, + "learning_rate": "9.9523e-06", + "loss": 0.5918, + "slid_loss": 0.5922, + "step": 277, + "time": 301.67 + }, + { + "epoch": 0.36, + "learning_rate": "9.9519e-06", + "loss": 0.5767, + "slid_loss": 0.5919, + "step": 278, + "time": 298.92 + }, + { + "epoch": 0.36, + "learning_rate": "9.9515e-06", + "loss": 0.5884, + "slid_loss": 0.5919, + "step": 279, + "time": 301.91 + }, + { + "epoch": 0.36, + "learning_rate": "9.9511e-06", + "loss": 0.5535, + "slid_loss": 0.5915, + "step": 280, + "time": 305.25 + }, + { + "epoch": 0.36, + "learning_rate": "9.9507e-06", + "loss": 0.5793, + "slid_loss": 0.5912, + "step": 281, + "time": 304.08 + }, + { + "epoch": 0.37, + "learning_rate": "9.9503e-06", + "loss": 0.6179, + "slid_loss": 0.5915, + "step": 282, + "time": 302.88 + }, + { + "epoch": 0.37, + "learning_rate": "9.9499e-06", + "loss": 0.5902, + "slid_loss": 0.5912, + "step": 283, + "time": 304.51 + }, + { + "epoch": 0.37, + "learning_rate": "9.9495e-06", + "loss": 0.607, + "slid_loss": 0.5915, + "step": 284, + "time": 303.7 + }, + { + "epoch": 0.37, + "learning_rate": "9.9490e-06", + "loss": 0.5717, + "slid_loss": 0.5916, + "step": 285, + "time": 293.89 + }, + { + "epoch": 0.37, + "learning_rate": "9.9486e-06", + "loss": 0.5917, + "slid_loss": 0.5915, + "step": 286, + "time": 294.93 + }, + { + "epoch": 0.37, + "learning_rate": "9.9482e-06", + "loss": 0.6037, + "slid_loss": 0.5917, + "step": 287, + "time": 292.8 + }, + { + "epoch": 0.37, + "learning_rate": "9.9478e-06", + "loss": 0.5852, + "slid_loss": 0.5913, + "step": 288, + "time": 305.96 + }, + { + "epoch": 0.37, + "learning_rate": "9.9474e-06", + "loss": 0.6088, + "slid_loss": 0.5917, + "step": 289, + "time": 300.0 + }, + { + "epoch": 0.38, + "learning_rate": "9.9470e-06", + "loss": 0.589, + "slid_loss": 0.5915, + "step": 290, + "time": 298.64 + }, + { + "epoch": 0.38, + "learning_rate": "9.9465e-06", + "loss": 0.5874, + "slid_loss": 0.5915, + "step": 291, + "time": 299.51 + }, + { + "epoch": 0.38, + "learning_rate": "9.9461e-06", + "loss": 0.6055, + "slid_loss": 0.5918, + "step": 292, + "time": 303.2 + }, + { + "epoch": 0.38, + "learning_rate": "9.9457e-06", + "loss": 0.5701, + "slid_loss": 0.5917, + "step": 293, + "time": 306.15 + }, + { + "epoch": 0.38, + "learning_rate": "9.9453e-06", + "loss": 0.551, + "slid_loss": 0.5911, + "step": 294, + "time": 296.03 + }, + { + "epoch": 0.38, + "learning_rate": "9.9448e-06", + "loss": 0.5914, + "slid_loss": 0.5912, + "step": 295, + "time": 299.9 + }, + { + "epoch": 0.38, + "learning_rate": "9.9444e-06", + "loss": 0.6068, + "slid_loss": 0.5914, + "step": 296, + "time": 303.06 + }, + { + "epoch": 0.38, + "learning_rate": "9.9440e-06", + "loss": 0.5881, + "slid_loss": 0.5914, + "step": 297, + "time": 302.97 + }, + { + "epoch": 0.39, + "learning_rate": "9.9435e-06", + "loss": 0.5652, + "slid_loss": 0.5913, + "step": 298, + "time": 299.7 + }, + { + "epoch": 0.39, + "learning_rate": "9.9431e-06", + "loss": 0.5771, + "slid_loss": 0.5911, + "step": 299, + "time": 297.81 + }, + { + "epoch": 0.39, + "learning_rate": "9.9427e-06", + "loss": 0.5984, + "slid_loss": 0.5909, + "step": 300, + "time": 309.93 + }, + { + "epoch": 0.39, + "learning_rate": "9.9422e-06", + "loss": 0.5798, + "slid_loss": 0.5911, + "step": 301, + "time": 2145.71 + }, + { + "epoch": 0.39, + "learning_rate": "9.9418e-06", + "loss": 0.5467, + "slid_loss": 0.5907, + "step": 302, + "time": 304.18 + }, + { + "epoch": 0.39, + "learning_rate": "9.9413e-06", + "loss": 0.6062, + "slid_loss": 0.5909, + "step": 303, + "time": 303.3 + }, + { + "epoch": 0.39, + "learning_rate": "9.9409e-06", + "loss": 0.5981, + "slid_loss": 0.5905, + "step": 304, + "time": 301.71 + }, + { + "epoch": 0.39, + "learning_rate": "9.9404e-06", + "loss": 0.5806, + "slid_loss": 0.5903, + "step": 305, + "time": 298.37 + }, + { + "epoch": 0.4, + "learning_rate": "9.9400e-06", + "loss": 0.6079, + "slid_loss": 0.5906, + "step": 306, + "time": 297.14 + }, + { + "epoch": 0.4, + "learning_rate": "9.9396e-06", + "loss": 0.5935, + "slid_loss": 0.5906, + "step": 307, + "time": 302.82 + }, + { + "epoch": 0.4, + "learning_rate": "9.9391e-06", + "loss": 0.561, + "slid_loss": 0.5903, + "step": 308, + "time": 298.85 + }, + { + "epoch": 0.4, + "learning_rate": "9.9387e-06", + "loss": 0.603, + "slid_loss": 0.5904, + "step": 309, + "time": 298.61 + }, + { + "epoch": 0.4, + "learning_rate": "9.9382e-06", + "loss": 0.5999, + "slid_loss": 0.5907, + "step": 310, + "time": 301.25 + }, + { + "epoch": 0.4, + "learning_rate": "9.9377e-06", + "loss": 0.5995, + "slid_loss": 0.5909, + "step": 311, + "time": 299.59 + }, + { + "epoch": 0.4, + "learning_rate": "9.9373e-06", + "loss": 0.5712, + "slid_loss": 0.5908, + "step": 312, + "time": 302.55 + }, + { + "epoch": 0.41, + "learning_rate": "9.9368e-06", + "loss": 0.5979, + "slid_loss": 0.5907, + "step": 313, + "time": 298.82 + }, + { + "epoch": 0.41, + "learning_rate": "9.9364e-06", + "loss": 0.5781, + "slid_loss": 0.5905, + "step": 314, + "time": 296.46 + }, + { + "epoch": 0.41, + "learning_rate": "9.9359e-06", + "loss": 0.5776, + "slid_loss": 0.5903, + "step": 315, + "time": 299.03 + }, + { + "epoch": 0.41, + "learning_rate": "9.9354e-06", + "loss": 0.575, + "slid_loss": 0.5901, + "step": 316, + "time": 298.66 + }, + { + "epoch": 0.41, + "learning_rate": "9.9350e-06", + "loss": 0.5841, + "slid_loss": 0.5902, + "step": 317, + "time": 299.26 + }, + { + "epoch": 0.41, + "learning_rate": "9.9345e-06", + "loss": 0.5874, + "slid_loss": 0.5901, + "step": 318, + "time": 298.97 + }, + { + "epoch": 0.41, + "learning_rate": "9.9340e-06", + "loss": 0.5836, + "slid_loss": 0.5901, + "step": 319, + "time": 301.89 + }, + { + "epoch": 0.41, + "learning_rate": "9.9336e-06", + "loss": 0.5722, + "slid_loss": 0.5897, + "step": 320, + "time": 301.23 + }, + { + "epoch": 0.42, + "learning_rate": "9.9331e-06", + "loss": 0.5815, + "slid_loss": 0.5895, + "step": 321, + "time": 300.73 + }, + { + "epoch": 0.42, + "learning_rate": "9.9326e-06", + "loss": 0.582, + "slid_loss": 0.5895, + "step": 322, + "time": 306.95 + }, + { + "epoch": 0.42, + "learning_rate": "9.9322e-06", + "loss": 0.5925, + "slid_loss": 0.5893, + "step": 323, + "time": 303.77 + }, + { + "epoch": 0.42, + "learning_rate": "9.9317e-06", + "loss": 0.5637, + "slid_loss": 0.5893, + "step": 324, + "time": 302.23 + }, + { + "epoch": 0.42, + "learning_rate": "9.9312e-06", + "loss": 0.5697, + "slid_loss": 0.589, + "step": 325, + "time": 300.31 + }, + { + "epoch": 0.42, + "learning_rate": "9.9307e-06", + "loss": 0.5983, + "slid_loss": 0.5889, + "step": 326, + "time": 304.98 + }, + { + "epoch": 0.42, + "learning_rate": "9.9302e-06", + "loss": 0.5884, + "slid_loss": 0.589, + "step": 327, + "time": 295.6 + }, + { + "epoch": 0.42, + "learning_rate": "9.9298e-06", + "loss": 0.5902, + "slid_loss": 0.5891, + "step": 328, + "time": 300.44 + }, + { + "epoch": 0.43, + "learning_rate": "9.9293e-06", + "loss": 0.5706, + "slid_loss": 0.5891, + "step": 329, + "time": 299.45 + }, + { + "epoch": 0.43, + "learning_rate": "9.9288e-06", + "loss": 0.5751, + "slid_loss": 0.5891, + "step": 330, + "time": 300.15 + }, + { + "epoch": 0.43, + "learning_rate": "9.9283e-06", + "loss": 0.5948, + "slid_loss": 0.5891, + "step": 331, + "time": 301.39 + }, + { + "epoch": 0.43, + "learning_rate": "9.9278e-06", + "loss": 0.585, + "slid_loss": 0.5889, + "step": 332, + "time": 300.47 + }, + { + "epoch": 0.43, + "learning_rate": "9.9273e-06", + "loss": 0.598, + "slid_loss": 0.5888, + "step": 333, + "time": 301.51 + }, + { + "epoch": 0.43, + "learning_rate": "9.9268e-06", + "loss": 0.5929, + "slid_loss": 0.5889, + "step": 334, + "time": 301.08 + }, + { + "epoch": 0.43, + "learning_rate": "9.9263e-06", + "loss": 0.6013, + "slid_loss": 0.5891, + "step": 335, + "time": 301.25 + }, + { + "epoch": 0.44, + "learning_rate": "9.9258e-06", + "loss": 0.5635, + "slid_loss": 0.5888, + "step": 336, + "time": 302.84 + }, + { + "epoch": 0.44, + "learning_rate": "9.9253e-06", + "loss": 0.583, + "slid_loss": 0.5887, + "step": 337, + "time": 302.95 + }, + { + "epoch": 0.44, + "learning_rate": "9.9248e-06", + "loss": 0.5615, + "slid_loss": 0.5883, + "step": 338, + "time": 304.5 + }, + { + "epoch": 0.44, + "learning_rate": "9.9243e-06", + "loss": 0.5832, + "slid_loss": 0.5884, + "step": 339, + "time": 301.31 + }, + { + "epoch": 0.44, + "learning_rate": "9.9238e-06", + "loss": 0.5584, + "slid_loss": 0.588, + "step": 340, + "time": 301.03 + }, + { + "epoch": 0.44, + "learning_rate": "9.9233e-06", + "loss": 0.5838, + "slid_loss": 0.588, + "step": 341, + "time": 300.61 + }, + { + "epoch": 0.44, + "learning_rate": "9.9228e-06", + "loss": 0.5854, + "slid_loss": 0.5878, + "step": 342, + "time": 304.07 + }, + { + "epoch": 0.44, + "learning_rate": "9.9223e-06", + "loss": 0.5775, + "slid_loss": 0.5876, + "step": 343, + "time": 304.08 + }, + { + "epoch": 0.45, + "learning_rate": "9.9218e-06", + "loss": 0.587, + "slid_loss": 0.5877, + "step": 344, + "time": 301.56 + }, + { + "epoch": 0.45, + "learning_rate": "9.9213e-06", + "loss": 0.5812, + "slid_loss": 0.5875, + "step": 345, + "time": 297.96 + }, + { + "epoch": 0.45, + "learning_rate": "9.9208e-06", + "loss": 0.5783, + "slid_loss": 0.5875, + "step": 346, + "time": 302.47 + }, + { + "epoch": 0.45, + "learning_rate": "9.9203e-06", + "loss": 0.5731, + "slid_loss": 0.5872, + "step": 347, + "time": 297.21 + }, + { + "epoch": 0.45, + "learning_rate": "9.9198e-06", + "loss": 0.5725, + "slid_loss": 0.5869, + "step": 348, + "time": 300.15 + }, + { + "epoch": 0.45, + "learning_rate": "9.9192e-06", + "loss": 0.5565, + "slid_loss": 0.5866, + "step": 349, + "time": 299.24 + }, + { + "epoch": 0.45, + "learning_rate": "9.9187e-06", + "loss": 0.6033, + "slid_loss": 0.5865, + "step": 350, + "time": 297.98 + }, + { + "epoch": 0.45, + "learning_rate": "9.9182e-06", + "loss": 0.5588, + "slid_loss": 0.5862, + "step": 351, + "time": 298.4 + }, + { + "epoch": 0.46, + "learning_rate": "9.9177e-06", + "loss": 0.5595, + "slid_loss": 0.5859, + "step": 352, + "time": 300.12 + }, + { + "epoch": 0.46, + "learning_rate": "9.9171e-06", + "loss": 0.5777, + "slid_loss": 0.5857, + "step": 353, + "time": 293.74 + }, + { + "epoch": 0.46, + "learning_rate": "9.9166e-06", + "loss": 0.587, + "slid_loss": 0.5855, + "step": 354, + "time": 300.09 + }, + { + "epoch": 0.46, + "learning_rate": "9.9161e-06", + "loss": 0.5791, + "slid_loss": 0.5852, + "step": 355, + "time": 299.74 + }, + { + "epoch": 0.46, + "learning_rate": "9.9156e-06", + "loss": 0.5859, + "slid_loss": 0.5851, + "step": 356, + "time": 299.89 + }, + { + "epoch": 0.46, + "learning_rate": "9.9150e-06", + "loss": 0.5808, + "slid_loss": 0.585, + "step": 357, + "time": 299.12 + }, + { + "epoch": 0.46, + "learning_rate": "9.9145e-06", + "loss": 0.5901, + "slid_loss": 0.5852, + "step": 358, + "time": 301.57 + }, + { + "epoch": 0.46, + "learning_rate": "9.9140e-06", + "loss": 0.5809, + "slid_loss": 0.5851, + "step": 359, + "time": 302.98 + }, + { + "epoch": 0.47, + "learning_rate": "9.9134e-06", + "loss": 0.5859, + "slid_loss": 0.585, + "step": 360, + "time": 296.56 + }, + { + "epoch": 0.47, + "learning_rate": "9.9129e-06", + "loss": 0.592, + "slid_loss": 0.5849, + "step": 361, + "time": 309.77 + }, + { + "epoch": 0.47, + "learning_rate": "9.9124e-06", + "loss": 0.5672, + "slid_loss": 0.5847, + "step": 362, + "time": 442.06 + }, + { + "epoch": 0.47, + "learning_rate": "9.9118e-06", + "loss": 0.5837, + "slid_loss": 0.5844, + "step": 363, + "time": 543.81 + }, + { + "epoch": 0.47, + "learning_rate": "9.9113e-06", + "loss": 0.5883, + "slid_loss": 0.5845, + "step": 364, + "time": 307.58 + }, + { + "epoch": 0.47, + "learning_rate": "9.9107e-06", + "loss": 0.5914, + "slid_loss": 0.5845, + "step": 365, + "time": 302.4 + }, + { + "epoch": 0.47, + "learning_rate": "9.9102e-06", + "loss": 0.5901, + "slid_loss": 0.5847, + "step": 366, + "time": 304.61 + }, + { + "epoch": 0.48, + "learning_rate": "9.9096e-06", + "loss": 0.6143, + "slid_loss": 0.5848, + "step": 367, + "time": 301.14 + }, + { + "epoch": 0.48, + "learning_rate": "9.9091e-06", + "loss": 0.5647, + "slid_loss": 0.5847, + "step": 368, + "time": 304.0 + }, + { + "epoch": 0.48, + "learning_rate": "9.9085e-06", + "loss": 0.5874, + "slid_loss": 0.5845, + "step": 369, + "time": 299.09 + }, + { + "epoch": 0.48, + "learning_rate": "9.9080e-06", + "loss": 0.5769, + "slid_loss": 0.5843, + "step": 370, + "time": 300.46 + }, + { + "epoch": 0.48, + "learning_rate": "9.9074e-06", + "loss": 0.5568, + "slid_loss": 0.5838, + "step": 371, + "time": 299.6 + }, + { + "epoch": 0.48, + "learning_rate": "9.9069e-06", + "loss": 0.5953, + "slid_loss": 0.5838, + "step": 372, + "time": 303.64 + }, + { + "epoch": 0.48, + "learning_rate": "9.9063e-06", + "loss": 0.5935, + "slid_loss": 0.5838, + "step": 373, + "time": 302.9 + }, + { + "epoch": 0.48, + "learning_rate": "9.9058e-06", + "loss": 0.5781, + "slid_loss": 0.5837, + "step": 374, + "time": 303.66 + }, + { + "epoch": 0.49, + "learning_rate": "9.9052e-06", + "loss": 0.5954, + "slid_loss": 0.5837, + "step": 375, + "time": 301.25 + }, + { + "epoch": 0.49, + "learning_rate": "9.9046e-06", + "loss": 0.5562, + "slid_loss": 0.5835, + "step": 376, + "time": 302.16 + }, + { + "epoch": 0.49, + "learning_rate": "9.9041e-06", + "loss": 0.5978, + "slid_loss": 0.5835, + "step": 377, + "time": 301.0 + }, + { + "epoch": 0.49, + "learning_rate": "9.9035e-06", + "loss": 0.5904, + "slid_loss": 0.5837, + "step": 378, + "time": 301.57 + }, + { + "epoch": 0.49, + "learning_rate": "9.9030e-06", + "loss": 0.61, + "slid_loss": 0.5839, + "step": 379, + "time": 298.34 + }, + { + "epoch": 0.49, + "learning_rate": "9.9024e-06", + "loss": 0.5824, + "slid_loss": 0.5842, + "step": 380, + "time": 298.45 + }, + { + "epoch": 0.49, + "learning_rate": "9.9018e-06", + "loss": 0.5803, + "slid_loss": 0.5842, + "step": 381, + "time": 299.83 + }, + { + "epoch": 0.49, + "learning_rate": "9.9012e-06", + "loss": 0.6062, + "slid_loss": 0.5841, + "step": 382, + "time": 302.79 + }, + { + "epoch": 0.5, + "learning_rate": "9.9007e-06", + "loss": 0.5783, + "slid_loss": 0.584, + "step": 383, + "time": 304.89 + }, + { + "epoch": 0.5, + "learning_rate": "9.9001e-06", + "loss": 0.5873, + "slid_loss": 0.5838, + "step": 384, + "time": 299.4 + }, + { + "epoch": 0.5, + "learning_rate": "9.8995e-06", + "loss": 0.5714, + "slid_loss": 0.5838, + "step": 385, + "time": 302.76 + }, + { + "epoch": 0.5, + "learning_rate": "9.8989e-06", + "loss": 0.5849, + "slid_loss": 0.5837, + "step": 386, + "time": 298.68 + }, + { + "epoch": 0.5, + "learning_rate": "9.8984e-06", + "loss": 0.5512, + "slid_loss": 0.5832, + "step": 387, + "time": 302.47 + }, + { + "epoch": 0.5, + "learning_rate": "9.8978e-06", + "loss": 0.5851, + "slid_loss": 0.5832, + "step": 388, + "time": 300.3 + }, + { + "epoch": 0.5, + "learning_rate": "9.8972e-06", + "loss": 0.5749, + "slid_loss": 0.5828, + "step": 389, + "time": 303.56 + }, + { + "epoch": 0.51, + "learning_rate": "9.8966e-06", + "loss": 0.5559, + "slid_loss": 0.5825, + "step": 390, + "time": 303.89 + }, + { + "epoch": 0.51, + "learning_rate": "9.8960e-06", + "loss": 0.5666, + "slid_loss": 0.5823, + "step": 391, + "time": 298.94 + }, + { + "epoch": 0.51, + "learning_rate": "9.8954e-06", + "loss": 0.5939, + "slid_loss": 0.5822, + "step": 392, + "time": 299.49 + }, + { + "epoch": 0.51, + "learning_rate": "9.8949e-06", + "loss": 0.5914, + "slid_loss": 0.5824, + "step": 393, + "time": 300.26 + }, + { + "epoch": 0.51, + "learning_rate": "9.8943e-06", + "loss": 0.5602, + "slid_loss": 0.5825, + "step": 394, + "time": 301.06 + }, + { + "epoch": 0.51, + "learning_rate": "9.8937e-06", + "loss": 0.5878, + "slid_loss": 0.5824, + "step": 395, + "time": 298.55 + }, + { + "epoch": 0.51, + "learning_rate": "9.8931e-06", + "loss": 0.5964, + "slid_loss": 0.5823, + "step": 396, + "time": 297.95 + }, + { + "epoch": 0.51, + "learning_rate": "9.8925e-06", + "loss": 0.6124, + "slid_loss": 0.5826, + "step": 397, + "time": 302.23 + }, + { + "epoch": 0.52, + "learning_rate": "9.8919e-06", + "loss": 0.5672, + "slid_loss": 0.5826, + "step": 398, + "time": 298.67 + }, + { + "epoch": 0.52, + "learning_rate": "9.8913e-06", + "loss": 0.5678, + "slid_loss": 0.5825, + "step": 399, + "time": 296.19 + }, + { + "epoch": 0.52, + "learning_rate": "9.8907e-06", + "loss": 0.5952, + "slid_loss": 0.5825, + "step": 400, + "time": 294.41 + }, + { + "epoch": 0.52, + "learning_rate": "9.8901e-06", + "loss": 0.5817, + "slid_loss": 0.5825, + "step": 401, + "time": 2131.8 + }, + { + "epoch": 0.52, + "learning_rate": "9.8895e-06", + "loss": 0.5577, + "slid_loss": 0.5826, + "step": 402, + "time": 303.45 + }, + { + "epoch": 0.52, + "learning_rate": "9.8889e-06", + "loss": 0.5867, + "slid_loss": 0.5824, + "step": 403, + "time": 303.95 + }, + { + "epoch": 0.52, + "learning_rate": "9.8883e-06", + "loss": 0.5767, + "slid_loss": 0.5822, + "step": 404, + "time": 299.6 + }, + { + "epoch": 0.52, + "learning_rate": "9.8877e-06", + "loss": 0.5631, + "slid_loss": 0.582, + "step": 405, + "time": 296.97 + }, + { + "epoch": 0.53, + "learning_rate": "9.8870e-06", + "loss": 0.5671, + "slid_loss": 0.5816, + "step": 406, + "time": 296.01 + }, + { + "epoch": 0.53, + "learning_rate": "9.8864e-06", + "loss": 0.5607, + "slid_loss": 0.5813, + "step": 407, + "time": 292.73 + }, + { + "epoch": 0.53, + "learning_rate": "9.8858e-06", + "loss": 0.5612, + "slid_loss": 0.5813, + "step": 408, + "time": 299.45 + }, + { + "epoch": 0.53, + "learning_rate": "9.8852e-06", + "loss": 0.5591, + "slid_loss": 0.5808, + "step": 409, + "time": 296.08 + }, + { + "epoch": 0.53, + "learning_rate": "9.8846e-06", + "loss": 0.5762, + "slid_loss": 0.5806, + "step": 410, + "time": 299.42 + }, + { + "epoch": 0.53, + "learning_rate": "9.8840e-06", + "loss": 0.5762, + "slid_loss": 0.5804, + "step": 411, + "time": 302.72 + }, + { + "epoch": 0.53, + "learning_rate": "9.8834e-06", + "loss": 0.5675, + "slid_loss": 0.5803, + "step": 412, + "time": 304.89 + }, + { + "epoch": 0.53, + "learning_rate": "9.8827e-06", + "loss": 0.5656, + "slid_loss": 0.58, + "step": 413, + "time": 300.11 + }, + { + "epoch": 0.54, + "learning_rate": "9.8821e-06", + "loss": 0.599, + "slid_loss": 0.5802, + "step": 414, + "time": 300.68 + }, + { + "epoch": 0.54, + "learning_rate": "9.8815e-06", + "loss": 0.5735, + "slid_loss": 0.5802, + "step": 415, + "time": 300.05 + }, + { + "epoch": 0.54, + "learning_rate": "9.8809e-06", + "loss": 0.5825, + "slid_loss": 0.5803, + "step": 416, + "time": 300.83 + }, + { + "epoch": 0.54, + "learning_rate": "9.8802e-06", + "loss": 0.5733, + "slid_loss": 0.5802, + "step": 417, + "time": 298.88 + }, + { + "epoch": 0.54, + "learning_rate": "9.8796e-06", + "loss": 0.5703, + "slid_loss": 0.58, + "step": 418, + "time": 305.66 + }, + { + "epoch": 0.54, + "learning_rate": "9.8790e-06", + "loss": 0.5645, + "slid_loss": 0.5798, + "step": 419, + "time": 300.94 + }, + { + "epoch": 0.54, + "learning_rate": "9.8783e-06", + "loss": 0.5623, + "slid_loss": 0.5797, + "step": 420, + "time": 299.63 + }, + { + "epoch": 0.55, + "learning_rate": "9.8777e-06", + "loss": 0.5444, + "slid_loss": 0.5793, + "step": 421, + "time": 298.09 + }, + { + "epoch": 0.55, + "learning_rate": "9.8771e-06", + "loss": 0.5864, + "slid_loss": 0.5794, + "step": 422, + "time": 297.47 + }, + { + "epoch": 0.55, + "learning_rate": "9.8764e-06", + "loss": 0.6055, + "slid_loss": 0.5795, + "step": 423, + "time": 304.79 + }, + { + "epoch": 0.55, + "learning_rate": "9.8758e-06", + "loss": 0.57, + "slid_loss": 0.5796, + "step": 424, + "time": 301.25 + }, + { + "epoch": 0.55, + "learning_rate": "9.8752e-06", + "loss": 0.5871, + "slid_loss": 0.5797, + "step": 425, + "time": 298.86 + }, + { + "epoch": 0.55, + "learning_rate": "9.8745e-06", + "loss": 0.5809, + "slid_loss": 0.5796, + "step": 426, + "time": 301.44 + }, + { + "epoch": 0.55, + "learning_rate": "9.8739e-06", + "loss": 0.5766, + "slid_loss": 0.5794, + "step": 427, + "time": 302.51 + }, + { + "epoch": 0.55, + "learning_rate": "9.8732e-06", + "loss": 0.5997, + "slid_loss": 0.5795, + "step": 428, + "time": 295.31 + }, + { + "epoch": 0.56, + "learning_rate": "9.8726e-06", + "loss": 0.5724, + "slid_loss": 0.5796, + "step": 429, + "time": 304.51 + }, + { + "epoch": 0.56, + "learning_rate": "9.8719e-06", + "loss": 0.5924, + "slid_loss": 0.5797, + "step": 430, + "time": 300.1 + }, + { + "epoch": 0.56, + "learning_rate": "9.8713e-06", + "loss": 0.611, + "slid_loss": 0.5799, + "step": 431, + "time": 302.85 + }, + { + "epoch": 0.56, + "learning_rate": "9.8706e-06", + "loss": 0.5887, + "slid_loss": 0.5799, + "step": 432, + "time": 300.26 + }, + { + "epoch": 0.56, + "learning_rate": "9.8700e-06", + "loss": 0.5958, + "slid_loss": 0.5799, + "step": 433, + "time": 297.76 + }, + { + "epoch": 0.56, + "learning_rate": "9.8693e-06", + "loss": 0.5765, + "slid_loss": 0.5797, + "step": 434, + "time": 301.87 + }, + { + "epoch": 0.56, + "learning_rate": "9.8687e-06", + "loss": 0.5859, + "slid_loss": 0.5796, + "step": 435, + "time": 303.1 + }, + { + "epoch": 0.56, + "learning_rate": "9.8680e-06", + "loss": 0.5864, + "slid_loss": 0.5798, + "step": 436, + "time": 301.61 + }, + { + "epoch": 0.57, + "learning_rate": "9.8673e-06", + "loss": 0.5822, + "slid_loss": 0.5798, + "step": 437, + "time": 302.85 + }, + { + "epoch": 0.57, + "learning_rate": "9.8667e-06", + "loss": 0.5707, + "slid_loss": 0.5799, + "step": 438, + "time": 301.6 + }, + { + "epoch": 0.57, + "learning_rate": "9.8660e-06", + "loss": 0.5834, + "slid_loss": 0.5799, + "step": 439, + "time": 303.64 + }, + { + "epoch": 0.57, + "learning_rate": "9.8653e-06", + "loss": 0.575, + "slid_loss": 0.5801, + "step": 440, + "time": 300.26 + }, + { + "epoch": 0.57, + "learning_rate": "9.8647e-06", + "loss": 0.5684, + "slid_loss": 0.5799, + "step": 441, + "time": 298.04 + }, + { + "epoch": 0.57, + "learning_rate": "9.8640e-06", + "loss": 0.5908, + "slid_loss": 0.58, + "step": 442, + "time": 299.82 + }, + { + "epoch": 0.57, + "learning_rate": "9.8633e-06", + "loss": 0.6021, + "slid_loss": 0.5802, + "step": 443, + "time": 299.4 + }, + { + "epoch": 0.57, + "learning_rate": "9.8627e-06", + "loss": 0.5846, + "slid_loss": 0.5802, + "step": 444, + "time": 302.43 + }, + { + "epoch": 0.58, + "learning_rate": "9.8620e-06", + "loss": 0.5923, + "slid_loss": 0.5803, + "step": 445, + "time": 303.88 + }, + { + "epoch": 0.58, + "learning_rate": "9.8613e-06", + "loss": 0.5359, + "slid_loss": 0.5799, + "step": 446, + "time": 298.65 + }, + { + "epoch": 0.58, + "learning_rate": "9.8607e-06", + "loss": 0.5764, + "slid_loss": 0.5799, + "step": 447, + "time": 300.04 + }, + { + "epoch": 0.58, + "learning_rate": "9.8600e-06", + "loss": 0.5913, + "slid_loss": 0.5801, + "step": 448, + "time": 303.88 + }, + { + "epoch": 0.58, + "learning_rate": "9.8593e-06", + "loss": 0.5527, + "slid_loss": 0.5801, + "step": 449, + "time": 301.12 + }, + { + "epoch": 0.58, + "learning_rate": "9.8586e-06", + "loss": 0.563, + "slid_loss": 0.5797, + "step": 450, + "time": 298.58 + }, + { + "epoch": 0.58, + "learning_rate": "9.8579e-06", + "loss": 0.5561, + "slid_loss": 0.5796, + "step": 451, + "time": 299.14 + }, + { + "epoch": 0.59, + "learning_rate": "9.8572e-06", + "loss": 0.5594, + "slid_loss": 0.5796, + "step": 452, + "time": 299.44 + }, + { + "epoch": 0.59, + "learning_rate": "9.8566e-06", + "loss": 0.5778, + "slid_loss": 0.5796, + "step": 453, + "time": 302.98 + }, + { + "epoch": 0.59, + "learning_rate": "9.8559e-06", + "loss": 0.5815, + "slid_loss": 0.5796, + "step": 454, + "time": 298.23 + }, + { + "epoch": 0.59, + "learning_rate": "9.8552e-06", + "loss": 0.5589, + "slid_loss": 0.5794, + "step": 455, + "time": 300.7 + }, + { + "epoch": 0.59, + "learning_rate": "9.8545e-06", + "loss": 0.5716, + "slid_loss": 0.5792, + "step": 456, + "time": 299.94 + }, + { + "epoch": 0.59, + "learning_rate": "9.8538e-06", + "loss": 0.5927, + "slid_loss": 0.5793, + "step": 457, + "time": 302.7 + }, + { + "epoch": 0.59, + "learning_rate": "9.8531e-06", + "loss": 0.5675, + "slid_loss": 0.5791, + "step": 458, + "time": 297.41 + }, + { + "epoch": 0.59, + "learning_rate": "9.8524e-06", + "loss": 0.5934, + "slid_loss": 0.5792, + "step": 459, + "time": 302.7 + }, + { + "epoch": 0.6, + "learning_rate": "9.8517e-06", + "loss": 0.6052, + "slid_loss": 0.5794, + "step": 460, + "time": 300.68 + }, + { + "epoch": 0.6, + "learning_rate": "9.8510e-06", + "loss": 0.5644, + "slid_loss": 0.5792, + "step": 461, + "time": 295.54 + }, + { + "epoch": 0.6, + "learning_rate": "9.8503e-06", + "loss": 0.562, + "slid_loss": 0.5791, + "step": 462, + "time": 298.32 + }, + { + "epoch": 0.6, + "learning_rate": "9.8496e-06", + "loss": 0.5411, + "slid_loss": 0.5787, + "step": 463, + "time": 303.31 + }, + { + "epoch": 0.6, + "learning_rate": "9.8489e-06", + "loss": 0.549, + "slid_loss": 0.5783, + "step": 464, + "time": 299.02 + }, + { + "epoch": 0.6, + "learning_rate": "9.8482e-06", + "loss": 0.5705, + "slid_loss": 0.5781, + "step": 465, + "time": 302.64 + }, + { + "epoch": 0.6, + "learning_rate": "9.8475e-06", + "loss": 0.5853, + "slid_loss": 0.578, + "step": 466, + "time": 301.21 + }, + { + "epoch": 0.6, + "learning_rate": "9.8468e-06", + "loss": 0.5954, + "slid_loss": 0.5778, + "step": 467, + "time": 299.8 + }, + { + "epoch": 0.61, + "learning_rate": "9.8461e-06", + "loss": 0.6089, + "slid_loss": 0.5783, + "step": 468, + "time": 299.48 + }, + { + "epoch": 0.61, + "learning_rate": "9.8454e-06", + "loss": 0.5906, + "slid_loss": 0.5783, + "step": 469, + "time": 295.69 + }, + { + "epoch": 0.61, + "learning_rate": "9.8447e-06", + "loss": 0.5781, + "slid_loss": 0.5783, + "step": 470, + "time": 297.51 + }, + { + "epoch": 0.61, + "learning_rate": "9.8440e-06", + "loss": 0.5976, + "slid_loss": 0.5787, + "step": 471, + "time": 302.98 + }, + { + "epoch": 0.61, + "learning_rate": "9.8432e-06", + "loss": 0.5703, + "slid_loss": 0.5785, + "step": 472, + "time": 300.12 + }, + { + "epoch": 0.61, + "learning_rate": "9.8425e-06", + "loss": 0.5825, + "slid_loss": 0.5784, + "step": 473, + "time": 304.01 + }, + { + "epoch": 0.61, + "learning_rate": "9.8418e-06", + "loss": 0.5771, + "slid_loss": 0.5784, + "step": 474, + "time": 296.5 + }, + { + "epoch": 0.62, + "learning_rate": "9.8411e-06", + "loss": 0.5687, + "slid_loss": 0.5781, + "step": 475, + "time": 300.4 + }, + { + "epoch": 0.62, + "learning_rate": "9.8404e-06", + "loss": 0.6119, + "slid_loss": 0.5787, + "step": 476, + "time": 305.33 + }, + { + "epoch": 0.62, + "learning_rate": "9.8396e-06", + "loss": 0.5623, + "slid_loss": 0.5783, + "step": 477, + "time": 298.81 + }, + { + "epoch": 0.62, + "learning_rate": "9.8389e-06", + "loss": 0.5784, + "slid_loss": 0.5782, + "step": 478, + "time": 299.61 + }, + { + "epoch": 0.62, + "learning_rate": "9.8382e-06", + "loss": 0.5713, + "slid_loss": 0.5778, + "step": 479, + "time": 299.37 + }, + { + "epoch": 0.62, + "learning_rate": "9.8375e-06", + "loss": 0.5819, + "slid_loss": 0.5778, + "step": 480, + "time": 303.75 + }, + { + "epoch": 0.62, + "learning_rate": "9.8367e-06", + "loss": 0.6179, + "slid_loss": 0.5782, + "step": 481, + "time": 301.54 + }, + { + "epoch": 0.62, + "learning_rate": "9.8360e-06", + "loss": 0.5672, + "slid_loss": 0.5778, + "step": 482, + "time": 320.09 + }, + { + "epoch": 0.63, + "learning_rate": "9.8353e-06", + "loss": 0.5843, + "slid_loss": 0.5778, + "step": 483, + "time": 457.75 + }, + { + "epoch": 0.63, + "learning_rate": "9.8345e-06", + "loss": 0.5752, + "slid_loss": 0.5777, + "step": 484, + "time": 431.51 + }, + { + "epoch": 0.63, + "learning_rate": "9.8338e-06", + "loss": 0.5523, + "slid_loss": 0.5775, + "step": 485, + "time": 321.57 + }, + { + "epoch": 0.63, + "learning_rate": "9.8331e-06", + "loss": 0.5891, + "slid_loss": 0.5776, + "step": 486, + "time": 300.63 + }, + { + "epoch": 0.63, + "learning_rate": "9.8323e-06", + "loss": 0.5725, + "slid_loss": 0.5778, + "step": 487, + "time": 303.72 + }, + { + "epoch": 0.63, + "learning_rate": "9.8316e-06", + "loss": 0.5792, + "slid_loss": 0.5777, + "step": 488, + "time": 298.59 + }, + { + "epoch": 0.63, + "learning_rate": "9.8308e-06", + "loss": 0.5648, + "slid_loss": 0.5776, + "step": 489, + "time": 298.27 + }, + { + "epoch": 0.63, + "learning_rate": "9.8301e-06", + "loss": 0.5668, + "slid_loss": 0.5777, + "step": 490, + "time": 305.54 + }, + { + "epoch": 0.64, + "learning_rate": "9.8293e-06", + "loss": 0.568, + "slid_loss": 0.5777, + "step": 491, + "time": 303.7 + }, + { + "epoch": 0.64, + "learning_rate": "9.8286e-06", + "loss": 0.5719, + "slid_loss": 0.5775, + "step": 492, + "time": 300.89 + }, + { + "epoch": 0.64, + "learning_rate": "9.8278e-06", + "loss": 0.6006, + "slid_loss": 0.5776, + "step": 493, + "time": 297.17 + }, + { + "epoch": 0.64, + "learning_rate": "9.8271e-06", + "loss": 0.6049, + "slid_loss": 0.5781, + "step": 494, + "time": 302.1 + }, + { + "epoch": 0.64, + "learning_rate": "9.8263e-06", + "loss": 0.5823, + "slid_loss": 0.578, + "step": 495, + "time": 304.26 + }, + { + "epoch": 0.64, + "learning_rate": "9.8256e-06", + "loss": 0.5863, + "slid_loss": 0.5779, + "step": 496, + "time": 300.98 + }, + { + "epoch": 0.64, + "learning_rate": "9.8248e-06", + "loss": 0.5983, + "slid_loss": 0.5778, + "step": 497, + "time": 295.65 + }, + { + "epoch": 0.64, + "learning_rate": "9.8241e-06", + "loss": 0.5871, + "slid_loss": 0.578, + "step": 498, + "time": 301.29 + }, + { + "epoch": 0.65, + "learning_rate": "9.8233e-06", + "loss": 0.5856, + "slid_loss": 0.5781, + "step": 499, + "time": 298.92 + }, + { + "epoch": 0.65, + "learning_rate": "9.8226e-06", + "loss": 0.5778, + "slid_loss": 0.578, + "step": 500, + "time": 303.74 + }, + { + "epoch": 0.65, + "learning_rate": "9.8218e-06", + "loss": 0.5678, + "slid_loss": 0.5778, + "step": 501, + "time": 2049.88 + }, + { + "epoch": 0.65, + "learning_rate": "9.8210e-06", + "loss": 0.591, + "slid_loss": 0.5782, + "step": 502, + "time": 297.95 + }, + { + "epoch": 0.65, + "learning_rate": "9.8203e-06", + "loss": 0.5714, + "slid_loss": 0.578, + "step": 503, + "time": 302.64 + }, + { + "epoch": 0.65, + "learning_rate": "9.8195e-06", + "loss": 0.5987, + "slid_loss": 0.5782, + "step": 504, + "time": 301.99 + }, + { + "epoch": 0.65, + "learning_rate": "9.8187e-06", + "loss": 0.575, + "slid_loss": 0.5783, + "step": 505, + "time": 297.94 + }, + { + "epoch": 0.66, + "learning_rate": "9.8180e-06", + "loss": 0.5822, + "slid_loss": 0.5785, + "step": 506, + "time": 293.53 + }, + { + "epoch": 0.66, + "learning_rate": "9.8172e-06", + "loss": 0.5876, + "slid_loss": 0.5788, + "step": 507, + "time": 300.75 + }, + { + "epoch": 0.66, + "learning_rate": "9.8164e-06", + "loss": 0.5721, + "slid_loss": 0.5789, + "step": 508, + "time": 303.66 + }, + { + "epoch": 0.66, + "learning_rate": "9.8157e-06", + "loss": 0.5543, + "slid_loss": 0.5788, + "step": 509, + "time": 302.79 + }, + { + "epoch": 0.66, + "learning_rate": "9.8149e-06", + "loss": 0.5652, + "slid_loss": 0.5787, + "step": 510, + "time": 300.58 + }, + { + "epoch": 0.66, + "learning_rate": "9.8141e-06", + "loss": 0.6082, + "slid_loss": 0.579, + "step": 511, + "time": 305.49 + }, + { + "epoch": 0.66, + "learning_rate": "9.8133e-06", + "loss": 0.5794, + "slid_loss": 0.5792, + "step": 512, + "time": 301.02 + }, + { + "epoch": 0.66, + "learning_rate": "9.8125e-06", + "loss": 0.5701, + "slid_loss": 0.5792, + "step": 513, + "time": 303.23 + }, + { + "epoch": 0.67, + "learning_rate": "9.8118e-06", + "loss": 0.5764, + "slid_loss": 0.579, + "step": 514, + "time": 300.22 + }, + { + "epoch": 0.67, + "learning_rate": "9.8110e-06", + "loss": 0.5837, + "slid_loss": 0.5791, + "step": 515, + "time": 305.91 + }, + { + "epoch": 0.67, + "learning_rate": "9.8102e-06", + "loss": 0.5901, + "slid_loss": 0.5792, + "step": 516, + "time": 301.4 + }, + { + "epoch": 0.67, + "learning_rate": "9.8094e-06", + "loss": 0.5621, + "slid_loss": 0.579, + "step": 517, + "time": 296.36 + }, + { + "epoch": 0.67, + "learning_rate": "9.8086e-06", + "loss": 0.5756, + "slid_loss": 0.5791, + "step": 518, + "time": 301.91 + }, + { + "epoch": 0.67, + "learning_rate": "9.8078e-06", + "loss": 0.5763, + "slid_loss": 0.5792, + "step": 519, + "time": 301.14 + }, + { + "epoch": 0.67, + "learning_rate": "9.8070e-06", + "loss": 0.5869, + "slid_loss": 0.5795, + "step": 520, + "time": 296.65 + }, + { + "epoch": 0.67, + "learning_rate": "9.8062e-06", + "loss": 0.5763, + "slid_loss": 0.5798, + "step": 521, + "time": 299.31 + }, + { + "epoch": 0.68, + "learning_rate": "9.8054e-06", + "loss": 0.5921, + "slid_loss": 0.5798, + "step": 522, + "time": 300.99 + }, + { + "epoch": 0.68, + "learning_rate": "9.8047e-06", + "loss": 0.5499, + "slid_loss": 0.5793, + "step": 523, + "time": 300.13 + }, + { + "epoch": 0.68, + "learning_rate": "9.8039e-06", + "loss": 0.5583, + "slid_loss": 0.5792, + "step": 524, + "time": 298.62 + }, + { + "epoch": 0.68, + "learning_rate": "9.8031e-06", + "loss": 0.5705, + "slid_loss": 0.579, + "step": 525, + "time": 298.48 + }, + { + "epoch": 0.68, + "learning_rate": "9.8023e-06", + "loss": 0.618, + "slid_loss": 0.5794, + "step": 526, + "time": 296.88 + }, + { + "epoch": 0.68, + "learning_rate": "9.8015e-06", + "loss": 0.5895, + "slid_loss": 0.5795, + "step": 527, + "time": 296.39 + }, + { + "epoch": 0.68, + "learning_rate": "9.8007e-06", + "loss": 0.5516, + "slid_loss": 0.579, + "step": 528, + "time": 299.57 + }, + { + "epoch": 0.69, + "learning_rate": "9.7998e-06", + "loss": 0.593, + "slid_loss": 0.5792, + "step": 529, + "time": 302.3 + }, + { + "epoch": 0.69, + "learning_rate": "9.7990e-06", + "loss": 0.5789, + "slid_loss": 0.5791, + "step": 530, + "time": 300.1 + }, + { + "epoch": 0.69, + "learning_rate": "9.7982e-06", + "loss": 0.5832, + "slid_loss": 0.5788, + "step": 531, + "time": 300.39 + }, + { + "epoch": 0.69, + "learning_rate": "9.7974e-06", + "loss": 0.5875, + "slid_loss": 0.5788, + "step": 532, + "time": 302.23 + }, + { + "epoch": 0.69, + "learning_rate": "9.7966e-06", + "loss": 0.5849, + "slid_loss": 0.5787, + "step": 533, + "time": 304.22 + }, + { + "epoch": 0.69, + "learning_rate": "9.7958e-06", + "loss": 0.574, + "slid_loss": 0.5787, + "step": 534, + "time": 296.74 + }, + { + "epoch": 0.69, + "learning_rate": "9.7950e-06", + "loss": 0.5671, + "slid_loss": 0.5785, + "step": 535, + "time": 298.14 + }, + { + "epoch": 0.69, + "learning_rate": "9.7942e-06", + "loss": 0.5839, + "slid_loss": 0.5785, + "step": 536, + "time": 294.14 + }, + { + "epoch": 0.7, + "learning_rate": "9.7933e-06", + "loss": 0.5725, + "slid_loss": 0.5784, + "step": 537, + "time": 299.7 + }, + { + "epoch": 0.7, + "learning_rate": "9.7925e-06", + "loss": 0.5664, + "slid_loss": 0.5783, + "step": 538, + "time": 293.94 + }, + { + "epoch": 0.7, + "learning_rate": "9.7917e-06", + "loss": 0.581, + "slid_loss": 0.5783, + "step": 539, + "time": 299.01 + }, + { + "epoch": 0.7, + "learning_rate": "9.7909e-06", + "loss": 0.6031, + "slid_loss": 0.5786, + "step": 540, + "time": 299.36 + }, + { + "epoch": 0.7, + "learning_rate": "9.7901e-06", + "loss": 0.5569, + "slid_loss": 0.5785, + "step": 541, + "time": 302.66 + }, + { + "epoch": 0.7, + "learning_rate": "9.7892e-06", + "loss": 0.5837, + "slid_loss": 0.5784, + "step": 542, + "time": 297.96 + }, + { + "epoch": 0.7, + "learning_rate": "9.7884e-06", + "loss": 0.5852, + "slid_loss": 0.5782, + "step": 543, + "time": 302.07 + }, + { + "epoch": 0.7, + "learning_rate": "9.7876e-06", + "loss": 0.5566, + "slid_loss": 0.5779, + "step": 544, + "time": 299.15 + }, + { + "epoch": 0.71, + "learning_rate": "9.7868e-06", + "loss": 0.5821, + "slid_loss": 0.5778, + "step": 545, + "time": 295.31 + }, + { + "epoch": 0.71, + "learning_rate": "9.7859e-06", + "loss": 0.5775, + "slid_loss": 0.5783, + "step": 546, + "time": 302.12 + }, + { + "epoch": 0.71, + "learning_rate": "9.7851e-06", + "loss": 0.6021, + "slid_loss": 0.5785, + "step": 547, + "time": 298.46 + }, + { + "epoch": 0.71, + "learning_rate": "9.7843e-06", + "loss": 0.6014, + "slid_loss": 0.5786, + "step": 548, + "time": 297.01 + }, + { + "epoch": 0.71, + "learning_rate": "9.7834e-06", + "loss": 0.5815, + "slid_loss": 0.5789, + "step": 549, + "time": 303.58 + }, + { + "epoch": 0.71, + "learning_rate": "9.7826e-06", + "loss": 0.5721, + "slid_loss": 0.579, + "step": 550, + "time": 293.17 + }, + { + "epoch": 0.71, + "learning_rate": "9.7817e-06", + "loss": 0.5768, + "slid_loss": 0.5792, + "step": 551, + "time": 297.87 + }, + { + "epoch": 0.71, + "learning_rate": "9.7809e-06", + "loss": 0.5622, + "slid_loss": 0.5792, + "step": 552, + "time": 303.11 + }, + { + "epoch": 0.72, + "learning_rate": "9.7801e-06", + "loss": 0.5484, + "slid_loss": 0.5789, + "step": 553, + "time": 298.86 + }, + { + "epoch": 0.72, + "learning_rate": "9.7792e-06", + "loss": 0.5793, + "slid_loss": 0.5789, + "step": 554, + "time": 300.88 + }, + { + "epoch": 0.72, + "learning_rate": "9.7784e-06", + "loss": 0.5794, + "slid_loss": 0.5791, + "step": 555, + "time": 297.44 + }, + { + "epoch": 0.72, + "learning_rate": "9.7775e-06", + "loss": 0.5711, + "slid_loss": 0.5791, + "step": 556, + "time": 297.39 + }, + { + "epoch": 0.72, + "learning_rate": "9.7767e-06", + "loss": 0.5841, + "slid_loss": 0.579, + "step": 557, + "time": 299.79 + }, + { + "epoch": 0.72, + "learning_rate": "9.7758e-06", + "loss": 0.5912, + "slid_loss": 0.5793, + "step": 558, + "time": 298.87 + }, + { + "epoch": 0.72, + "learning_rate": "9.7750e-06", + "loss": 0.5774, + "slid_loss": 0.5791, + "step": 559, + "time": 301.18 + }, + { + "epoch": 0.73, + "learning_rate": "9.7741e-06", + "loss": 0.5456, + "slid_loss": 0.5785, + "step": 560, + "time": 298.72 + }, + { + "epoch": 0.73, + "learning_rate": "9.7733e-06", + "loss": 0.5367, + "slid_loss": 0.5782, + "step": 561, + "time": 299.38 + }, + { + "epoch": 0.73, + "learning_rate": "9.7724e-06", + "loss": 0.5657, + "slid_loss": 0.5783, + "step": 562, + "time": 298.86 + }, + { + "epoch": 0.73, + "learning_rate": "9.7716e-06", + "loss": 0.5877, + "slid_loss": 0.5787, + "step": 563, + "time": 303.56 + }, + { + "epoch": 0.73, + "learning_rate": "9.7707e-06", + "loss": 0.5731, + "slid_loss": 0.579, + "step": 564, + "time": 302.98 + }, + { + "epoch": 0.73, + "learning_rate": "9.7698e-06", + "loss": 0.5662, + "slid_loss": 0.5789, + "step": 565, + "time": 299.55 + }, + { + "epoch": 0.73, + "learning_rate": "9.7690e-06", + "loss": 0.5598, + "slid_loss": 0.5787, + "step": 566, + "time": 296.33 + }, + { + "epoch": 0.73, + "learning_rate": "9.7681e-06", + "loss": 0.5672, + "slid_loss": 0.5784, + "step": 567, + "time": 299.91 + }, + { + "epoch": 0.74, + "learning_rate": "9.7672e-06", + "loss": 0.5767, + "slid_loss": 0.5781, + "step": 568, + "time": 300.64 + }, + { + "epoch": 0.74, + "learning_rate": "9.7664e-06", + "loss": 0.5659, + "slid_loss": 0.5778, + "step": 569, + "time": 302.66 + }, + { + "epoch": 0.74, + "learning_rate": "9.7655e-06", + "loss": 0.5682, + "slid_loss": 0.5777, + "step": 570, + "time": 298.52 + }, + { + "epoch": 0.74, + "learning_rate": "9.7646e-06", + "loss": 0.5822, + "slid_loss": 0.5776, + "step": 571, + "time": 302.79 + }, + { + "epoch": 0.74, + "learning_rate": "9.7638e-06", + "loss": 0.587, + "slid_loss": 0.5777, + "step": 572, + "time": 303.17 + }, + { + "epoch": 0.74, + "learning_rate": "9.7629e-06", + "loss": 0.5786, + "slid_loss": 0.5777, + "step": 573, + "time": 295.3 + }, + { + "epoch": 0.74, + "learning_rate": "9.7620e-06", + "loss": 0.6122, + "slid_loss": 0.578, + "step": 574, + "time": 303.38 + }, + { + "epoch": 0.74, + "learning_rate": "9.7611e-06", + "loss": 0.5696, + "slid_loss": 0.5781, + "step": 575, + "time": 300.1 + }, + { + "epoch": 0.75, + "learning_rate": "9.7603e-06", + "loss": 0.5628, + "slid_loss": 0.5776, + "step": 576, + "time": 300.2 + }, + { + "epoch": 0.75, + "learning_rate": "9.7594e-06", + "loss": 0.5719, + "slid_loss": 0.5777, + "step": 577, + "time": 300.67 + }, + { + "epoch": 0.75, + "learning_rate": "9.7585e-06", + "loss": 0.592, + "slid_loss": 0.5778, + "step": 578, + "time": 302.63 + }, + { + "epoch": 0.75, + "learning_rate": "9.7576e-06", + "loss": 0.5533, + "slid_loss": 0.5776, + "step": 579, + "time": 303.71 + }, + { + "epoch": 0.75, + "learning_rate": "9.7567e-06", + "loss": 0.5822, + "slid_loss": 0.5776, + "step": 580, + "time": 301.66 + }, + { + "epoch": 0.75, + "learning_rate": "9.7559e-06", + "loss": 0.5546, + "slid_loss": 0.577, + "step": 581, + "time": 299.5 + }, + { + "epoch": 0.75, + "learning_rate": "9.7550e-06", + "loss": 0.5803, + "slid_loss": 0.5771, + "step": 582, + "time": 299.14 + }, + { + "epoch": 0.75, + "learning_rate": "9.7541e-06", + "loss": 0.5686, + "slid_loss": 0.577, + "step": 583, + "time": 304.85 + }, + { + "epoch": 0.76, + "learning_rate": "9.7532e-06", + "loss": 0.5827, + "slid_loss": 0.577, + "step": 584, + "time": 304.42 + }, + { + "epoch": 0.76, + "learning_rate": "9.7523e-06", + "loss": 0.5796, + "slid_loss": 0.5773, + "step": 585, + "time": 304.0 + }, + { + "epoch": 0.76, + "learning_rate": "9.7514e-06", + "loss": 0.5625, + "slid_loss": 0.577, + "step": 586, + "time": 297.89 + }, + { + "epoch": 0.76, + "learning_rate": "9.7505e-06", + "loss": 0.5733, + "slid_loss": 0.5771, + "step": 587, + "time": 298.03 + }, + { + "epoch": 0.76, + "learning_rate": "9.7496e-06", + "loss": 0.5815, + "slid_loss": 0.5771, + "step": 588, + "time": 303.5 + }, + { + "epoch": 0.76, + "learning_rate": "9.7487e-06", + "loss": 0.5898, + "slid_loss": 0.5773, + "step": 589, + "time": 301.59 + }, + { + "epoch": 0.76, + "learning_rate": "9.7478e-06", + "loss": 0.5994, + "slid_loss": 0.5777, + "step": 590, + "time": 303.28 + }, + { + "epoch": 0.77, + "learning_rate": "9.7469e-06", + "loss": 0.5832, + "slid_loss": 0.5778, + "step": 591, + "time": 294.78 + }, + { + "epoch": 0.77, + "learning_rate": "9.7460e-06", + "loss": 0.5676, + "slid_loss": 0.5778, + "step": 592, + "time": 304.99 + }, + { + "epoch": 0.77, + "learning_rate": "9.7451e-06", + "loss": 0.5801, + "slid_loss": 0.5776, + "step": 593, + "time": 298.88 + }, + { + "epoch": 0.77, + "learning_rate": "9.7442e-06", + "loss": 0.5912, + "slid_loss": 0.5774, + "step": 594, + "time": 295.82 + }, + { + "epoch": 0.77, + "learning_rate": "9.7433e-06", + "loss": 0.5739, + "slid_loss": 0.5773, + "step": 595, + "time": 300.49 + }, + { + "epoch": 0.77, + "learning_rate": "9.7424e-06", + "loss": 0.5688, + "slid_loss": 0.5772, + "step": 596, + "time": 294.8 + }, + { + "epoch": 0.77, + "learning_rate": "9.7415e-06", + "loss": 0.5834, + "slid_loss": 0.577, + "step": 597, + "time": 303.61 + }, + { + "epoch": 0.77, + "learning_rate": "9.7406e-06", + "loss": 0.5988, + "slid_loss": 0.5771, + "step": 598, + "time": 296.42 + }, + { + "epoch": 0.78, + "learning_rate": "9.7397e-06", + "loss": 0.578, + "slid_loss": 0.5771, + "step": 599, + "time": 293.08 + }, + { + "epoch": 0.78, + "learning_rate": "9.7387e-06", + "loss": 0.5955, + "slid_loss": 0.5772, + "step": 600, + "time": 299.55 + }, + { + "epoch": 0.78, + "learning_rate": "9.7378e-06", + "loss": 0.5693, + "slid_loss": 0.5772, + "step": 601, + "time": 2053.43 + }, + { + "epoch": 0.78, + "learning_rate": "9.7369e-06", + "loss": 0.5697, + "slid_loss": 0.577, + "step": 602, + "time": 297.5 + }, + { + "epoch": 0.78, + "learning_rate": "9.7360e-06", + "loss": 0.5897, + "slid_loss": 0.5772, + "step": 603, + "time": 355.72 + }, + { + "epoch": 0.78, + "learning_rate": "9.7351e-06", + "loss": 0.5582, + "slid_loss": 0.5768, + "step": 604, + "time": 543.71 + }, + { + "epoch": 0.78, + "learning_rate": "9.7341e-06", + "loss": 0.5588, + "slid_loss": 0.5766, + "step": 605, + "time": 450.24 + }, + { + "epoch": 0.78, + "learning_rate": "9.7332e-06", + "loss": 0.5721, + "slid_loss": 0.5765, + "step": 606, + "time": 309.18 + }, + { + "epoch": 0.79, + "learning_rate": "9.7323e-06", + "loss": 0.5639, + "slid_loss": 0.5763, + "step": 607, + "time": 300.7 + }, + { + "epoch": 0.79, + "learning_rate": "9.7314e-06", + "loss": 0.567, + "slid_loss": 0.5763, + "step": 608, + "time": 301.61 + }, + { + "epoch": 0.79, + "learning_rate": "9.7304e-06", + "loss": 0.5601, + "slid_loss": 0.5763, + "step": 609, + "time": 300.66 + }, + { + "epoch": 0.79, + "learning_rate": "9.7295e-06", + "loss": 0.5744, + "slid_loss": 0.5764, + "step": 610, + "time": 296.57 + }, + { + "epoch": 0.79, + "learning_rate": "9.7286e-06", + "loss": 0.5749, + "slid_loss": 0.5761, + "step": 611, + "time": 294.55 + }, + { + "epoch": 0.79, + "learning_rate": "9.7277e-06", + "loss": 0.5752, + "slid_loss": 0.576, + "step": 612, + "time": 296.3 + }, + { + "epoch": 0.79, + "learning_rate": "9.7267e-06", + "loss": 0.5614, + "slid_loss": 0.5759, + "step": 613, + "time": 298.5 + }, + { + "epoch": 0.8, + "learning_rate": "9.7258e-06", + "loss": 0.5752, + "slid_loss": 0.5759, + "step": 614, + "time": 292.6 + }, + { + "epoch": 0.8, + "learning_rate": "9.7249e-06", + "loss": 0.587, + "slid_loss": 0.576, + "step": 615, + "time": 297.26 + }, + { + "epoch": 0.8, + "learning_rate": "9.7239e-06", + "loss": 0.5631, + "slid_loss": 0.5757, + "step": 616, + "time": 307.76 + }, + { + "epoch": 0.8, + "learning_rate": "9.7230e-06", + "loss": 0.6094, + "slid_loss": 0.5762, + "step": 617, + "time": 300.4 + }, + { + "epoch": 0.8, + "learning_rate": "9.7220e-06", + "loss": 0.5581, + "slid_loss": 0.576, + "step": 618, + "time": 298.17 + }, + { + "epoch": 0.8, + "learning_rate": "9.7211e-06", + "loss": 0.5693, + "slid_loss": 0.5759, + "step": 619, + "time": 299.92 + }, + { + "epoch": 0.8, + "learning_rate": "9.7201e-06", + "loss": 0.5682, + "slid_loss": 0.5757, + "step": 620, + "time": 302.01 + }, + { + "epoch": 0.8, + "learning_rate": "9.7192e-06", + "loss": 0.5668, + "slid_loss": 0.5756, + "step": 621, + "time": 302.89 + }, + { + "epoch": 0.81, + "learning_rate": "9.7183e-06", + "loss": 0.5831, + "slid_loss": 0.5755, + "step": 622, + "time": 300.0 + }, + { + "epoch": 0.81, + "learning_rate": "9.7173e-06", + "loss": 0.5892, + "slid_loss": 0.5759, + "step": 623, + "time": 297.76 + }, + { + "epoch": 0.81, + "learning_rate": "9.7164e-06", + "loss": 0.5668, + "slid_loss": 0.576, + "step": 624, + "time": 302.83 + }, + { + "epoch": 0.81, + "learning_rate": "9.7154e-06", + "loss": 0.5879, + "slid_loss": 0.5762, + "step": 625, + "time": 308.26 + }, + { + "epoch": 0.81, + "learning_rate": "9.7144e-06", + "loss": 0.5769, + "slid_loss": 0.5758, + "step": 626, + "time": 298.8 + }, + { + "epoch": 0.81, + "learning_rate": "9.7135e-06", + "loss": 0.5729, + "slid_loss": 0.5756, + "step": 627, + "time": 302.73 + }, + { + "epoch": 0.81, + "learning_rate": "9.7125e-06", + "loss": 0.5622, + "slid_loss": 0.5757, + "step": 628, + "time": 304.61 + }, + { + "epoch": 0.81, + "learning_rate": "9.7116e-06", + "loss": 0.5882, + "slid_loss": 0.5757, + "step": 629, + "time": 302.35 + }, + { + "epoch": 0.82, + "learning_rate": "9.7106e-06", + "loss": 0.5668, + "slid_loss": 0.5756, + "step": 630, + "time": 301.02 + }, + { + "epoch": 0.82, + "learning_rate": "9.7097e-06", + "loss": 0.5877, + "slid_loss": 0.5756, + "step": 631, + "time": 300.32 + }, + { + "epoch": 0.82, + "learning_rate": "9.7087e-06", + "loss": 0.5718, + "slid_loss": 0.5754, + "step": 632, + "time": 293.92 + }, + { + "epoch": 0.82, + "learning_rate": "9.7077e-06", + "loss": 0.5553, + "slid_loss": 0.5752, + "step": 633, + "time": 296.84 + }, + { + "epoch": 0.82, + "learning_rate": "9.7068e-06", + "loss": 0.5692, + "slid_loss": 0.5751, + "step": 634, + "time": 303.08 + }, + { + "epoch": 0.82, + "learning_rate": "9.7058e-06", + "loss": 0.5597, + "slid_loss": 0.575, + "step": 635, + "time": 304.45 + }, + { + "epoch": 0.82, + "learning_rate": "9.7048e-06", + "loss": 0.5771, + "slid_loss": 0.575, + "step": 636, + "time": 300.98 + }, + { + "epoch": 0.82, + "learning_rate": "9.7039e-06", + "loss": 0.5856, + "slid_loss": 0.5751, + "step": 637, + "time": 303.39 + }, + { + "epoch": 0.83, + "learning_rate": "9.7029e-06", + "loss": 0.5705, + "slid_loss": 0.5751, + "step": 638, + "time": 300.56 + }, + { + "epoch": 0.83, + "learning_rate": "9.7019e-06", + "loss": 0.5753, + "slid_loss": 0.5751, + "step": 639, + "time": 302.55 + }, + { + "epoch": 0.83, + "learning_rate": "9.7009e-06", + "loss": 0.5651, + "slid_loss": 0.5747, + "step": 640, + "time": 300.37 + }, + { + "epoch": 0.83, + "learning_rate": "9.7000e-06", + "loss": 0.5477, + "slid_loss": 0.5746, + "step": 641, + "time": 299.12 + }, + { + "epoch": 0.83, + "learning_rate": "9.6990e-06", + "loss": 0.5573, + "slid_loss": 0.5743, + "step": 642, + "time": 302.26 + }, + { + "epoch": 0.83, + "learning_rate": "9.6980e-06", + "loss": 0.5694, + "slid_loss": 0.5742, + "step": 643, + "time": 300.38 + }, + { + "epoch": 0.83, + "learning_rate": "9.6970e-06", + "loss": 0.596, + "slid_loss": 0.5746, + "step": 644, + "time": 303.72 + }, + { + "epoch": 0.84, + "learning_rate": "9.6961e-06", + "loss": 0.568, + "slid_loss": 0.5744, + "step": 645, + "time": 299.1 + }, + { + "epoch": 0.84, + "learning_rate": "9.6951e-06", + "loss": 0.5573, + "slid_loss": 0.5742, + "step": 646, + "time": 296.37 + }, + { + "epoch": 0.84, + "learning_rate": "9.6941e-06", + "loss": 0.572, + "slid_loss": 0.5739, + "step": 647, + "time": 302.76 + }, + { + "epoch": 0.84, + "learning_rate": "9.6931e-06", + "loss": 0.6008, + "slid_loss": 0.5739, + "step": 648, + "time": 304.74 + }, + { + "epoch": 0.84, + "learning_rate": "9.6921e-06", + "loss": 0.5735, + "slid_loss": 0.5738, + "step": 649, + "time": 303.11 + }, + { + "epoch": 0.84, + "learning_rate": "9.6911e-06", + "loss": 0.5629, + "slid_loss": 0.5738, + "step": 650, + "time": 301.38 + }, + { + "epoch": 0.84, + "learning_rate": "9.6901e-06", + "loss": 0.5738, + "slid_loss": 0.5737, + "step": 651, + "time": 303.03 + }, + { + "epoch": 0.84, + "learning_rate": "9.6891e-06", + "loss": 0.5789, + "slid_loss": 0.5739, + "step": 652, + "time": 299.84 + }, + { + "epoch": 0.85, + "learning_rate": "9.6881e-06", + "loss": 0.5989, + "slid_loss": 0.5744, + "step": 653, + "time": 304.95 + }, + { + "epoch": 0.85, + "learning_rate": "9.6872e-06", + "loss": 0.5602, + "slid_loss": 0.5742, + "step": 654, + "time": 305.98 + }, + { + "epoch": 0.85, + "learning_rate": "9.6862e-06", + "loss": 0.5695, + "slid_loss": 0.5741, + "step": 655, + "time": 299.64 + }, + { + "epoch": 0.85, + "learning_rate": "9.6852e-06", + "loss": 0.5742, + "slid_loss": 0.5741, + "step": 656, + "time": 300.47 + }, + { + "epoch": 0.85, + "learning_rate": "9.6842e-06", + "loss": 0.5649, + "slid_loss": 0.5739, + "step": 657, + "time": 299.06 + }, + { + "epoch": 0.85, + "learning_rate": "9.6832e-06", + "loss": 0.5553, + "slid_loss": 0.5736, + "step": 658, + "time": 302.71 + }, + { + "epoch": 0.85, + "learning_rate": "9.6822e-06", + "loss": 0.5709, + "slid_loss": 0.5735, + "step": 659, + "time": 302.34 + }, + { + "epoch": 0.85, + "learning_rate": "9.6812e-06", + "loss": 0.5565, + "slid_loss": 0.5736, + "step": 660, + "time": 298.23 + }, + { + "epoch": 0.86, + "learning_rate": "9.6801e-06", + "loss": 0.573, + "slid_loss": 0.574, + "step": 661, + "time": 295.97 + }, + { + "epoch": 0.86, + "learning_rate": "9.6791e-06", + "loss": 0.5797, + "slid_loss": 0.5741, + "step": 662, + "time": 299.7 + }, + { + "epoch": 0.86, + "learning_rate": "9.6781e-06", + "loss": 0.5464, + "slid_loss": 0.5737, + "step": 663, + "time": 299.82 + }, + { + "epoch": 0.86, + "learning_rate": "9.6771e-06", + "loss": 0.5701, + "slid_loss": 0.5737, + "step": 664, + "time": 305.05 + }, + { + "epoch": 0.86, + "learning_rate": "9.6761e-06", + "loss": 0.5567, + "slid_loss": 0.5736, + "step": 665, + "time": 299.79 + }, + { + "epoch": 0.86, + "learning_rate": "9.6751e-06", + "loss": 0.5588, + "slid_loss": 0.5736, + "step": 666, + "time": 300.74 + }, + { + "epoch": 0.86, + "learning_rate": "9.6741e-06", + "loss": 0.5927, + "slid_loss": 0.5738, + "step": 667, + "time": 300.0 + }, + { + "epoch": 0.87, + "learning_rate": "9.6731e-06", + "loss": 0.5572, + "slid_loss": 0.5736, + "step": 668, + "time": 301.77 + }, + { + "epoch": 0.87, + "learning_rate": "9.6721e-06", + "loss": 0.5461, + "slid_loss": 0.5734, + "step": 669, + "time": 297.96 + }, + { + "epoch": 0.87, + "learning_rate": "9.6710e-06", + "loss": 0.564, + "slid_loss": 0.5734, + "step": 670, + "time": 301.97 + }, + { + "epoch": 0.87, + "learning_rate": "9.6700e-06", + "loss": 0.5656, + "slid_loss": 0.5732, + "step": 671, + "time": 299.58 + }, + { + "epoch": 0.87, + "learning_rate": "9.6690e-06", + "loss": 0.5534, + "slid_loss": 0.5729, + "step": 672, + "time": 296.02 + }, + { + "epoch": 0.87, + "learning_rate": "9.6680e-06", + "loss": 0.5292, + "slid_loss": 0.5724, + "step": 673, + "time": 302.8 + }, + { + "epoch": 0.87, + "learning_rate": "9.6669e-06", + "loss": 0.5549, + "slid_loss": 0.5718, + "step": 674, + "time": 301.94 + }, + { + "epoch": 0.87, + "learning_rate": "9.6659e-06", + "loss": 0.5647, + "slid_loss": 0.5718, + "step": 675, + "time": 300.67 + }, + { + "epoch": 0.88, + "learning_rate": "9.6649e-06", + "loss": 0.555, + "slid_loss": 0.5717, + "step": 676, + "time": 296.84 + }, + { + "epoch": 0.88, + "learning_rate": "9.6639e-06", + "loss": 0.5872, + "slid_loss": 0.5719, + "step": 677, + "time": 303.18 + }, + { + "epoch": 0.88, + "learning_rate": "9.6628e-06", + "loss": 0.5538, + "slid_loss": 0.5715, + "step": 678, + "time": 303.01 + }, + { + "epoch": 0.88, + "learning_rate": "9.6618e-06", + "loss": 0.5584, + "slid_loss": 0.5715, + "step": 679, + "time": 302.31 + }, + { + "epoch": 0.88, + "learning_rate": "9.6608e-06", + "loss": 0.5835, + "slid_loss": 0.5715, + "step": 680, + "time": 302.48 + }, + { + "epoch": 0.88, + "learning_rate": "9.6597e-06", + "loss": 0.5938, + "slid_loss": 0.5719, + "step": 681, + "time": 298.4 + }, + { + "epoch": 0.88, + "learning_rate": "9.6587e-06", + "loss": 0.5878, + "slid_loss": 0.572, + "step": 682, + "time": 300.58 + }, + { + "epoch": 0.88, + "learning_rate": "9.6577e-06", + "loss": 0.5725, + "slid_loss": 0.572, + "step": 683, + "time": 302.24 + }, + { + "epoch": 0.89, + "learning_rate": "9.6566e-06", + "loss": 0.5648, + "slid_loss": 0.5719, + "step": 684, + "time": 301.51 + }, + { + "epoch": 0.89, + "learning_rate": "9.6556e-06", + "loss": 0.5772, + "slid_loss": 0.5718, + "step": 685, + "time": 298.21 + }, + { + "epoch": 0.89, + "learning_rate": "9.6545e-06", + "loss": 0.5852, + "slid_loss": 0.5721, + "step": 686, + "time": 302.79 + }, + { + "epoch": 0.89, + "learning_rate": "9.6535e-06", + "loss": 0.5793, + "slid_loss": 0.5721, + "step": 687, + "time": 299.38 + }, + { + "epoch": 0.89, + "learning_rate": "9.6525e-06", + "loss": 0.597, + "slid_loss": 0.5723, + "step": 688, + "time": 304.05 + }, + { + "epoch": 0.89, + "learning_rate": "9.6514e-06", + "loss": 0.5599, + "slid_loss": 0.572, + "step": 689, + "time": 297.95 + }, + { + "epoch": 0.89, + "learning_rate": "9.6504e-06", + "loss": 0.5822, + "slid_loss": 0.5718, + "step": 690, + "time": 304.74 + }, + { + "epoch": 0.89, + "learning_rate": "9.6493e-06", + "loss": 0.5838, + "slid_loss": 0.5718, + "step": 691, + "time": 302.75 + }, + { + "epoch": 0.9, + "learning_rate": "9.6483e-06", + "loss": 0.5442, + "slid_loss": 0.5716, + "step": 692, + "time": 300.4 + }, + { + "epoch": 0.9, + "learning_rate": "9.6472e-06", + "loss": 0.575, + "slid_loss": 0.5715, + "step": 693, + "time": 302.57 + }, + { + "epoch": 0.9, + "learning_rate": "9.6462e-06", + "loss": 0.5481, + "slid_loss": 0.5711, + "step": 694, + "time": 302.08 + }, + { + "epoch": 0.9, + "learning_rate": "9.6451e-06", + "loss": 0.5555, + "slid_loss": 0.5709, + "step": 695, + "time": 296.83 + }, + { + "epoch": 0.9, + "learning_rate": "9.6440e-06", + "loss": 0.5521, + "slid_loss": 0.5708, + "step": 696, + "time": 305.62 + }, + { + "epoch": 0.9, + "learning_rate": "9.6430e-06", + "loss": 0.5705, + "slid_loss": 0.5706, + "step": 697, + "time": 297.63 + }, + { + "epoch": 0.9, + "learning_rate": "9.6419e-06", + "loss": 0.5724, + "slid_loss": 0.5704, + "step": 698, + "time": 296.55 + }, + { + "epoch": 0.91, + "learning_rate": "9.6409e-06", + "loss": 0.5564, + "slid_loss": 0.5701, + "step": 699, + "time": 301.49 + }, + { + "epoch": 0.91, + "learning_rate": "9.6398e-06", + "loss": 0.5864, + "slid_loss": 0.5701, + "step": 700, + "time": 306.47 + } + ], + "logging_steps": 1.0, + "max_steps": 3860, + "num_train_epochs": 5, + "save_steps": 50000.0, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +}