|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 432, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.692307692307694e-07, |
|
"loss": 2.065, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.5384615384615387e-06, |
|
"loss": 2.0734, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.307692307692308e-06, |
|
"loss": 2.0549, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.0769230769230774e-06, |
|
"loss": 2.0151, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.846153846153847e-06, |
|
"loss": 1.997, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.615384615384616e-06, |
|
"loss": 1.9595, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.384615384615385e-06, |
|
"loss": 1.9403, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.153846153846155e-06, |
|
"loss": 1.9314, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 6.923076923076923e-06, |
|
"loss": 1.9198, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 7.692307692307694e-06, |
|
"loss": 1.9514, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.461538461538462e-06, |
|
"loss": 1.9133, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.230769230769232e-06, |
|
"loss": 1.9148, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1e-05, |
|
"loss": 1.896, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.999859456853116e-06, |
|
"loss": 1.8821, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.99943783531341e-06, |
|
"loss": 1.8483, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.998735159083295e-06, |
|
"loss": 1.8198, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.997751467665295e-06, |
|
"loss": 1.8479, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.996486816359851e-06, |
|
"loss": 1.8472, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.994941276262188e-06, |
|
"loss": 1.8071, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.99311493425834e-06, |
|
"loss": 1.8517, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.991007893020242e-06, |
|
"loss": 1.816, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.98862027099998e-06, |
|
"loss": 1.8063, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.985952202423116e-06, |
|
"loss": 1.8017, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.983003837281152e-06, |
|
"loss": 1.8223, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.979775341323097e-06, |
|
"loss": 1.8145, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.976266896046143e-06, |
|
"loss": 1.8268, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.972478698685463e-06, |
|
"loss": 1.7692, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.968410962203131e-06, |
|
"loss": 1.773, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.964063915276141e-06, |
|
"loss": 1.7792, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.959437802283552e-06, |
|
"loss": 1.7774, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.954532883292761e-06, |
|
"loss": 1.7651, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.949349434044862e-06, |
|
"loss": 1.8011, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.943887745939164e-06, |
|
"loss": 1.7864, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.938148126016805e-06, |
|
"loss": 1.7789, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.932130896943477e-06, |
|
"loss": 1.7514, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.925836396991309e-06, |
|
"loss": 1.7729, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.919264980019829e-06, |
|
"loss": 1.7676, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.912417015456088e-06, |
|
"loss": 1.7933, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.905292888273883e-06, |
|
"loss": 1.7307, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.897892998972113e-06, |
|
"loss": 1.7404, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.89021776355227e-06, |
|
"loss": 1.7741, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.882267613495049e-06, |
|
"loss": 1.7601, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.874042995736095e-06, |
|
"loss": 1.7748, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.865544372640872e-06, |
|
"loss": 1.7497, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.85677222197867e-06, |
|
"loss": 1.7459, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.847727036895759e-06, |
|
"loss": 1.7574, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.838409325887643e-06, |
|
"loss": 1.7503, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.828819612770497e-06, |
|
"loss": 1.7564, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.818958436651704e-06, |
|
"loss": 1.7693, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.808826351899551e-06, |
|
"loss": 1.7723, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.798423928112071e-06, |
|
"loss": 1.7767, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.787751750085014e-06, |
|
"loss": 1.7676, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.77681041777897e-06, |
|
"loss": 1.7143, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.765600546285654e-06, |
|
"loss": 1.7606, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.754122765793306e-06, |
|
"loss": 1.7771, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.742377721551286e-06, |
|
"loss": 1.7835, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.730366073833785e-06, |
|
"loss": 1.7591, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.718088497902709e-06, |
|
"loss": 1.7789, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.705545683969722e-06, |
|
"loss": 1.7526, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.692738337157441e-06, |
|
"loss": 1.7449, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.679667177459794e-06, |
|
"loss": 1.7617, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.66633293970155e-06, |
|
"loss": 1.7585, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.652736373497001e-06, |
|
"loss": 1.764, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.63887824320783e-06, |
|
"loss": 1.7151, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.624759327900131e-06, |
|
"loss": 1.7299, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.610380421300623e-06, |
|
"loss": 1.7642, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.595742331752014e-06, |
|
"loss": 1.7522, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.580845882167574e-06, |
|
"loss": 1.7303, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.565691909984864e-06, |
|
"loss": 1.7423, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.550281267118659e-06, |
|
"loss": 1.7434, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.534614819913056e-06, |
|
"loss": 1.6931, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.518693449092772e-06, |
|
"loss": 1.7438, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.502518049713633e-06, |
|
"loss": 1.6983, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.486089531112247e-06, |
|
"loss": 1.7422, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.469408816854898e-06, |
|
"loss": 1.7321, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.452476844685611e-06, |
|
"loss": 1.7252, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.435294566473453e-06, |
|
"loss": 1.6962, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.417862948158997e-06, |
|
"loss": 1.7193, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.40018296970005e-06, |
|
"loss": 1.697, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.382255625016527e-06, |
|
"loss": 1.7596, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.364081921934607e-06, |
|
"loss": 1.7381, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.345662882130056e-06, |
|
"loss": 1.7395, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.326999541070804e-06, |
|
"loss": 1.7373, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.308092947958725e-06, |
|
"loss": 1.7659, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.288944165670651e-06, |
|
"loss": 1.7416, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.269554270698636e-06, |
|
"loss": 1.7444, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.24992435308942e-06, |
|
"loss": 1.7693, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.23005551638316e-06, |
|
"loss": 1.7481, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.209948877551393e-06, |
|
"loss": 1.7063, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.189605566934235e-06, |
|
"loss": 1.7348, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.169026728176845e-06, |
|
"loss": 1.756, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.148213518165121e-06, |
|
"loss": 1.7177, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.127167106960682e-06, |
|
"loss": 1.7138, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.105888677735069e-06, |
|
"loss": 1.7358, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.084379426703245e-06, |
|
"loss": 1.7359, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.062640563056339e-06, |
|
"loss": 1.6954, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.040673308893677e-06, |
|
"loss": 1.7139, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.018478899154068e-06, |
|
"loss": 1.7276, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.996058581546386e-06, |
|
"loss": 1.7083, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.973413616479429e-06, |
|
"loss": 1.6832, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.95054527699106e-06, |
|
"loss": 1.7406, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.927454848676633e-06, |
|
"loss": 1.7201, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.904143629616735e-06, |
|
"loss": 1.7369, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.880612930304196e-06, |
|
"loss": 1.6823, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.856864073570429e-06, |
|
"loss": 1.7197, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.83289839451106e-06, |
|
"loss": 1.6927, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.80871724041087e-06, |
|
"loss": 1.7568, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.784321970668054e-06, |
|
"loss": 1.7227, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.759713956717804e-06, |
|
"loss": 1.7387, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.734894581955208e-06, |
|
"loss": 1.7293, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 8.70986524165748e-06, |
|
"loss": 1.7002, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 8.684627342905519e-06, |
|
"loss": 1.715, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 8.65918230450481e-06, |
|
"loss": 1.7531, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.63353155690566e-06, |
|
"loss": 1.6997, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 8.607676542122782e-06, |
|
"loss": 1.7396, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.581618713654239e-06, |
|
"loss": 1.7285, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 8.55535953639971e-06, |
|
"loss": 1.741, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.528900486578158e-06, |
|
"loss": 1.7045, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.502243051644838e-06, |
|
"loss": 1.6933, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.475388730207662e-06, |
|
"loss": 1.7161, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.44833903194297e-06, |
|
"loss": 1.7186, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.421095477510648e-06, |
|
"loss": 1.706, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.393659598468644e-06, |
|
"loss": 1.7607, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 8.366032937186869e-06, |
|
"loss": 1.7455, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.33821704676049e-06, |
|
"loss": 1.7347, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.310213490922616e-06, |
|
"loss": 1.6941, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 8.282023843956392e-06, |
|
"loss": 1.7156, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 8.253649690606495e-06, |
|
"loss": 1.7068, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.225092625990047e-06, |
|
"loss": 1.731, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.196354255506937e-06, |
|
"loss": 1.6977, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.167436194749576e-06, |
|
"loss": 1.7639, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.138340069412069e-06, |
|
"loss": 1.7265, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 8.109067515198822e-06, |
|
"loss": 1.7195, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 8.079620177732587e-06, |
|
"loss": 1.7243, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.049999712461956e-06, |
|
"loss": 1.7081, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 8.020207784568293e-06, |
|
"loss": 1.7023, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.990246068872111e-06, |
|
"loss": 1.7047, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.960116249738939e-06, |
|
"loss": 1.688, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.92982002098461e-06, |
|
"loss": 1.7059, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.899359085780062e-06, |
|
"loss": 1.6971, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.868735156555567e-06, |
|
"loss": 1.6947, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.83794995490448e-06, |
|
"loss": 1.7479, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.807005211486445e-06, |
|
"loss": 1.7464, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.775902665930114e-06, |
|
"loss": 1.7386, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 7.744644066735335e-06, |
|
"loss": 1.6369, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 7.713231171174868e-06, |
|
"loss": 1.6396, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 7.681665745195593e-06, |
|
"loss": 1.6174, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 7.649949563319228e-06, |
|
"loss": 1.5872, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 7.618084408542576e-06, |
|
"loss": 1.5716, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 7.586072072237291e-06, |
|
"loss": 1.5423, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 7.5539143540491635e-06, |
|
"loss": 1.5086, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 7.521613061796957e-06, |
|
"loss": 1.5367, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 7.48917001137078e-06, |
|
"loss": 1.5399, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 7.456587026629991e-06, |
|
"loss": 1.594, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 7.423865939300674e-06, |
|
"loss": 1.5454, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 7.391008588872661e-06, |
|
"loss": 1.5158, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 7.358016822496126e-06, |
|
"loss": 1.4813, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 7.324892494877734e-06, |
|
"loss": 1.4816, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 7.29163746817638e-06, |
|
"loss": 1.4641, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 7.258253611898509e-06, |
|
"loss": 1.374, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 7.224742802793005e-06, |
|
"loss": 1.4205, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 7.191106924745695e-06, |
|
"loss": 1.4606, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 7.157347868673441e-06, |
|
"loss": 1.4101, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 7.1234675324178295e-06, |
|
"loss": 1.4522, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 7.089467820638491e-06, |
|
"loss": 1.4079, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 7.055350644706023e-06, |
|
"loss": 1.4163, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 7.021117922594532e-06, |
|
"loss": 1.3857, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 6.986771578773812e-06, |
|
"loss": 1.4118, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 6.952313544101165e-06, |
|
"loss": 1.4108, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.917745755712839e-06, |
|
"loss": 1.4318, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.8830701569151394e-06, |
|
"loss": 1.3488, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.8482886970751785e-06, |
|
"loss": 1.3547, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.81340333151128e-06, |
|
"loss": 1.3479, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 6.7784160213830696e-06, |
|
"loss": 1.3658, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.743328733581211e-06, |
|
"loss": 1.3601, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.708143440616845e-06, |
|
"loss": 1.4016, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 6.672862120510688e-06, |
|
"loss": 1.3709, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 6.637486756681843e-06, |
|
"loss": 1.3934, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 6.602019337836291e-06, |
|
"loss": 1.3734, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 6.566461857855096e-06, |
|
"loss": 1.3656, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.5308163156823064e-06, |
|
"loss": 1.3864, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.495084715212597e-06, |
|
"loss": 1.4066, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 6.459269065178592e-06, |
|
"loss": 1.3613, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.423371379037957e-06, |
|
"loss": 1.3644, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.387393674860205e-06, |
|
"loss": 1.4032, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 6.351337975213239e-06, |
|
"loss": 1.3797, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.315206307049656e-06, |
|
"loss": 1.4222, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.2790007015927946e-06, |
|
"loss": 1.3548, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.242723194222546e-06, |
|
"loss": 1.3757, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.2063758243609275e-06, |
|
"loss": 1.3396, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.169960635357437e-06, |
|
"loss": 1.3686, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.133479674374176e-06, |
|
"loss": 1.3789, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 6.0969349922707675e-06, |
|
"loss": 1.4203, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.060328643489064e-06, |
|
"loss": 1.3963, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.023662685937643e-06, |
|
"loss": 1.4282, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.9869391808761315e-06, |
|
"loss": 1.3772, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.9501601927993135e-06, |
|
"loss": 1.3101, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.9133277893210785e-06, |
|
"loss": 1.3972, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.8764440410581846e-06, |
|
"loss": 1.4262, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.839511021513853e-06, |
|
"loss": 1.3973, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.802530806961195e-06, |
|
"loss": 1.3769, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.765505476326505e-06, |
|
"loss": 1.4184, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.728437111072376e-06, |
|
"loss": 1.3802, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.691327795080685e-06, |
|
"loss": 1.3814, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.654179614535457e-06, |
|
"loss": 1.393, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.616994657805566e-06, |
|
"loss": 1.3777, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.579775015327347e-06, |
|
"loss": 1.4092, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.5425227794870715e-06, |
|
"loss": 1.3382, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.505240044503324e-06, |
|
"loss": 1.3584, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.46792890630926e-06, |
|
"loss": 1.4156, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.430591462434792e-06, |
|
"loss": 1.37, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.393229811888663e-06, |
|
"loss": 1.36, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.355846055040449e-06, |
|
"loss": 1.4008, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.318442293502482e-06, |
|
"loss": 1.3849, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.281020630011703e-06, |
|
"loss": 1.3328, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.2435831683114515e-06, |
|
"loss": 1.3804, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5.206132013033199e-06, |
|
"loss": 1.3161, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5.1686692695782325e-06, |
|
"loss": 1.3887, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5.131197043999294e-06, |
|
"loss": 1.4188, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5.093717442882185e-06, |
|
"loss": 1.3621, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 5.0562325732273405e-06, |
|
"loss": 1.3492, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5.018744542331376e-06, |
|
"loss": 1.3909, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.981255457668625e-06, |
|
"loss": 1.3613, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.94376742677266e-06, |
|
"loss": 1.4051, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.906282557117817e-06, |
|
"loss": 1.385, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.8688029560007064e-06, |
|
"loss": 1.3837, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.831330730421769e-06, |
|
"loss": 1.4267, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.793867986966802e-06, |
|
"loss": 1.3869, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.756416831688549e-06, |
|
"loss": 1.3975, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.718979369988299e-06, |
|
"loss": 1.4298, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.681557706497519e-06, |
|
"loss": 1.4423, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.644153944959553e-06, |
|
"loss": 1.3994, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.606770188111339e-06, |
|
"loss": 1.3517, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.5694085375652105e-06, |
|
"loss": 1.3984, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.532071093690741e-06, |
|
"loss": 1.4148, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.494759955496678e-06, |
|
"loss": 1.3898, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.457477220512929e-06, |
|
"loss": 1.3715, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.420224984672654e-06, |
|
"loss": 1.3819, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.383005342194436e-06, |
|
"loss": 1.4257, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.345820385464543e-06, |
|
"loss": 1.3365, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.308672204919316e-06, |
|
"loss": 1.3555, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.271562888927626e-06, |
|
"loss": 1.3831, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.234494523673497e-06, |
|
"loss": 1.3866, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.1974691930388055e-06, |
|
"loss": 1.3544, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.16048897848615e-06, |
|
"loss": 1.3899, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.123555958941817e-06, |
|
"loss": 1.3751, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.0866722106789214e-06, |
|
"loss": 1.38, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.049839807200688e-06, |
|
"loss": 1.3596, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.013060819123869e-06, |
|
"loss": 1.3698, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.976337314062358e-06, |
|
"loss": 1.3362, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.9396713565109375e-06, |
|
"loss": 1.4218, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.903065007729234e-06, |
|
"loss": 1.3871, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.866520325625825e-06, |
|
"loss": 1.399, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.830039364642566e-06, |
|
"loss": 1.4025, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.7936241756390746e-06, |
|
"loss": 1.3634, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.7572768057774543e-06, |
|
"loss": 1.3397, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.7209992984072062e-06, |
|
"loss": 1.4236, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.6847936929503446e-06, |
|
"loss": 1.387, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.6486620247867625e-06, |
|
"loss": 1.4546, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.6126063251397968e-06, |
|
"loss": 1.4263, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.576628620962045e-06, |
|
"loss": 1.4271, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.5407309348214094e-06, |
|
"loss": 1.3993, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.5049152847874053e-06, |
|
"loss": 1.3685, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.469183684317694e-06, |
|
"loss": 1.3821, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.4335381421449056e-06, |
|
"loss": 1.3968, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.39798066216371e-06, |
|
"loss": 1.3889, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.3625132433181573e-06, |
|
"loss": 1.439, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.327137879489313e-06, |
|
"loss": 1.4536, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.2918565593831565e-06, |
|
"loss": 1.3946, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.2566712664187907e-06, |
|
"loss": 1.3681, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.221583978616932e-06, |
|
"loss": 1.395, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.1865966684887222e-06, |
|
"loss": 1.3861, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.1517113029248236e-06, |
|
"loss": 1.4264, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.1169298430848605e-06, |
|
"loss": 1.372, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.082254244287163e-06, |
|
"loss": 1.4616, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.0476864558988364e-06, |
|
"loss": 1.4242, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.0132284212261886e-06, |
|
"loss": 1.4027, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.97888207740547e-06, |
|
"loss": 1.4085, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.944649355293979e-06, |
|
"loss": 1.3835, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.9105321793615106e-06, |
|
"loss": 1.3953, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.876532467582174e-06, |
|
"loss": 1.4009, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.842652131326562e-06, |
|
"loss": 1.3675, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.8088930752543063e-06, |
|
"loss": 1.3713, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.775257197206996e-06, |
|
"loss": 1.4089, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.741746388101493e-06, |
|
"loss": 1.4123, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.7083625318236213e-06, |
|
"loss": 1.4307, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.6751075051222684e-06, |
|
"loss": 1.4278, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.6419831775038763e-06, |
|
"loss": 1.472, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.6089914111273398e-06, |
|
"loss": 1.3458, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.576134060699328e-06, |
|
"loss": 1.3607, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.54341297337001e-06, |
|
"loss": 1.3164, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.510829988629222e-06, |
|
"loss": 1.2873, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.478386938203043e-06, |
|
"loss": 1.2711, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.4460856459508374e-06, |
|
"loss": 1.2507, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.4139279277627113e-06, |
|
"loss": 1.201, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.381915591457424e-06, |
|
"loss": 1.1929, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.3500504366807743e-06, |
|
"loss": 1.2074, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.3183342548044067e-06, |
|
"loss": 1.2939, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.286768828825133e-06, |
|
"loss": 1.2518, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.2553559332646675e-06, |
|
"loss": 1.2253, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.2240973340698886e-06, |
|
"loss": 1.1937, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.1929947885135567e-06, |
|
"loss": 1.1734, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.1620500450955224e-06, |
|
"loss": 1.1867, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.1312648434444342e-06, |
|
"loss": 1.08, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.100640914219939e-06, |
|
"loss": 1.1372, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.0701799790153897e-06, |
|
"loss": 1.178, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.039883750261063e-06, |
|
"loss": 1.0955, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.00975393112789e-06, |
|
"loss": 1.1424, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.979792215431709e-06, |
|
"loss": 1.0761, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.9500002875380458e-06, |
|
"loss": 1.1093, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.920379822267414e-06, |
|
"loss": 1.0658, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.8909324848011802e-06, |
|
"loss": 1.0853, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.8616599305879334e-06, |
|
"loss": 1.0628, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.8325638052504235e-06, |
|
"loss": 1.103, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.8036457444930643e-06, |
|
"loss": 1.0065, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.774907374009953e-06, |
|
"loss": 1.0206, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.7463503093935063e-06, |
|
"loss": 1.0387, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.7179761560436097e-06, |
|
"loss": 1.0256, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.6897865090773858e-06, |
|
"loss": 1.0667, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.661782953239512e-06, |
|
"loss": 1.1087, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.6339670628131327e-06, |
|
"loss": 1.077, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.6063404015313583e-06, |
|
"loss": 1.1042, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.578904522489354e-06, |
|
"loss": 1.0646, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.5516609680570316e-06, |
|
"loss": 1.0694, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.5246112697923389e-06, |
|
"loss": 1.0964, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.4977569483551634e-06, |
|
"loss": 1.0993, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.471099513421842e-06, |
|
"loss": 1.0485, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.4446404636002931e-06, |
|
"loss": 1.0584, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.4183812863457624e-06, |
|
"loss": 1.1169, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3923234578772177e-06, |
|
"loss": 1.087, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.3664684430943431e-06, |
|
"loss": 1.1103, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.3408176954951912e-06, |
|
"loss": 1.0431, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.315372657094483e-06, |
|
"loss": 1.0704, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.29013475834252e-06, |
|
"loss": 1.0333, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.265105418044793e-06, |
|
"loss": 1.0854, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2402860432821972e-06, |
|
"loss": 1.0929, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2156780293319476e-06, |
|
"loss": 1.1359, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1912827595891313e-06, |
|
"loss": 1.1184, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1671016054889407e-06, |
|
"loss": 1.1367, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1431359264295717e-06, |
|
"loss": 1.0994, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1193870696958058e-06, |
|
"loss": 1.0315, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0958563703832675e-06, |
|
"loss": 1.1019, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0725451513233676e-06, |
|
"loss": 1.142, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0494547230089413e-06, |
|
"loss": 1.1241, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0265863835205709e-06, |
|
"loss": 1.0894, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0039414184536161e-06, |
|
"loss": 1.16, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.815211008459336e-07, |
|
"loss": 1.1217, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.593266911063253e-07, |
|
"loss": 1.1184, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.373594369436611e-07, |
|
"loss": 1.0941, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.15620573296756e-07, |
|
"loss": 1.0911, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 8.941113222649328e-07, |
|
"loss": 1.12, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 8.728328930393188e-07, |
|
"loss": 1.0704, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 8.517864818348804e-07, |
|
"loss": 1.0672, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.309732718231578e-07, |
|
"loss": 1.1475, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.103944330657665e-07, |
|
"loss": 1.0936, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 7.900511224486085e-07, |
|
"loss": 1.0835, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 7.699444836168413e-07, |
|
"loss": 1.1453, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 7.500756469105819e-07, |
|
"loss": 1.1123, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 7.304457293013656e-07, |
|
"loss": 1.0686, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 7.1105583432935e-07, |
|
"loss": 1.1315, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.919070520412768e-07, |
|
"loss": 1.0504, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.730004589291961e-07, |
|
"loss": 1.1282, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 6.543371178699442e-07, |
|
"loss": 1.164, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.359180780653957e-07, |
|
"loss": 1.0805, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.177443749834743e-07, |
|
"loss": 1.1122, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 5.998170302999529e-07, |
|
"loss": 1.1417, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 5.821370518410019e-07, |
|
"loss": 1.1123, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 5.647054335265489e-07, |
|
"loss": 1.156, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 5.475231553143906e-07, |
|
"loss": 1.1302, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 5.305911831451044e-07, |
|
"loss": 1.1281, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 5.139104688877549e-07, |
|
"loss": 1.1888, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.974819502863687e-07, |
|
"loss": 1.1384, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 4.813065509072279e-07, |
|
"loss": 1.1435, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.6538518008694465e-07, |
|
"loss": 1.1718, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.4971873288134237e-07, |
|
"loss": 1.2111, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.343080900151375e-07, |
|
"loss": 1.1525, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 4.1915411783242766e-07, |
|
"loss": 1.1026, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 4.0425766824798817e-07, |
|
"loss": 1.1428, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.8961957869937893e-07, |
|
"loss": 1.1742, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.752406720998691e-07, |
|
"loss": 1.1439, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.61121756792171e-07, |
|
"loss": 1.1181, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.472636265030005e-07, |
|
"loss": 1.1227, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.3366706029845097e-07, |
|
"loss": 1.1738, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.2033282254020747e-07, |
|
"loss": 1.0632, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.072616628425601e-07, |
|
"loss": 1.0974, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.9445431603027876e-07, |
|
"loss": 1.1317, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.8191150209729233e-07, |
|
"loss": 1.1371, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.696339261662156e-07, |
|
"loss": 1.1154, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.576222784487148e-07, |
|
"loss": 1.1395, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.45877234206694e-07, |
|
"loss": 1.1419, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.3439945371434792e-07, |
|
"loss": 1.135, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.2318958222103004e-07, |
|
"loss": 1.1152, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.1224824991498695e-07, |
|
"loss": 1.1253, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.0157607188792894e-07, |
|
"loss": 1.1046, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.911736481004489e-07, |
|
"loss": 1.1955, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.8104156334829703e-07, |
|
"loss": 1.1563, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.7118038722950313e-07, |
|
"loss": 1.1846, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.615906741123574e-07, |
|
"loss": 1.1909, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.5227296310424244e-07, |
|
"loss": 1.1598, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.432277780213298e-07, |
|
"loss": 1.118, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.3445562735912965e-07, |
|
"loss": 1.2047, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.2595700426390633e-07, |
|
"loss": 1.1622, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.1773238650495122e-07, |
|
"loss": 1.2584, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.0978223644773134e-07, |
|
"loss": 1.2419, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.0210700102788796e-07, |
|
"loss": 1.2292, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 9.470711172611724e-08, |
|
"loss": 1.2044, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 8.75829845439119e-08, |
|
"loss": 1.1776, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 8.073501998017152e-08, |
|
"loss": 1.1786, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 7.416360300869285e-08, |
|
"loss": 1.2012, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 6.786910305652373e-08, |
|
"loss": 1.1945, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 6.185187398319691e-08, |
|
"loss": 1.2414, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 5.611225406083609e-08, |
|
"loss": 1.2703, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 5.065056595513984e-08, |
|
"loss": 1.1913, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.546711670724124e-08, |
|
"loss": 1.1705, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.0562197716448316e-08, |
|
"loss": 1.2055, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.593608472386045e-08, |
|
"loss": 1.1861, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.1589037796869725e-08, |
|
"loss": 1.2427, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.7521301314537564e-08, |
|
"loss": 1.1897, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.373310395385797e-08, |
|
"loss": 1.2836, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.022465867690282e-08, |
|
"loss": 1.2451, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.699616271884752e-08, |
|
"loss": 1.2135, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.4047797576885458e-08, |
|
"loss": 1.2279, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1379729000021711e-08, |
|
"loss": 1.2035, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 8.99210697975883e-09, |
|
"loss": 1.2248, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 6.885065741661367e-09, |
|
"loss": 1.2341, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 5.058723737811355e-09, |
|
"loss": 1.1999, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.5131836401502972e-09, |
|
"loss": 1.1994, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.2485323347054555e-09, |
|
"loss": 1.2527, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.2648409167070886e-09, |
|
"loss": 1.2552, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 5.621646865899832e-10, |
|
"loss": 1.2782, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.405431468848306e-10, |
|
"loss": 1.2717, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.3355, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 432, |
|
"total_flos": 0.0, |
|
"train_loss": 1.441655464746334, |
|
"train_runtime": 12001.223, |
|
"train_samples_per_second": 4.04, |
|
"train_steps_per_second": 0.036 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 432, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|