|
{ |
|
"best_metric": 3.7859156131744385, |
|
"best_model_checkpoint": "seq2seq_results/combined_nl_prompt_base_features_baseline_codet5small/checkpoint-144856", |
|
"epoch": 2.0, |
|
"global_step": 144856, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.001999861931849561, |
|
"loss": 4.6701, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.001999723863699122, |
|
"loss": 4.3041, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.001999585795548683, |
|
"loss": 4.37, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0019994477273982438, |
|
"loss": 4.3707, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0019993096592478047, |
|
"loss": 4.4073, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0019991715910973656, |
|
"loss": 4.34, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.001999033522946927, |
|
"loss": 4.377, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0019988954547964875, |
|
"loss": 4.3965, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0019987573866460484, |
|
"loss": 4.3162, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0019986193184956093, |
|
"loss": 4.4539, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0019984812503451702, |
|
"loss": 4.4049, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.001998343182194731, |
|
"loss": 4.4302, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.001998205114044292, |
|
"loss": 4.4396, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.001998067045893853, |
|
"loss": 4.3968, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0019979289777434144, |
|
"loss": 4.3686, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0019977909095929753, |
|
"loss": 4.2784, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0019976528414425362, |
|
"loss": 4.3356, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.001997514773292097, |
|
"loss": 4.3909, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.001997376705141658, |
|
"loss": 4.3739, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.001997238636991219, |
|
"loss": 4.364, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00199710056884078, |
|
"loss": 4.3889, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.001996962500690341, |
|
"loss": 4.3619, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.001996824432539902, |
|
"loss": 4.2707, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0019966863643894627, |
|
"loss": 4.3321, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0019965482962390237, |
|
"loss": 4.337, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0019964102280885846, |
|
"loss": 4.3264, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0019962721599381455, |
|
"loss": 4.2258, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0019961340917877064, |
|
"loss": 4.2702, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0019959960236372674, |
|
"loss": 4.2741, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0019958579554868283, |
|
"loss": 4.173, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0019957198873363892, |
|
"loss": 4.2405, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00199558181918595, |
|
"loss": 4.1739, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001995443751035511, |
|
"loss": 4.2301, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001995305682885072, |
|
"loss": 4.1023, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.001995167614734633, |
|
"loss": 4.1918, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0019950295465841943, |
|
"loss": 4.2123, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.001994891478433755, |
|
"loss": 4.1345, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0019947534102833157, |
|
"loss": 4.0942, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0019946153421328766, |
|
"loss": 4.1234, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0019944772739824376, |
|
"loss": 4.1437, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0019943392058319985, |
|
"loss": 4.0807, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0019942011376815594, |
|
"loss": 4.1128, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.001994063069531121, |
|
"loss": 4.1238, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0019939250013806817, |
|
"loss": 4.1071, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0019937869332302426, |
|
"loss": 4.1466, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0019936488650798036, |
|
"loss": 4.1327, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0019935107969293645, |
|
"loss": 4.0238, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0019933727287789254, |
|
"loss": 4.1153, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0019932346606284864, |
|
"loss": 4.04, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0019930965924780473, |
|
"loss": 4.0657, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.001992958524327608, |
|
"loss": 4.1566, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.001992820456177169, |
|
"loss": 4.1009, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00199268238802673, |
|
"loss": 4.0373, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.001992544319876291, |
|
"loss": 4.1002, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.001992406251725852, |
|
"loss": 4.0799, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.001992268183575413, |
|
"loss": 4.1071, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0019921301154249738, |
|
"loss": 4.004, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0019919920472745347, |
|
"loss": 4.0756, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0019918539791240956, |
|
"loss": 4.0442, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0019917159109736566, |
|
"loss": 4.0467, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0019915778428232175, |
|
"loss": 3.9504, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0019914397746727784, |
|
"loss": 4.0131, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0019913017065223393, |
|
"loss": 3.9668, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0019911636383719007, |
|
"loss": 4.0255, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.001991025570221461, |
|
"loss": 3.9381, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.001990887502071022, |
|
"loss": 4.0031, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.001990749433920583, |
|
"loss": 3.9848, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.001990611365770144, |
|
"loss": 4.0021, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.001990473297619705, |
|
"loss": 4.0027, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.001990335229469266, |
|
"loss": 3.9172, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.001990197161318827, |
|
"loss": 3.9527, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.001990059093168388, |
|
"loss": 3.9484, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.001989921025017949, |
|
"loss": 3.9383, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00198978295686751, |
|
"loss": 3.9678, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.001989644888717071, |
|
"loss": 3.9641, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.001989506820566632, |
|
"loss": 3.9309, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0019893687524161928, |
|
"loss": 3.9796, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0019892306842657537, |
|
"loss": 3.8781, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0019890926161153146, |
|
"loss": 3.9941, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0019889545479648755, |
|
"loss": 3.9713, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0019888164798144365, |
|
"loss": 4.0011, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0019886784116639974, |
|
"loss": 3.9279, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0019885403435135583, |
|
"loss": 3.9176, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0019884022753631193, |
|
"loss": 3.9786, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00198826420721268, |
|
"loss": 4.0188, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.001988126139062241, |
|
"loss": 3.9148, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.001987988070911802, |
|
"loss": 3.9541, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.001987850002761363, |
|
"loss": 3.9026, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.001987711934610924, |
|
"loss": 3.9408, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.001987573866460485, |
|
"loss": 3.9297, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0019874357983100457, |
|
"loss": 3.8746, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.001987297730159607, |
|
"loss": 3.9165, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.001987159662009168, |
|
"loss": 3.9406, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0019870215938587285, |
|
"loss": 3.934, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0019868835257082895, |
|
"loss": 3.9324, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0019867454575578504, |
|
"loss": 3.9349, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0019866073894074113, |
|
"loss": 3.9627, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0019864693212569722, |
|
"loss": 3.9866, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0019863312531065336, |
|
"loss": 3.9169, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0019861931849560945, |
|
"loss": 3.9981, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0019860551168056555, |
|
"loss": 3.9435, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0019859170486552164, |
|
"loss": 3.9869, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0019857789805047773, |
|
"loss": 3.8722, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0019856409123543382, |
|
"loss": 4.038, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.001985502844203899, |
|
"loss": 3.9329, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00198536477605346, |
|
"loss": 3.9034, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.001985226707903021, |
|
"loss": 3.9563, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.001985088639752582, |
|
"loss": 3.9504, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.001984950571602143, |
|
"loss": 3.9527, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.001984812503451704, |
|
"loss": 4.0248, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0019846744353012647, |
|
"loss": 3.9452, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0019845363671508257, |
|
"loss": 4.023, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0019843982990003866, |
|
"loss": 4.1163, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0019842602308499475, |
|
"loss": 4.0406, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0019841221626995084, |
|
"loss": 3.9629, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0019839840945490694, |
|
"loss": 4.0088, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0019838460263986303, |
|
"loss": 4.0681, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0019837079582481912, |
|
"loss": 4.0537, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.001983569890097752, |
|
"loss": 4.0625, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0019834318219473135, |
|
"loss": 4.1052, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0019832937537968744, |
|
"loss": 4.1331, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0019831556856464354, |
|
"loss": 4.1319, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.001983017617495996, |
|
"loss": 4.0927, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0019828795493455568, |
|
"loss": 4.0494, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0019827414811951177, |
|
"loss": 4.1262, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0019826034130446786, |
|
"loss": 3.9996, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00198246534489424, |
|
"loss": 4.0418, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.001982327276743801, |
|
"loss": 3.976, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.001982189208593362, |
|
"loss": 4.0928, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0019820511404429228, |
|
"loss": 4.1164, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0019819130722924837, |
|
"loss": 3.9452, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0019817750041420446, |
|
"loss": 4.033, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0019816369359916056, |
|
"loss": 3.9945, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0019814988678411665, |
|
"loss": 3.9676, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0019813607996907274, |
|
"loss": 4.0068, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0019812227315402883, |
|
"loss": 3.9709, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0019810846633898493, |
|
"loss": 4.0048, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00198094659523941, |
|
"loss": 3.995, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.001980808527088971, |
|
"loss": 4.064, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.001980670458938532, |
|
"loss": 4.0621, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.001980532390788093, |
|
"loss": 4.0397, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.001980394322637654, |
|
"loss": 4.1456, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.001980256254487215, |
|
"loss": 4.0672, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0019801181863367758, |
|
"loss": 4.0551, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 4.511486530303955, |
|
"eval_runtime": 111.9469, |
|
"eval_samples_per_second": 176.432, |
|
"eval_steps_per_second": 11.032, |
|
"step": 72428 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0019799801181863367, |
|
"loss": 4.1848, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0019798420500358976, |
|
"loss": 4.0794, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0019797039818854585, |
|
"loss": 4.1764, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00197956591373502, |
|
"loss": 4.086, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.001979427845584581, |
|
"loss": 4.0969, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0019792897774341418, |
|
"loss": 4.1175, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0019791517092837027, |
|
"loss": 4.0663, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.001979013641133263, |
|
"loss": 4.0281, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.001978875572982824, |
|
"loss": 4.1221, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.001978737504832385, |
|
"loss": 4.2072, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0019785994366819464, |
|
"loss": 4.0445, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0019784613685315073, |
|
"loss": 4.1667, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0019783233003810683, |
|
"loss": 4.1388, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.001978185232230629, |
|
"loss": 4.1253, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00197804716408019, |
|
"loss": 4.1804, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.001977909095929751, |
|
"loss": 4.0465, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.001977771027779312, |
|
"loss": 4.01, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.001977632959628873, |
|
"loss": 4.0643, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.001977494891478434, |
|
"loss": 4.0639, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0019773568233279947, |
|
"loss": 4.0322, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0019772187551775557, |
|
"loss": 4.0383, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0019770806870271166, |
|
"loss": 4.0777, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0019769426188766775, |
|
"loss": 4.0972, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0019768045507262385, |
|
"loss": 4.1713, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0019766664825757994, |
|
"loss": 4.0441, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0019765284144253603, |
|
"loss": 4.0923, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0019763903462749212, |
|
"loss": 4.0889, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.001976252278124482, |
|
"loss": 4.0687, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.001976114209974043, |
|
"loss": 4.0273, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.001975976141823604, |
|
"loss": 4.1192, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.001975838073673165, |
|
"loss": 4.0272, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0019757000055227263, |
|
"loss": 4.054, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0019755619373722872, |
|
"loss": 4.0947, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.001975423869221848, |
|
"loss": 4.1307, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.001975285801071409, |
|
"loss": 4.0405, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00197514773292097, |
|
"loss": 4.0817, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0019750096647705305, |
|
"loss": 4.0129, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0019748715966200914, |
|
"loss": 4.1245, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.001974733528469653, |
|
"loss": 4.0581, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0019745954603192137, |
|
"loss": 4.0563, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0019744573921687747, |
|
"loss": 4.0469, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0019743193240183356, |
|
"loss": 3.978, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0019741812558678965, |
|
"loss": 3.941, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0019740431877174574, |
|
"loss": 4.0216, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0019739051195670184, |
|
"loss": 4.0471, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0019737670514165793, |
|
"loss": 3.9967, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0019736289832661402, |
|
"loss": 4.0091, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.001973490915115701, |
|
"loss": 3.9558, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.001973352846965262, |
|
"loss": 4.0075, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.001973214778814823, |
|
"loss": 3.9662, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.001973076710664384, |
|
"loss": 3.9896, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.001972938642513945, |
|
"loss": 4.0709, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.001972800574363506, |
|
"loss": 4.0645, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0019726625062130667, |
|
"loss": 4.0215, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0019725244380626276, |
|
"loss": 4.0326, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0019723863699121886, |
|
"loss": 3.9966, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0019722483017617495, |
|
"loss": 4.0232, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0019721102336113104, |
|
"loss": 4.0139, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0019719721654608714, |
|
"loss": 4.0067, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0019718340973104327, |
|
"loss": 3.9949, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0019716960291599936, |
|
"loss": 4.0542, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0019715579610095546, |
|
"loss": 3.9849, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0019714198928591155, |
|
"loss": 4.119, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0019712818247086764, |
|
"loss": 3.9913, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.001971143756558237, |
|
"loss": 4.0074, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.001971005688407798, |
|
"loss": 3.9604, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.001970867620257359, |
|
"loss": 4.0134, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00197072955210692, |
|
"loss": 4.021, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.001970591483956481, |
|
"loss": 3.9878, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.001970453415806042, |
|
"loss": 3.9759, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.001970315347655603, |
|
"loss": 3.9569, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.001970177279505164, |
|
"loss": 3.9763, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0019700392113547248, |
|
"loss": 3.8924, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0019699011432042857, |
|
"loss": 3.9378, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0019697630750538466, |
|
"loss": 3.9423, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0019696250069034076, |
|
"loss": 3.9732, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0019694869387529685, |
|
"loss": 3.961, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0019693488706025294, |
|
"loss": 3.9872, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0019692108024520903, |
|
"loss": 3.9157, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0019690727343016513, |
|
"loss": 3.9561, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.001968934666151212, |
|
"loss": 3.9836, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.001968796598000773, |
|
"loss": 3.8875, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.001968658529850334, |
|
"loss": 3.8598, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.001968520461699895, |
|
"loss": 4.0, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.001968382393549456, |
|
"loss": 3.976, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.001968244325399017, |
|
"loss": 4.0193, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0019681062572485778, |
|
"loss": 4.027, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.001967968189098139, |
|
"loss": 4.0129, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0019678301209477, |
|
"loss": 3.9996, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.001967692052797261, |
|
"loss": 3.8491, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.001967553984646822, |
|
"loss": 3.9668, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.001967415916496383, |
|
"loss": 3.9194, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0019672778483459438, |
|
"loss": 3.8736, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0019671397801955042, |
|
"loss": 3.9618, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0019670017120450656, |
|
"loss": 3.9653, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0019668636438946265, |
|
"loss": 3.8935, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0019667255757441875, |
|
"loss": 3.9393, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0019665875075937484, |
|
"loss": 3.8719, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0019664494394433093, |
|
"loss": 3.881, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0019663113712928702, |
|
"loss": 3.7973, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.001966173303142431, |
|
"loss": 3.8897, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.001966035234991992, |
|
"loss": 3.8499, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.001965897166841553, |
|
"loss": 3.8924, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.001965759098691114, |
|
"loss": 3.9056, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.001965621030540675, |
|
"loss": 4.0077, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.001965482962390236, |
|
"loss": 3.8725, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0019653448942397967, |
|
"loss": 4.0384, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0019652068260893577, |
|
"loss": 4.0188, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0019650687579389186, |
|
"loss": 3.9961, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0019649306897884795, |
|
"loss": 3.9249, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0019647926216380404, |
|
"loss": 3.9624, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0019646545534876014, |
|
"loss": 3.9348, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0019645164853371623, |
|
"loss": 3.9695, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0019643784171867232, |
|
"loss": 3.9816, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.001964240349036284, |
|
"loss": 3.9832, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0019641022808858455, |
|
"loss": 3.9284, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0019639642127354064, |
|
"loss": 3.9135, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0019638261445849674, |
|
"loss": 3.9262, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0019636880764345283, |
|
"loss": 3.8446, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0019635500082840892, |
|
"loss": 3.8439, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00196341194013365, |
|
"loss": 3.9125, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.001963273871983211, |
|
"loss": 3.9033, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0019631358038327716, |
|
"loss": 3.8268, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.001962997735682333, |
|
"loss": 3.8309, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.001962859667531894, |
|
"loss": 3.8943, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.001962721599381455, |
|
"loss": 3.8645, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0019625835312310157, |
|
"loss": 3.8826, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0019624454630805767, |
|
"loss": 3.8056, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0019623073949301376, |
|
"loss": 3.835, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0019621693267796985, |
|
"loss": 3.801, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0019620312586292594, |
|
"loss": 3.7232, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0019618931904788204, |
|
"loss": 3.7806, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0019617551223283813, |
|
"loss": 3.8155, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.001961617054177942, |
|
"loss": 3.8579, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.001961478986027503, |
|
"loss": 3.8498, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.001961340917877064, |
|
"loss": 3.8688, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.001961202849726625, |
|
"loss": 3.7867, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.001961064781576186, |
|
"loss": 3.7469, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.001960926713425747, |
|
"loss": 3.862, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0019607886452753078, |
|
"loss": 3.9004, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0019606505771248687, |
|
"loss": 3.8185, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0019605125089744296, |
|
"loss": 3.8896, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0019603744408239906, |
|
"loss": 3.9049, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0019602363726735515, |
|
"loss": 3.8464, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.001960098304523113, |
|
"loss": 3.7932, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 3.7859156131744385, |
|
"eval_runtime": 111.4844, |
|
"eval_samples_per_second": 177.164, |
|
"eval_steps_per_second": 11.078, |
|
"step": 144856 |
|
} |
|
], |
|
"max_steps": 7242800, |
|
"num_train_epochs": 100, |
|
"total_flos": 3.136735727635661e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|