Davlan's picture
Upload 14 files
07f65fb
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 48159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 4.94808862310264e-05,
"loss": 0.6922,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 4.8961772462052786e-05,
"loss": 0.3808,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 4.844265869307918e-05,
"loss": 0.3117,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 4.792354492410557e-05,
"loss": 0.2758,
"step": 2000
},
{
"epoch": 0.16,
"learning_rate": 4.7404431155131964e-05,
"loss": 0.251,
"step": 2500
},
{
"epoch": 0.19,
"learning_rate": 4.688531738615835e-05,
"loss": 0.2316,
"step": 3000
},
{
"epoch": 0.22,
"learning_rate": 4.636620361718475e-05,
"loss": 0.2138,
"step": 3500
},
{
"epoch": 0.25,
"learning_rate": 4.5847089848211136e-05,
"loss": 0.1987,
"step": 4000
},
{
"epoch": 0.28,
"learning_rate": 4.5327976079237525e-05,
"loss": 0.1947,
"step": 4500
},
{
"epoch": 0.31,
"learning_rate": 4.480886231026392e-05,
"loss": 0.1797,
"step": 5000
},
{
"epoch": 0.34,
"learning_rate": 4.428974854129031e-05,
"loss": 0.1747,
"step": 5500
},
{
"epoch": 0.37,
"learning_rate": 4.37706347723167e-05,
"loss": 0.1664,
"step": 6000
},
{
"epoch": 0.4,
"learning_rate": 4.325152100334309e-05,
"loss": 0.1611,
"step": 6500
},
{
"epoch": 0.44,
"learning_rate": 4.273240723436949e-05,
"loss": 0.1529,
"step": 7000
},
{
"epoch": 0.47,
"learning_rate": 4.2213293465395875e-05,
"loss": 0.149,
"step": 7500
},
{
"epoch": 0.5,
"learning_rate": 4.169417969642227e-05,
"loss": 0.1505,
"step": 8000
},
{
"epoch": 0.53,
"learning_rate": 4.1175065927448665e-05,
"loss": 0.1459,
"step": 8500
},
{
"epoch": 0.56,
"learning_rate": 4.0655952158475054e-05,
"loss": 0.1406,
"step": 9000
},
{
"epoch": 0.59,
"learning_rate": 4.013683838950145e-05,
"loss": 0.1359,
"step": 9500
},
{
"epoch": 0.62,
"learning_rate": 3.961772462052784e-05,
"loss": 0.1326,
"step": 10000
},
{
"epoch": 0.65,
"learning_rate": 3.909861085155423e-05,
"loss": 0.132,
"step": 10500
},
{
"epoch": 0.69,
"learning_rate": 3.857949708258062e-05,
"loss": 0.13,
"step": 11000
},
{
"epoch": 0.72,
"learning_rate": 3.806038331360701e-05,
"loss": 0.124,
"step": 11500
},
{
"epoch": 0.75,
"learning_rate": 3.7541269544633404e-05,
"loss": 0.1242,
"step": 12000
},
{
"epoch": 0.78,
"learning_rate": 3.702215577565979e-05,
"loss": 0.1244,
"step": 12500
},
{
"epoch": 0.81,
"learning_rate": 3.650304200668619e-05,
"loss": 0.1216,
"step": 13000
},
{
"epoch": 0.84,
"learning_rate": 3.5983928237712576e-05,
"loss": 0.1177,
"step": 13500
},
{
"epoch": 0.87,
"learning_rate": 3.546481446873897e-05,
"loss": 0.1176,
"step": 14000
},
{
"epoch": 0.9,
"learning_rate": 3.494570069976536e-05,
"loss": 0.1144,
"step": 14500
},
{
"epoch": 0.93,
"learning_rate": 3.4426586930791755e-05,
"loss": 0.1127,
"step": 15000
},
{
"epoch": 0.97,
"learning_rate": 3.390747316181814e-05,
"loss": 0.1111,
"step": 15500
},
{
"epoch": 1.0,
"learning_rate": 3.338835939284454e-05,
"loss": 0.1099,
"step": 16000
},
{
"epoch": 1.03,
"learning_rate": 3.286924562387093e-05,
"loss": 0.1009,
"step": 16500
},
{
"epoch": 1.06,
"learning_rate": 3.235013185489732e-05,
"loss": 0.1,
"step": 17000
},
{
"epoch": 1.09,
"learning_rate": 3.183101808592372e-05,
"loss": 0.0976,
"step": 17500
},
{
"epoch": 1.12,
"learning_rate": 3.1311904316950105e-05,
"loss": 0.0953,
"step": 18000
},
{
"epoch": 1.15,
"learning_rate": 3.079279054797649e-05,
"loss": 0.0953,
"step": 18500
},
{
"epoch": 1.18,
"learning_rate": 3.0273676779002885e-05,
"loss": 0.0978,
"step": 19000
},
{
"epoch": 1.21,
"learning_rate": 2.975456301002928e-05,
"loss": 0.0945,
"step": 19500
},
{
"epoch": 1.25,
"learning_rate": 2.9235449241055672e-05,
"loss": 0.0962,
"step": 20000
},
{
"epoch": 1.28,
"learning_rate": 2.8716335472082064e-05,
"loss": 0.0936,
"step": 20500
},
{
"epoch": 1.31,
"learning_rate": 2.8197221703108455e-05,
"loss": 0.0952,
"step": 21000
},
{
"epoch": 1.34,
"learning_rate": 2.7678107934134844e-05,
"loss": 0.0927,
"step": 21500
},
{
"epoch": 1.37,
"learning_rate": 2.715899416516124e-05,
"loss": 0.0913,
"step": 22000
},
{
"epoch": 1.4,
"learning_rate": 2.6639880396187627e-05,
"loss": 0.0921,
"step": 22500
},
{
"epoch": 1.43,
"learning_rate": 2.6120766627214022e-05,
"loss": 0.0895,
"step": 23000
},
{
"epoch": 1.46,
"learning_rate": 2.5601652858240418e-05,
"loss": 0.0869,
"step": 23500
},
{
"epoch": 1.5,
"learning_rate": 2.5082539089266806e-05,
"loss": 0.0898,
"step": 24000
},
{
"epoch": 1.53,
"learning_rate": 2.4563425320293194e-05,
"loss": 0.0867,
"step": 24500
},
{
"epoch": 1.56,
"learning_rate": 2.4044311551319586e-05,
"loss": 0.0889,
"step": 25000
},
{
"epoch": 1.59,
"learning_rate": 2.352519778234598e-05,
"loss": 0.0858,
"step": 25500
},
{
"epoch": 1.62,
"learning_rate": 2.3006084013372373e-05,
"loss": 0.0856,
"step": 26000
},
{
"epoch": 1.65,
"learning_rate": 2.2486970244398765e-05,
"loss": 0.0847,
"step": 26500
},
{
"epoch": 1.68,
"learning_rate": 2.1967856475425156e-05,
"loss": 0.0861,
"step": 27000
},
{
"epoch": 1.71,
"learning_rate": 2.1448742706451548e-05,
"loss": 0.0847,
"step": 27500
},
{
"epoch": 1.74,
"learning_rate": 2.0929628937477936e-05,
"loss": 0.0819,
"step": 28000
},
{
"epoch": 1.78,
"learning_rate": 2.0410515168504328e-05,
"loss": 0.0819,
"step": 28500
},
{
"epoch": 1.81,
"learning_rate": 1.989140139953072e-05,
"loss": 0.0856,
"step": 29000
},
{
"epoch": 1.84,
"learning_rate": 1.9372287630557115e-05,
"loss": 0.0828,
"step": 29500
},
{
"epoch": 1.87,
"learning_rate": 1.8853173861583507e-05,
"loss": 0.0832,
"step": 30000
},
{
"epoch": 1.9,
"learning_rate": 1.83340600926099e-05,
"loss": 0.0823,
"step": 30500
},
{
"epoch": 1.93,
"learning_rate": 1.781494632363629e-05,
"loss": 0.0808,
"step": 31000
},
{
"epoch": 1.96,
"learning_rate": 1.7295832554662682e-05,
"loss": 0.0811,
"step": 31500
},
{
"epoch": 1.99,
"learning_rate": 1.677671878568907e-05,
"loss": 0.084,
"step": 32000
},
{
"epoch": 2.02,
"learning_rate": 1.6257605016715462e-05,
"loss": 0.0769,
"step": 32500
},
{
"epoch": 2.06,
"learning_rate": 1.5738491247741857e-05,
"loss": 0.0733,
"step": 33000
},
{
"epoch": 2.09,
"learning_rate": 1.5219377478768249e-05,
"loss": 0.0724,
"step": 33500
},
{
"epoch": 2.12,
"learning_rate": 1.470026370979464e-05,
"loss": 0.0747,
"step": 34000
},
{
"epoch": 2.15,
"learning_rate": 1.418114994082103e-05,
"loss": 0.0715,
"step": 34500
},
{
"epoch": 2.18,
"learning_rate": 1.3662036171847422e-05,
"loss": 0.0736,
"step": 35000
},
{
"epoch": 2.21,
"learning_rate": 1.3142922402873814e-05,
"loss": 0.0745,
"step": 35500
},
{
"epoch": 2.24,
"learning_rate": 1.2623808633900206e-05,
"loss": 0.0732,
"step": 36000
},
{
"epoch": 2.27,
"learning_rate": 1.2104694864926598e-05,
"loss": 0.0738,
"step": 36500
},
{
"epoch": 2.3,
"learning_rate": 1.158558109595299e-05,
"loss": 0.0739,
"step": 37000
},
{
"epoch": 2.34,
"learning_rate": 1.1066467326979381e-05,
"loss": 0.0703,
"step": 37500
},
{
"epoch": 2.37,
"learning_rate": 1.0547353558005773e-05,
"loss": 0.072,
"step": 38000
},
{
"epoch": 2.4,
"learning_rate": 1.0028239789032165e-05,
"loss": 0.0723,
"step": 38500
},
{
"epoch": 2.43,
"learning_rate": 9.509126020058556e-06,
"loss": 0.0734,
"step": 39000
},
{
"epoch": 2.46,
"learning_rate": 8.990012251084948e-06,
"loss": 0.0702,
"step": 39500
},
{
"epoch": 2.49,
"learning_rate": 8.47089848211134e-06,
"loss": 0.0712,
"step": 40000
},
{
"epoch": 2.52,
"learning_rate": 7.951784713137732e-06,
"loss": 0.0713,
"step": 40500
},
{
"epoch": 2.55,
"learning_rate": 7.432670944164123e-06,
"loss": 0.0719,
"step": 41000
},
{
"epoch": 2.59,
"learning_rate": 6.913557175190514e-06,
"loss": 0.0718,
"step": 41500
},
{
"epoch": 2.62,
"learning_rate": 6.394443406216908e-06,
"loss": 0.0709,
"step": 42000
},
{
"epoch": 2.65,
"learning_rate": 5.8753296372432985e-06,
"loss": 0.07,
"step": 42500
},
{
"epoch": 2.68,
"learning_rate": 5.35621586826969e-06,
"loss": 0.068,
"step": 43000
},
{
"epoch": 2.71,
"learning_rate": 4.837102099296082e-06,
"loss": 0.0714,
"step": 43500
},
{
"epoch": 2.74,
"learning_rate": 4.317988330322474e-06,
"loss": 0.0719,
"step": 44000
},
{
"epoch": 2.77,
"learning_rate": 3.7988745613488655e-06,
"loss": 0.0678,
"step": 44500
},
{
"epoch": 2.8,
"learning_rate": 3.2797607923752568e-06,
"loss": 0.0693,
"step": 45000
},
{
"epoch": 2.83,
"learning_rate": 2.760647023401649e-06,
"loss": 0.0677,
"step": 45500
},
{
"epoch": 2.87,
"learning_rate": 2.2415332544280407e-06,
"loss": 0.0711,
"step": 46000
},
{
"epoch": 2.9,
"learning_rate": 1.7224194854544324e-06,
"loss": 0.0692,
"step": 46500
},
{
"epoch": 2.93,
"learning_rate": 1.203305716480824e-06,
"loss": 0.0692,
"step": 47000
},
{
"epoch": 2.96,
"learning_rate": 6.841919475072158e-07,
"loss": 0.0699,
"step": 47500
},
{
"epoch": 2.99,
"learning_rate": 1.6507817853360744e-07,
"loss": 0.0711,
"step": 48000
},
{
"epoch": 3.0,
"step": 48159,
"total_flos": 5.191245227822285e+16,
"train_loss": 0.11467816326249321,
"train_runtime": 7321.5952,
"train_samples_per_second": 210.473,
"train_steps_per_second": 6.578
}
],
"max_steps": 48159,
"num_train_epochs": 3,
"total_flos": 5.191245227822285e+16,
"trial_name": null,
"trial_params": null
}