|
{ |
|
"best_metric": 0.9769449234008789, |
|
"best_model_checkpoint": "./checkpoints/meta-baseline-t5-small/checkpoint-70000", |
|
"epoch": 8.968516798513916, |
|
"global_step": 70000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.001, |
|
"loss": 2.3828, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.001, |
|
"loss": 1.6006, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.001, |
|
"loss": 1.4131, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.001, |
|
"loss": 1.3124, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.001, |
|
"loss": 1.2434, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.001, |
|
"loss": 1.1786, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.001, |
|
"loss": 1.1193, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.001, |
|
"loss": 1.1045, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.001, |
|
"loss": 1.0873, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.001, |
|
"loss": 1.0495, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.001, |
|
"loss": 1.0346, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.001, |
|
"loss": 1.0245, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.9973, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.9785, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.9696, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.9684, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.001, |
|
"loss": 0.9425, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.9437, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.9376, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.9247, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.9127, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.9133, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.9021, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.9021, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.8922, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.1090599298477173, |
|
"eval_runtime": 226.6186, |
|
"eval_samples_per_second": 539.567, |
|
"eval_steps_per_second": 2.109, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.001, |
|
"loss": 0.884, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.8681, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.8732, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.869, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.8467, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.8578, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.8621, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.001, |
|
"loss": 0.8484, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.8449, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.8432, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.8481, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.8388, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.8402, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.8175, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.7903, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.7977, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.001, |
|
"loss": 0.8082, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.7975, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.8046, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.7912, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.7875, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.7894, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.7886, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.001, |
|
"loss": 0.7847, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.7906, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 1.0378576517105103, |
|
"eval_runtime": 226.2892, |
|
"eval_samples_per_second": 540.353, |
|
"eval_steps_per_second": 2.112, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.7796, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.7873, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.7796, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.7661, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.7901, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.7852, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.7722, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.7879, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.7616, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.7645, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.7783, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.7725, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.7664, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.7632, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.001, |
|
"loss": 0.7702, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.7695, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.7498, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.7583, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.7556, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.7401, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.7408, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.7474, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.7509, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.7588, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.7486, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 1.012812852859497, |
|
"eval_runtime": 226.2966, |
|
"eval_samples_per_second": 540.335, |
|
"eval_steps_per_second": 2.112, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.7529, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.7405, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.7443, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.7358, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.7161, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.001, |
|
"loss": 0.7292, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.7187, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.7228, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.7212, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.7183, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.7293, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.7164, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.7169, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.7262, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.7206, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.7075, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.7111, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.708, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.7205, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.716, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.7092, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.001, |
|
"loss": 0.7145, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.7079, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.7091, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.7069, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 1.0125058889389038, |
|
"eval_runtime": 226.3642, |
|
"eval_samples_per_second": 540.174, |
|
"eval_steps_per_second": 2.112, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.7227, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.7131, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.718, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.7079, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.7029, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.7052, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.7023, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.6946, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.7007, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.7065, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.7129, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.7115, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.001, |
|
"loss": 0.7002, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.7048, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.7051, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6826, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.707, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.6763, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.6645, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.6786, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.6786, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.6867, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.6748, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.6754, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.6766, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 1.0152596235275269, |
|
"eval_runtime": 226.4668, |
|
"eval_samples_per_second": 539.929, |
|
"eval_steps_per_second": 2.111, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.6852, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.6736, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6859, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 0.001, |
|
"loss": 0.6754, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.6789, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.6725, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.6826, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.6852, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.6767, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.6584, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.6642, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.6732, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.6832, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.6746, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.6803, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.6753, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.6901, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.6717, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6761, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.001, |
|
"loss": 0.6698, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.6737, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.6832, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.6699, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.6686, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.6624, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 0.9890710711479187, |
|
"eval_runtime": 226.6603, |
|
"eval_samples_per_second": 539.468, |
|
"eval_steps_per_second": 2.109, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.6739, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 0.001, |
|
"loss": 0.672, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.6728, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.6767, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6808, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6644, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.6459, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.6548, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.6486, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.6516, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.001, |
|
"loss": 0.6533, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.6496, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.6478, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.6498, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.6345, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.6451, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6465, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.001, |
|
"loss": 0.643, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.6573, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.6473, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.6523, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.6507, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.6478, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.658, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.6458, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 0.9963102340698242, |
|
"eval_runtime": 226.814, |
|
"eval_samples_per_second": 539.103, |
|
"eval_steps_per_second": 2.107, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.6567, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 0.001, |
|
"loss": 0.6474, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.6518, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.6509, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.6512, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.6501, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.65, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6505, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.001, |
|
"loss": 0.6426, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.6593, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.6488, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.6402, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.6515, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.6453, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.6581, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 0.001, |
|
"loss": 0.6408, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.6452, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.001, |
|
"loss": 0.6445, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6455, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6505, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.6246, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.6119, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.622, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.6278, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.6233, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 0.9902318716049194, |
|
"eval_runtime": 227.3177, |
|
"eval_samples_per_second": 537.908, |
|
"eval_steps_per_second": 2.103, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.6309, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.6308, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.6173, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.6188, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.6412, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6232, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.001, |
|
"loss": 0.6393, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.6305, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 0.001, |
|
"loss": 0.632, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.6308, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.6307, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.6321, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.6337, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.6281, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.633, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.001, |
|
"loss": 0.6208, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.6217, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.6229, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.6253, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.6315, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.6354, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6314, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.001, |
|
"loss": 0.6346, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.6293, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.001, |
|
"loss": 0.6393, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_loss": 0.9893815517425537, |
|
"eval_runtime": 226.9791, |
|
"eval_samples_per_second": 538.71, |
|
"eval_steps_per_second": 2.106, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.629, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.6381, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.6327, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.6272, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.001, |
|
"loss": 0.6266, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.628, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 0.001, |
|
"loss": 0.6211, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.628, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6351, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.6172, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.5967, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.603, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.614, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.6204, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.6075, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.001, |
|
"loss": 0.612, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.6184, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.6062, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.6146, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6172, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.001, |
|
"loss": 0.6066, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.6185, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.001, |
|
"loss": 0.6121, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.6054, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.6035, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_loss": 0.9930422902107239, |
|
"eval_runtime": 227.4872, |
|
"eval_samples_per_second": 537.507, |
|
"eval_steps_per_second": 2.101, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.6172, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.6118, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.6193, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.6235, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.001, |
|
"loss": 0.6267, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.6125, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.001, |
|
"loss": 0.6075, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.6147, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.6022, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.6039, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6087, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 0.001, |
|
"loss": 0.611, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.6122, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.001, |
|
"loss": 0.6038, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.617, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.6068, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.6186, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.6131, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.001, |
|
"loss": 0.612, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.6101, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.001, |
|
"loss": 0.6087, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6093, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.6139, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.6001, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.5916, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 0.9893601536750793, |
|
"eval_runtime": 227.4715, |
|
"eval_samples_per_second": 537.544, |
|
"eval_steps_per_second": 2.101, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.5944, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.59, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.5999, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.5887, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.001, |
|
"loss": 0.5939, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.5941, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.6025, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.5955, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.6038, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.001, |
|
"loss": 0.5878, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.5974, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.001, |
|
"loss": 0.6144, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.5916, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 0.001, |
|
"loss": 0.5897, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.5954, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.5975, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.5953, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.5967, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"learning_rate": 0.001, |
|
"loss": 0.5988, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.6058, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.001, |
|
"loss": 0.6007, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.606, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.5885, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.5968, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.6073, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_loss": 0.9817278981208801, |
|
"eval_runtime": 227.2904, |
|
"eval_samples_per_second": 537.973, |
|
"eval_steps_per_second": 2.103, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 0.001, |
|
"loss": 0.5998, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.5959, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.001, |
|
"loss": 0.5946, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.601, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.001, |
|
"loss": 0.5963, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.5996, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.5981, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 0.001, |
|
"loss": 0.5945, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.6057, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.001, |
|
"loss": 0.5952, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.6012, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 0.001, |
|
"loss": 0.5979, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.001, |
|
"loss": 0.5965, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.001, |
|
"loss": 0.5767, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 0.001, |
|
"loss": 0.5697, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.001, |
|
"loss": 0.5761, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 0.001, |
|
"loss": 0.5808, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.001, |
|
"loss": 0.5734, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 0.001, |
|
"loss": 0.5767, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.001, |
|
"loss": 0.5874, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 0.001, |
|
"loss": 0.5773, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.001, |
|
"loss": 0.587, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 0.001, |
|
"loss": 0.5882, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 0.001, |
|
"loss": 0.5815, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.001, |
|
"loss": 0.5928, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_loss": 0.9951310753822327, |
|
"eval_runtime": 227.2886, |
|
"eval_samples_per_second": 537.977, |
|
"eval_steps_per_second": 2.103, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 0.001, |
|
"loss": 0.5827, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.001, |
|
"loss": 0.5733, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.001, |
|
"loss": 0.5889, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.001, |
|
"loss": 0.589, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 0.001, |
|
"loss": 0.5902, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.001, |
|
"loss": 0.5899, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 0.001, |
|
"loss": 0.586, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.001, |
|
"loss": 0.585, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 0.001, |
|
"loss": 0.5968, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 0.001, |
|
"loss": 0.5916, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 0.001, |
|
"loss": 0.5773, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 0.001, |
|
"loss": 0.5737, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 0.001, |
|
"loss": 0.5806, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.001, |
|
"loss": 0.5891, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 0.001, |
|
"loss": 0.5984, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 0.001, |
|
"loss": 0.599, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 0.001, |
|
"loss": 0.5892, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.001, |
|
"loss": 0.5785, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 0.001, |
|
"loss": 0.5892, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 0.001, |
|
"loss": 0.5812, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 0.001, |
|
"loss": 0.5943, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.001, |
|
"loss": 0.5926, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 0.001, |
|
"loss": 0.5798, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 0.001, |
|
"loss": 0.5911, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 0.001, |
|
"loss": 0.5846, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"eval_loss": 0.9769449234008789, |
|
"eval_runtime": 227.0836, |
|
"eval_samples_per_second": 538.462, |
|
"eval_steps_per_second": 2.105, |
|
"step": 70000 |
|
} |
|
], |
|
"max_steps": 78050, |
|
"num_train_epochs": 10, |
|
"total_flos": 9.84005748927529e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|