|
{ |
|
"best_metric": 0.8136950731277466, |
|
"best_model_checkpoint": "./lora-alpaca/checkpoint-800", |
|
"epoch": 2.0499679692504804, |
|
"global_step": 800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-05, |
|
"loss": 2.0409, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2e-05, |
|
"loss": 2.1188, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3e-05, |
|
"loss": 2.0143, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4e-05, |
|
"loss": 1.8108, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4764, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 6e-05, |
|
"loss": 1.1516, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 6.9e-05, |
|
"loss": 0.9453, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 7.900000000000001e-05, |
|
"loss": 0.8503, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 8.900000000000001e-05, |
|
"loss": 0.8352, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.900000000000001e-05, |
|
"loss": 0.7635, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.91588785046729e-05, |
|
"loss": 0.8399, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.822429906542057e-05, |
|
"loss": 0.7683, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.728971962616824e-05, |
|
"loss": 0.7774, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.635514018691589e-05, |
|
"loss": 0.7912, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.542056074766356e-05, |
|
"loss": 0.7262, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.448598130841123e-05, |
|
"loss": 0.8167, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.355140186915888e-05, |
|
"loss": 0.7634, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.261682242990655e-05, |
|
"loss": 0.7666, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.168224299065422e-05, |
|
"loss": 0.7943, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.074766355140187e-05, |
|
"loss": 0.7366, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 0.8413083553314209, |
|
"eval_runtime": 68.432, |
|
"eval_samples_per_second": 29.226, |
|
"eval_steps_per_second": 0.468, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.981308411214954e-05, |
|
"loss": 0.8029, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.88785046728972e-05, |
|
"loss": 0.7383, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.794392523364486e-05, |
|
"loss": 0.7692, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.700934579439253e-05, |
|
"loss": 0.7673, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.607476635514018e-05, |
|
"loss": 0.7129, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 8.514018691588785e-05, |
|
"loss": 0.7913, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.420560747663552e-05, |
|
"loss": 0.7432, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.327102803738318e-05, |
|
"loss": 0.7617, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 8.233644859813084e-05, |
|
"loss": 0.7783, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 8.140186915887851e-05, |
|
"loss": 0.7146, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 8.046728971962617e-05, |
|
"loss": 0.7992, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.953271028037383e-05, |
|
"loss": 0.738, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.85981308411215e-05, |
|
"loss": 0.7477, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.766355140186916e-05, |
|
"loss": 0.7698, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.672897196261682e-05, |
|
"loss": 0.714, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 7.579439252336449e-05, |
|
"loss": 0.7939, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 7.485981308411215e-05, |
|
"loss": 0.7394, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.392523364485982e-05, |
|
"loss": 0.7481, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 7.299065420560748e-05, |
|
"loss": 0.7446, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 7.205607476635514e-05, |
|
"loss": 0.7805, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 0.8240922093391418, |
|
"eval_runtime": 68.3178, |
|
"eval_samples_per_second": 29.275, |
|
"eval_steps_per_second": 0.468, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 7.11214953271028e-05, |
|
"loss": 0.7309, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 7.018691588785047e-05, |
|
"loss": 0.7595, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.925233644859813e-05, |
|
"loss": 0.7693, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 6.83177570093458e-05, |
|
"loss": 0.7108, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.738317757009346e-05, |
|
"loss": 0.7988, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.644859813084112e-05, |
|
"loss": 0.7316, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.551401869158879e-05, |
|
"loss": 0.7518, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 6.457943925233646e-05, |
|
"loss": 0.7579, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.364485981308411e-05, |
|
"loss": 0.7115, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.271028037383178e-05, |
|
"loss": 0.7861, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 6.177570093457945e-05, |
|
"loss": 0.7292, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 6.08411214953271e-05, |
|
"loss": 0.7471, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.990654205607477e-05, |
|
"loss": 0.7458, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.897196261682243e-05, |
|
"loss": 0.7164, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.803738317757009e-05, |
|
"loss": 0.7867, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.710280373831776e-05, |
|
"loss": 0.7165, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.616822429906542e-05, |
|
"loss": 0.7424, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.523364485981308e-05, |
|
"loss": 0.7438, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5.429906542056075e-05, |
|
"loss": 0.702, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 5.336448598130841e-05, |
|
"loss": 0.7791, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 0.8157330751419067, |
|
"eval_runtime": 68.2707, |
|
"eval_samples_per_second": 29.295, |
|
"eval_steps_per_second": 0.469, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5.242990654205607e-05, |
|
"loss": 0.7248, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5.1495327102803733e-05, |
|
"loss": 0.7558, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.05607476635514e-05, |
|
"loss": 0.7438, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.962616822429907e-05, |
|
"loss": 0.7094, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.869158878504673e-05, |
|
"loss": 0.7733, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.77570093457944e-05, |
|
"loss": 0.7162, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.682242990654206e-05, |
|
"loss": 0.7397, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.588785046728972e-05, |
|
"loss": 0.7579, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.495327102803738e-05, |
|
"loss": 0.7055, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.401869158878505e-05, |
|
"loss": 0.7685, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.308411214953271e-05, |
|
"loss": 0.7266, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.2149532710280373e-05, |
|
"loss": 0.7469, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 4.121495327102804e-05, |
|
"loss": 0.7559, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.02803738317757e-05, |
|
"loss": 0.6953, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.9345794392523364e-05, |
|
"loss": 0.7702, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.841121495327103e-05, |
|
"loss": 0.7111, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.7476635514018693e-05, |
|
"loss": 0.7348, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.6542056074766355e-05, |
|
"loss": 0.7146, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.560747663551402e-05, |
|
"loss": 0.7753, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.4672897196261684e-05, |
|
"loss": 0.717, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 0.8136950731277466, |
|
"eval_runtime": 68.3249, |
|
"eval_samples_per_second": 29.272, |
|
"eval_steps_per_second": 0.468, |
|
"step": 800 |
|
} |
|
], |
|
"max_steps": 1170, |
|
"num_train_epochs": 3, |
|
"total_flos": 5.536929232204071e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|