RedPajama-INCITE-Base-3B-v1-wikipedia-8bit/flytei5n1f0xr/local_flytekit/12359fa3e845f047743a0b5c420028f3/trainer_state.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 400.0,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 6.67,
      "learning_rate": 1.4388747994087888e-05,
      "loss": 1.9989,
      "step": 10
    },
    {
      "epoch": 13.33,
      "learning_rate": 2e-05,
      "loss": 1.1205,
      "step": 20
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 0.2458,
      "step": 30
    },
    {
      "epoch": 26.67,
      "learning_rate": 2e-05,
      "loss": 0.0727,
      "step": 40
    },
    {
      "epoch": 33.33,
      "learning_rate": 2e-05,
      "loss": 0.0478,
      "step": 50
    },
    {
      "epoch": 40.0,
      "learning_rate": 2e-05,
      "loss": 0.0351,
      "step": 60
    },
    {
      "epoch": 46.67,
      "learning_rate": 2e-05,
      "loss": 0.0258,
      "step": 70
    },
    {
      "epoch": 53.33,
      "learning_rate": 2e-05,
      "loss": 0.0196,
      "step": 80
    },
    {
      "epoch": 60.0,
      "learning_rate": 2e-05,
      "loss": 0.0158,
      "step": 90
    },
    {
      "epoch": 66.67,
      "learning_rate": 2e-05,
      "loss": 0.0132,
      "step": 100
    },
    {
      "epoch": 73.33,
      "learning_rate": 2e-05,
      "loss": 0.0112,
      "step": 110
    },
    {
      "epoch": 80.0,
      "learning_rate": 2e-05,
      "loss": 0.0099,
      "step": 120
    },
    {
      "epoch": 86.67,
      "learning_rate": 2e-05,
      "loss": 0.009,
      "step": 130
    },
    {
      "epoch": 93.33,
      "learning_rate": 2e-05,
      "loss": 0.0077,
      "step": 140
    },
    {
      "epoch": 100.0,
      "learning_rate": 2e-05,
      "loss": 0.0073,
      "step": 150
    },
    {
      "epoch": 106.67,
      "learning_rate": 2e-05,
      "loss": 0.0068,
      "step": 160
    },
    {
      "epoch": 113.33,
      "learning_rate": 2e-05,
      "loss": 0.0064,
      "step": 170
    },
    {
      "epoch": 120.0,
      "learning_rate": 2e-05,
      "loss": 0.0062,
      "step": 180
    },
    {
      "epoch": 126.67,
      "learning_rate": 2e-05,
      "loss": 0.0057,
      "step": 190
    },
    {
      "epoch": 133.33,
      "learning_rate": 2e-05,
      "loss": 0.0056,
      "step": 200
    },
    {
      "epoch": 140.0,
      "learning_rate": 2e-05,
      "loss": 0.0054,
      "step": 210
    },
    {
      "epoch": 146.67,
      "learning_rate": 2e-05,
      "loss": 0.005,
      "step": 220
    },
    {
      "epoch": 153.33,
      "learning_rate": 2e-05,
      "loss": 0.005,
      "step": 230
    },
    {
      "epoch": 160.0,
      "learning_rate": 2e-05,
      "loss": 0.0047,
      "step": 240
    },
    {
      "epoch": 166.67,
      "learning_rate": 2e-05,
      "loss": 0.0045,
      "step": 250
    },
    {
      "epoch": 173.33,
      "learning_rate": 2e-05,
      "loss": 0.0046,
      "step": 260
    },
    {
      "epoch": 180.0,
      "learning_rate": 2e-05,
      "loss": 0.0044,
      "step": 270
    },
    {
      "epoch": 186.67,
      "learning_rate": 2e-05,
      "loss": 0.0042,
      "step": 280
    },
    {
      "epoch": 193.33,
      "learning_rate": 2e-05,
      "loss": 0.0043,
      "step": 290
    },
    {
      "epoch": 200.0,
      "learning_rate": 2e-05,
      "loss": 0.0043,
      "step": 300
    },
    {
      "epoch": 206.67,
      "learning_rate": 2e-05,
      "loss": 0.0041,
      "step": 310
    },
    {
      "epoch": 213.33,
      "learning_rate": 2e-05,
      "loss": 0.0042,
      "step": 320
    },
    {
      "epoch": 220.0,
      "learning_rate": 2e-05,
      "loss": 0.0041,
      "step": 330
    },
    {
      "epoch": 226.67,
      "learning_rate": 2e-05,
      "loss": 0.0042,
      "step": 340
    },
    {
      "epoch": 233.33,
      "learning_rate": 2e-05,
      "loss": 0.004,
      "step": 350
    },
    {
      "epoch": 240.0,
      "learning_rate": 2e-05,
      "loss": 0.0037,
      "step": 360
    },
    {
      "epoch": 246.67,
      "learning_rate": 2e-05,
      "loss": 0.004,
      "step": 370
    },
    {
      "epoch": 253.33,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 380
    },
    {
      "epoch": 260.0,
      "learning_rate": 2e-05,
      "loss": 0.0041,
      "step": 390
    },
    {
      "epoch": 266.67,
      "learning_rate": 2e-05,
      "loss": 0.004,
      "step": 400
    },
    {
      "epoch": 273.33,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 410
    },
    {
      "epoch": 280.0,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 420
    },
    {
      "epoch": 286.67,
      "learning_rate": 2e-05,
      "loss": 0.0037,
      "step": 430
    },
    {
      "epoch": 293.33,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 440
    },
    {
      "epoch": 300.0,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 450
    },
    {
      "epoch": 306.67,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 460
    },
    {
      "epoch": 313.33,
      "learning_rate": 2e-05,
      "loss": 0.0042,
      "step": 470
    },
    {
      "epoch": 320.0,
      "learning_rate": 2e-05,
      "loss": 0.0037,
      "step": 480
    },
    {
      "epoch": 326.67,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 490
    },
    {
      "epoch": 333.33,
      "learning_rate": 2e-05,
      "loss": 0.0037,
      "step": 500
    },
    {
      "epoch": 340.0,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 510
    },
    {
      "epoch": 346.67,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 520
    },
    {
      "epoch": 353.33,
      "learning_rate": 2e-05,
      "loss": 0.0039,
      "step": 530
    },
    {
      "epoch": 360.0,
      "learning_rate": 2e-05,
      "loss": 0.004,
      "step": 540
    },
    {
      "epoch": 366.67,
      "learning_rate": 2e-05,
      "loss": 0.0041,
      "step": 550
    },
    {
      "epoch": 373.33,
      "learning_rate": 2e-05,
      "loss": 0.004,
      "step": 560
    },
    {
      "epoch": 380.0,
      "learning_rate": 2e-05,
      "loss": 0.0038,
      "step": 570
    },
    {
      "epoch": 386.67,
      "learning_rate": 2e-05,
      "loss": 0.0043,
      "step": 580
    },
    {
      "epoch": 393.33,
      "learning_rate": 2e-05,
      "loss": 0.0044,
      "step": 590
    },
    {
      "epoch": 400.0,
      "learning_rate": 2e-05,
      "loss": 0.0045,
      "step": 600
    },
    {
      "epoch": 400.0,
      "step": 600,
      "total_flos": 252437248081920.0,
      "train_loss": 0.06393463966126244,
      "train_runtime": 25615.4629,
      "train_samples_per_second": 2.998,
      "train_steps_per_second": 0.023
    }
  ],
  "max_steps": 600,
  "num_train_epochs": 600,
  "total_flos": 252437248081920.0,
  "trial_name": null,
  "trial_params": null
}
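
For reference, a minimal sketch of how this state file can be read back: the snippet below loads log_history, keeps the per-step entries that carry a "loss" value (the closing summary entry only holds aggregate fields such as train_loss and train_runtime), and plots loss against optimizer step. The local file name trainer_state.json, the output file loss_curve.png, and the use of matplotlib are assumptions for illustration, not part of the checkpoint itself.

import json

import matplotlib.pyplot as plt

# Assumed: a local copy of this trainer_state.json sits in the working directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step log entries carry a "loss" key; the final summary entry does not,
# so this filter keeps only the 60 logged training steps.
entries = [e for e in state["log_history"] if "loss" in e]

steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]

plt.plot(steps, losses, marker="o")
plt.yscale("log")  # loss falls from ~2.0 to ~0.004, so a log axis is easier to read
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("RedPajama-INCITE-Base-3B-v1-wikipedia-8bit fine-tune")
plt.savefig("loss_curve.png")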