ivangrapher's picture
Training in progress, step 30, checkpoint
a74acbf verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.13872832369942195,
"eval_steps": 8,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004624277456647399,
"eval_loss": 8.074233055114746,
"eval_runtime": 11.9008,
"eval_samples_per_second": 7.731,
"eval_steps_per_second": 3.865,
"step": 1
},
{
"epoch": 0.013872832369942197,
"grad_norm": 11.582316398620605,
"learning_rate": 6e-05,
"loss": 7.5988,
"step": 3
},
{
"epoch": 0.027745664739884393,
"grad_norm": 12.307031631469727,
"learning_rate": 0.00012,
"loss": 8.0135,
"step": 6
},
{
"epoch": 0.03699421965317919,
"eval_loss": 3.775078058242798,
"eval_runtime": 12.045,
"eval_samples_per_second": 7.638,
"eval_steps_per_second": 3.819,
"step": 8
},
{
"epoch": 0.04161849710982659,
"grad_norm": 12.446931838989258,
"learning_rate": 0.00018,
"loss": 5.2833,
"step": 9
},
{
"epoch": 0.055491329479768786,
"grad_norm": 4.806489944458008,
"learning_rate": 0.00019510565162951537,
"loss": 1.8637,
"step": 12
},
{
"epoch": 0.06936416184971098,
"grad_norm": 8.42613697052002,
"learning_rate": 0.00017071067811865476,
"loss": 1.2632,
"step": 15
},
{
"epoch": 0.07398843930635839,
"eval_loss": 1.535495638847351,
"eval_runtime": 12.1233,
"eval_samples_per_second": 7.589,
"eval_steps_per_second": 3.794,
"step": 16
},
{
"epoch": 0.08323699421965318,
"grad_norm": 7.994266510009766,
"learning_rate": 0.00013090169943749476,
"loss": 1.8054,
"step": 18
},
{
"epoch": 0.09710982658959537,
"grad_norm": 6.297595500946045,
"learning_rate": 8.435655349597689e-05,
"loss": 0.9978,
"step": 21
},
{
"epoch": 0.11098265895953757,
"grad_norm": 5.638557434082031,
"learning_rate": 4.12214747707527e-05,
"loss": 1.1281,
"step": 24
},
{
"epoch": 0.11098265895953757,
"eval_loss": 1.1503130197525024,
"eval_runtime": 12.1214,
"eval_samples_per_second": 7.59,
"eval_steps_per_second": 3.795,
"step": 24
},
{
"epoch": 0.12485549132947976,
"grad_norm": 3.9357333183288574,
"learning_rate": 1.0899347581163221e-05,
"loss": 0.9166,
"step": 27
},
{
"epoch": 0.13872832369942195,
"grad_norm": 4.9822821617126465,
"learning_rate": 0.0,
"loss": 0.6963,
"step": 30
}
],
"logging_steps": 3,
"max_steps": 30,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 70,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5084301722910720.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}