HebQwen / checkpoint-45 /trainer_state.json
asafd60's picture
Initial model upload
3c6b9cb verified
{
"best_metric": 0.50872844,
"best_model_checkpoint": "/content/output/qwen2-vl-7b-instruct/v2-20240909-100714/checkpoint-45",
"epoch": 0.993103448275862,
"eval_steps": 50,
"global_step": 45,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"acc": 0.61398816,
"epoch": 0.022068965517241378,
"grad_norm": 7.717197895050049,
"learning_rate": 3.3333333333333335e-05,
"loss": 2.87438893,
"memory(GiB)": 20.29,
"step": 1,
"train_speed(iter/s)": 0.066629
},
{
"acc": 0.62096363,
"epoch": 0.1103448275862069,
"grad_norm": 8.616560935974121,
"learning_rate": 9.944154131125642e-05,
"loss": 2.60930538,
"memory(GiB)": 22.58,
"step": 5,
"train_speed(iter/s)": 0.075138
},
{
"acc": 0.80472717,
"epoch": 0.2206896551724138,
"grad_norm": 3.399202346801758,
"learning_rate": 9.330127018922194e-05,
"loss": 0.68508205,
"memory(GiB)": 23.35,
"step": 10,
"train_speed(iter/s)": 0.076348
},
{
"acc": 0.81436024,
"epoch": 0.3310344827586207,
"grad_norm": 5.226686477661133,
"learning_rate": 8.117449009293668e-05,
"loss": 0.69332366,
"memory(GiB)": 24.12,
"step": 15,
"train_speed(iter/s)": 0.076737
},
{
"acc": 0.86372032,
"epoch": 0.4413793103448276,
"grad_norm": 3.478239059448242,
"learning_rate": 6.473775872054521e-05,
"loss": 0.57136168,
"memory(GiB)": 24.12,
"step": 20,
"train_speed(iter/s)": 0.076946
},
{
"acc": 0.85740089,
"epoch": 0.5517241379310345,
"grad_norm": 2.1063661575317383,
"learning_rate": 4.626349532067879e-05,
"loss": 0.51958747,
"memory(GiB)": 24.91,
"step": 25,
"train_speed(iter/s)": 0.077061
},
{
"acc": 0.85881948,
"epoch": 0.6620689655172414,
"grad_norm": 2.6917998790740967,
"learning_rate": 2.8305813044122097e-05,
"loss": 0.51737795,
"memory(GiB)": 24.91,
"step": 30,
"train_speed(iter/s)": 0.077142
},
{
"acc": 0.84086313,
"epoch": 0.7724137931034483,
"grad_norm": 7.916348934173584,
"learning_rate": 1.3347406408508695e-05,
"loss": 0.72901492,
"memory(GiB)": 24.91,
"step": 35,
"train_speed(iter/s)": 0.077198
},
{
"acc": 0.86680059,
"epoch": 0.8827586206896552,
"grad_norm": 2.948944568634033,
"learning_rate": 3.4563125677897932e-06,
"loss": 0.43249173,
"memory(GiB)": 24.91,
"step": 40,
"train_speed(iter/s)": 0.077243
},
{
"acc": 0.91657734,
"epoch": 0.993103448275862,
"grad_norm": 2.6367099285125732,
"learning_rate": 0.0,
"loss": 0.34231672,
"memory(GiB)": 24.91,
"step": 45,
"train_speed(iter/s)": 0.077273
},
{
"epoch": 0.993103448275862,
"eval_acc": 0.8181818181818182,
"eval_loss": 0.5087284445762634,
"eval_runtime": 3.4104,
"eval_samples_per_second": 2.053,
"eval_steps_per_second": 2.053,
"step": 45
}
],
"logging_steps": 5,
"max_steps": 45,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.488065503664026e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}