josesantorcuato's picture
End of training
a483a91 verified
{
"best_metric": 1.0,
"best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-266",
"epoch": 9.1,
"eval_steps": 500,
"global_step": 380,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02631578947368421,
"grad_norm": 7.891677379608154,
"learning_rate": 1.3157894736842106e-05,
"loss": 1.3956,
"step": 10
},
{
"epoch": 0.05263157894736842,
"grad_norm": 9.808924674987793,
"learning_rate": 2.6315789473684212e-05,
"loss": 1.3551,
"step": 20
},
{
"epoch": 0.07894736842105263,
"grad_norm": 11.947072982788086,
"learning_rate": 3.9473684210526316e-05,
"loss": 1.3762,
"step": 30
},
{
"epoch": 0.1,
"eval_accuracy": 0.2857142857142857,
"eval_loss": 1.4489834308624268,
"eval_runtime": 6.6934,
"eval_samples_per_second": 2.092,
"eval_steps_per_second": 0.299,
"step": 38
},
{
"epoch": 1.0052631578947369,
"grad_norm": 9.198051452636719,
"learning_rate": 4.970760233918128e-05,
"loss": 1.3077,
"step": 40
},
{
"epoch": 1.0315789473684212,
"grad_norm": 8.386839866638184,
"learning_rate": 4.824561403508772e-05,
"loss": 1.3181,
"step": 50
},
{
"epoch": 1.0578947368421052,
"grad_norm": 7.219974040985107,
"learning_rate": 4.678362573099415e-05,
"loss": 1.3483,
"step": 60
},
{
"epoch": 1.0842105263157895,
"grad_norm": 8.174283027648926,
"learning_rate": 4.5321637426900585e-05,
"loss": 1.2421,
"step": 70
},
{
"epoch": 1.1,
"eval_accuracy": 0.42857142857142855,
"eval_loss": 1.3189738988876343,
"eval_runtime": 1.1969,
"eval_samples_per_second": 11.696,
"eval_steps_per_second": 1.671,
"step": 76
},
{
"epoch": 2.0105263157894737,
"grad_norm": 8.107172012329102,
"learning_rate": 4.3859649122807014e-05,
"loss": 1.192,
"step": 80
},
{
"epoch": 2.036842105263158,
"grad_norm": 4.386706352233887,
"learning_rate": 4.239766081871345e-05,
"loss": 1.1036,
"step": 90
},
{
"epoch": 2.0631578947368423,
"grad_norm": 22.933664321899414,
"learning_rate": 4.093567251461988e-05,
"loss": 0.9115,
"step": 100
},
{
"epoch": 2.0894736842105264,
"grad_norm": 8.911148071289062,
"learning_rate": 3.9473684210526316e-05,
"loss": 0.8753,
"step": 110
},
{
"epoch": 2.1,
"eval_accuracy": 0.5714285714285714,
"eval_loss": 0.9505947828292847,
"eval_runtime": 1.3125,
"eval_samples_per_second": 10.667,
"eval_steps_per_second": 1.524,
"step": 114
},
{
"epoch": 3.0157894736842104,
"grad_norm": 12.133541107177734,
"learning_rate": 3.8011695906432746e-05,
"loss": 0.6292,
"step": 120
},
{
"epoch": 3.042105263157895,
"grad_norm": 3.894150495529175,
"learning_rate": 3.654970760233918e-05,
"loss": 0.4353,
"step": 130
},
{
"epoch": 3.068421052631579,
"grad_norm": 16.60247039794922,
"learning_rate": 3.508771929824561e-05,
"loss": 0.6194,
"step": 140
},
{
"epoch": 3.094736842105263,
"grad_norm": 9.527063369750977,
"learning_rate": 3.362573099415205e-05,
"loss": 0.4285,
"step": 150
},
{
"epoch": 3.1,
"eval_accuracy": 0.7857142857142857,
"eval_loss": 0.5579931139945984,
"eval_runtime": 1.2045,
"eval_samples_per_second": 11.623,
"eval_steps_per_second": 1.66,
"step": 152
},
{
"epoch": 4.021052631578947,
"grad_norm": 10.711992263793945,
"learning_rate": 3.216374269005848e-05,
"loss": 0.2534,
"step": 160
},
{
"epoch": 4.0473684210526315,
"grad_norm": 0.5280627012252808,
"learning_rate": 3.0701754385964913e-05,
"loss": 0.1613,
"step": 170
},
{
"epoch": 4.073684210526316,
"grad_norm": 6.357590675354004,
"learning_rate": 2.9239766081871346e-05,
"loss": 0.2539,
"step": 180
},
{
"epoch": 4.1,
"grad_norm": 10.521815299987793,
"learning_rate": 2.777777777777778e-05,
"loss": 0.3808,
"step": 190
},
{
"epoch": 4.1,
"eval_accuracy": 0.8571428571428571,
"eval_loss": 0.49507784843444824,
"eval_runtime": 1.2129,
"eval_samples_per_second": 11.543,
"eval_steps_per_second": 1.649,
"step": 190
},
{
"epoch": 5.026315789473684,
"grad_norm": 0.37827879190444946,
"learning_rate": 2.6315789473684212e-05,
"loss": 0.0941,
"step": 200
},
{
"epoch": 5.052631578947368,
"grad_norm": 15.41084098815918,
"learning_rate": 2.485380116959064e-05,
"loss": 0.1117,
"step": 210
},
{
"epoch": 5.078947368421052,
"grad_norm": 16.81968879699707,
"learning_rate": 2.3391812865497074e-05,
"loss": 0.1368,
"step": 220
},
{
"epoch": 5.1,
"eval_accuracy": 0.9285714285714286,
"eval_loss": 0.1577732115983963,
"eval_runtime": 1.1295,
"eval_samples_per_second": 12.395,
"eval_steps_per_second": 1.771,
"step": 228
},
{
"epoch": 6.005263157894737,
"grad_norm": 1.9107282161712646,
"learning_rate": 2.1929824561403507e-05,
"loss": 0.1195,
"step": 230
},
{
"epoch": 6.031578947368421,
"grad_norm": 0.2877643406391144,
"learning_rate": 2.046783625730994e-05,
"loss": 0.074,
"step": 240
},
{
"epoch": 6.057894736842106,
"grad_norm": 15.423999786376953,
"learning_rate": 1.9005847953216373e-05,
"loss": 0.3348,
"step": 250
},
{
"epoch": 6.08421052631579,
"grad_norm": 13.305224418640137,
"learning_rate": 1.7543859649122806e-05,
"loss": 0.043,
"step": 260
},
{
"epoch": 6.1,
"eval_accuracy": 1.0,
"eval_loss": 0.04753781110048294,
"eval_runtime": 1.1871,
"eval_samples_per_second": 11.794,
"eval_steps_per_second": 1.685,
"step": 266
},
{
"epoch": 7.010526315789473,
"grad_norm": 1.7253011465072632,
"learning_rate": 1.608187134502924e-05,
"loss": 0.2052,
"step": 270
},
{
"epoch": 7.036842105263158,
"grad_norm": 0.0725923627614975,
"learning_rate": 1.4619883040935673e-05,
"loss": 0.0899,
"step": 280
},
{
"epoch": 7.063157894736842,
"grad_norm": 0.664633572101593,
"learning_rate": 1.3157894736842106e-05,
"loss": 0.0753,
"step": 290
},
{
"epoch": 7.089473684210526,
"grad_norm": 0.12937086820602417,
"learning_rate": 1.1695906432748537e-05,
"loss": 0.0842,
"step": 300
},
{
"epoch": 7.1,
"eval_accuracy": 1.0,
"eval_loss": 0.06243452429771423,
"eval_runtime": 1.1818,
"eval_samples_per_second": 11.847,
"eval_steps_per_second": 1.692,
"step": 304
},
{
"epoch": 8.01578947368421,
"grad_norm": 0.04959488287568092,
"learning_rate": 1.023391812865497e-05,
"loss": 0.0558,
"step": 310
},
{
"epoch": 8.042105263157895,
"grad_norm": 0.07736323028802872,
"learning_rate": 8.771929824561403e-06,
"loss": 0.004,
"step": 320
},
{
"epoch": 8.06842105263158,
"grad_norm": 0.037877339869737625,
"learning_rate": 7.3099415204678366e-06,
"loss": 0.0784,
"step": 330
},
{
"epoch": 8.094736842105263,
"grad_norm": 0.04343694821000099,
"learning_rate": 5.8479532163742686e-06,
"loss": 0.003,
"step": 340
},
{
"epoch": 8.1,
"eval_accuracy": 1.0,
"eval_loss": 0.05573272332549095,
"eval_runtime": 1.2047,
"eval_samples_per_second": 11.621,
"eval_steps_per_second": 1.66,
"step": 342
},
{
"epoch": 9.021052631578947,
"grad_norm": 0.037801120430231094,
"learning_rate": 4.3859649122807014e-06,
"loss": 0.0041,
"step": 350
},
{
"epoch": 9.047368421052632,
"grad_norm": 0.3717033863067627,
"learning_rate": 2.9239766081871343e-06,
"loss": 0.0051,
"step": 360
},
{
"epoch": 9.073684210526316,
"grad_norm": 0.030566079542040825,
"learning_rate": 1.4619883040935671e-06,
"loss": 0.0034,
"step": 370
},
{
"epoch": 9.1,
"grad_norm": 0.05392751097679138,
"learning_rate": 0.0,
"loss": 0.0828,
"step": 380
},
{
"epoch": 9.1,
"eval_accuracy": 1.0,
"eval_loss": 0.04455011337995529,
"eval_runtime": 1.3077,
"eval_samples_per_second": 10.706,
"eval_steps_per_second": 1.529,
"step": 380
},
{
"epoch": 9.1,
"step": 380,
"total_flos": 3.7881039164748595e+18,
"train_loss": 0.47875460998988467,
"train_runtime": 960.3946,
"train_samples_per_second": 3.165,
"train_steps_per_second": 0.396
},
{
"epoch": 9.1,
"eval_accuracy": 0.7857142857142857,
"eval_loss": 0.785201907157898,
"eval_runtime": 15.1964,
"eval_samples_per_second": 1.843,
"eval_steps_per_second": 0.263,
"step": 380
},
{
"epoch": 9.1,
"eval_accuracy": 0.7857142857142857,
"eval_loss": 0.7852017283439636,
"eval_runtime": 2.5877,
"eval_samples_per_second": 10.821,
"eval_steps_per_second": 1.546,
"step": 380
}
],
"logging_steps": 10,
"max_steps": 380,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.7881039164748595e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}