{
  "best_metric": 1.0,
  "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-266",
  "epoch": 9.1,
  "eval_steps": 500,
  "global_step": 380,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02631578947368421,
      "grad_norm": 7.891677379608154,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 1.3956,
      "step": 10
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": 9.808924674987793,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 1.3551,
      "step": 20
    },
    {
      "epoch": 0.07894736842105263,
      "grad_norm": 11.947072982788086,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 1.3762,
      "step": 30
    },
    {
      "epoch": 0.1,
      "eval_accuracy": 0.2857142857142857,
      "eval_loss": 1.4489834308624268,
      "eval_runtime": 6.6934,
      "eval_samples_per_second": 2.092,
      "eval_steps_per_second": 0.299,
      "step": 38
    },
    {
      "epoch": 1.0052631578947369,
      "grad_norm": 9.198051452636719,
      "learning_rate": 4.970760233918128e-05,
      "loss": 1.3077,
      "step": 40
    },
    {
      "epoch": 1.0315789473684212,
      "grad_norm": 8.386839866638184,
      "learning_rate": 4.824561403508772e-05,
      "loss": 1.3181,
      "step": 50
    },
    {
      "epoch": 1.0578947368421052,
      "grad_norm": 7.219974040985107,
      "learning_rate": 4.678362573099415e-05,
      "loss": 1.3483,
      "step": 60
    },
    {
      "epoch": 1.0842105263157895,
      "grad_norm": 8.174283027648926,
      "learning_rate": 4.5321637426900585e-05,
      "loss": 1.2421,
      "step": 70
    },
    {
      "epoch": 1.1,
      "eval_accuracy": 0.42857142857142855,
      "eval_loss": 1.3189738988876343,
      "eval_runtime": 1.1969,
      "eval_samples_per_second": 11.696,
      "eval_steps_per_second": 1.671,
      "step": 76
    },
    {
      "epoch": 2.0105263157894737,
      "grad_norm": 8.107172012329102,
      "learning_rate": 4.3859649122807014e-05,
      "loss": 1.192,
      "step": 80
    },
    {
      "epoch": 2.036842105263158,
      "grad_norm": 4.386706352233887,
      "learning_rate": 4.239766081871345e-05,
      "loss": 1.1036,
      "step": 90
    },
    {
      "epoch": 2.0631578947368423,
      "grad_norm": 22.933664321899414,
      "learning_rate": 4.093567251461988e-05,
      "loss": 0.9115,
      "step": 100
    },
    {
      "epoch": 2.0894736842105264,
      "grad_norm": 8.911148071289062,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 0.8753,
      "step": 110
    },
    {
      "epoch": 2.1,
      "eval_accuracy": 0.5714285714285714,
      "eval_loss": 0.9505947828292847,
      "eval_runtime": 1.3125,
      "eval_samples_per_second": 10.667,
      "eval_steps_per_second": 1.524,
      "step": 114
    },
    {
      "epoch": 3.0157894736842104,
      "grad_norm": 12.133541107177734,
      "learning_rate": 3.8011695906432746e-05,
      "loss": 0.6292,
      "step": 120
    },
    {
      "epoch": 3.042105263157895,
      "grad_norm": 3.894150495529175,
      "learning_rate": 3.654970760233918e-05,
      "loss": 0.4353,
      "step": 130
    },
    {
      "epoch": 3.068421052631579,
      "grad_norm": 16.60247039794922,
      "learning_rate": 3.508771929824561e-05,
      "loss": 0.6194,
      "step": 140
    },
    {
      "epoch": 3.094736842105263,
      "grad_norm": 9.527063369750977,
      "learning_rate": 3.362573099415205e-05,
      "loss": 0.4285,
      "step": 150
    },
    {
      "epoch": 3.1,
      "eval_accuracy": 0.7857142857142857,
      "eval_loss": 0.5579931139945984,
      "eval_runtime": 1.2045,
      "eval_samples_per_second": 11.623,
      "eval_steps_per_second": 1.66,
      "step": 152
    },
    {
      "epoch": 4.021052631578947,
      "grad_norm": 10.711992263793945,
      "learning_rate": 3.216374269005848e-05,
      "loss": 0.2534,
      "step": 160
    },
    {
      "epoch": 4.0473684210526315,
      "grad_norm": 0.5280627012252808,
      "learning_rate": 3.0701754385964913e-05,
      "loss": 0.1613,
      "step": 170
    },
    {
      "epoch": 4.073684210526316,
      "grad_norm": 6.357590675354004,
      "learning_rate": 2.9239766081871346e-05,
      "loss": 0.2539,
      "step": 180
    },
    {
      "epoch": 4.1,
      "grad_norm": 10.521815299987793,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.3808,
      "step": 190
    },
    {
      "epoch": 4.1,
      "eval_accuracy": 0.8571428571428571,
      "eval_loss": 0.49507784843444824,
      "eval_runtime": 1.2129,
      "eval_samples_per_second": 11.543,
      "eval_steps_per_second": 1.649,
      "step": 190
    },
    {
      "epoch": 5.026315789473684,
      "grad_norm": 0.37827879190444946,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.0941,
      "step": 200
    },
    {
      "epoch": 5.052631578947368,
      "grad_norm": 15.41084098815918,
      "learning_rate": 2.485380116959064e-05,
      "loss": 0.1117,
      "step": 210
    },
    {
      "epoch": 5.078947368421052,
      "grad_norm": 16.81968879699707,
      "learning_rate": 2.3391812865497074e-05,
      "loss": 0.1368,
      "step": 220
    },
    {
      "epoch": 5.1,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.1577732115983963,
      "eval_runtime": 1.1295,
      "eval_samples_per_second": 12.395,
      "eval_steps_per_second": 1.771,
      "step": 228
    },
    {
      "epoch": 6.005263157894737,
      "grad_norm": 1.9107282161712646,
      "learning_rate": 2.1929824561403507e-05,
      "loss": 0.1195,
      "step": 230
    },
    {
      "epoch": 6.031578947368421,
      "grad_norm": 0.2877643406391144,
      "learning_rate": 2.046783625730994e-05,
      "loss": 0.074,
      "step": 240
    },
    {
      "epoch": 6.057894736842106,
      "grad_norm": 15.423999786376953,
      "learning_rate": 1.9005847953216373e-05,
      "loss": 0.3348,
      "step": 250
    },
    {
      "epoch": 6.08421052631579,
      "grad_norm": 13.305224418640137,
      "learning_rate": 1.7543859649122806e-05,
      "loss": 0.043,
      "step": 260
    },
    {
      "epoch": 6.1,
      "eval_accuracy": 1.0,
      "eval_loss": 0.04753781110048294,
      "eval_runtime": 1.1871,
      "eval_samples_per_second": 11.794,
      "eval_steps_per_second": 1.685,
      "step": 266
    },
    {
      "epoch": 7.010526315789473,
      "grad_norm": 1.7253011465072632,
      "learning_rate": 1.608187134502924e-05,
      "loss": 0.2052,
      "step": 270
    },
    {
      "epoch": 7.036842105263158,
      "grad_norm": 0.0725923627614975,
      "learning_rate": 1.4619883040935673e-05,
      "loss": 0.0899,
      "step": 280
    },
    {
      "epoch": 7.063157894736842,
      "grad_norm": 0.664633572101593,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.0753,
      "step": 290
    },
    {
      "epoch": 7.089473684210526,
      "grad_norm": 0.12937086820602417,
      "learning_rate": 1.1695906432748537e-05,
      "loss": 0.0842,
      "step": 300
    },
    {
      "epoch": 7.1,
      "eval_accuracy": 1.0,
      "eval_loss": 0.06243452429771423,
      "eval_runtime": 1.1818,
      "eval_samples_per_second": 11.847,
      "eval_steps_per_second": 1.692,
      "step": 304
    },
    {
      "epoch": 8.01578947368421,
      "grad_norm": 0.04959488287568092,
      "learning_rate": 1.023391812865497e-05,
      "loss": 0.0558,
      "step": 310
    },
    {
      "epoch": 8.042105263157895,
      "grad_norm": 0.07736323028802872,
      "learning_rate": 8.771929824561403e-06,
      "loss": 0.004,
      "step": 320
    },
    {
      "epoch": 8.06842105263158,
      "grad_norm": 0.037877339869737625,
      "learning_rate": 7.3099415204678366e-06,
      "loss": 0.0784,
      "step": 330
    },
    {
      "epoch": 8.094736842105263,
      "grad_norm": 0.04343694821000099,
      "learning_rate": 5.8479532163742686e-06,
      "loss": 0.003,
      "step": 340
    },
    {
      "epoch": 8.1,
      "eval_accuracy": 1.0,
      "eval_loss": 0.05573272332549095,
      "eval_runtime": 1.2047,
      "eval_samples_per_second": 11.621,
      "eval_steps_per_second": 1.66,
      "step": 342
    },
    {
      "epoch": 9.021052631578947,
      "grad_norm": 0.037801120430231094,
      "learning_rate": 4.3859649122807014e-06,
      "loss": 0.0041,
      "step": 350
    },
    {
      "epoch": 9.047368421052632,
      "grad_norm": 0.3717033863067627,
      "learning_rate": 2.9239766081871343e-06,
      "loss": 0.0051,
      "step": 360
    },
    {
      "epoch": 9.073684210526316,
      "grad_norm": 0.030566079542040825,
      "learning_rate": 1.4619883040935671e-06,
      "loss": 0.0034,
      "step": 370
    },
    {
      "epoch": 9.1,
      "grad_norm": 0.05392751097679138,
      "learning_rate": 0.0,
      "loss": 0.0828,
      "step": 380
    },
    {
      "epoch": 9.1,
      "eval_accuracy": 1.0,
      "eval_loss": 0.04455011337995529,
      "eval_runtime": 1.3077,
      "eval_samples_per_second": 10.706,
      "eval_steps_per_second": 1.529,
      "step": 380
    },
    {
      "epoch": 9.1,
      "step": 380,
      "total_flos": 3.7881039164748595e+18,
      "train_loss": 0.47875460998988467,
      "train_runtime": 960.3946,
      "train_samples_per_second": 3.165,
      "train_steps_per_second": 0.396
    },
    {
      "epoch": 9.1,
      "eval_accuracy": 0.7857142857142857,
      "eval_loss": 0.785201907157898,
      "eval_runtime": 15.1964,
      "eval_samples_per_second": 1.843,
      "eval_steps_per_second": 0.263,
      "step": 380
    },
    {
      "epoch": 9.1,
      "eval_accuracy": 0.7857142857142857,
      "eval_loss": 0.7852017283439636,
      "eval_runtime": 2.5877,
      "eval_samples_per_second": 10.821,
      "eval_steps_per_second": 1.546,
      "step": 380
    }
  ],
  "logging_steps": 10,
  "max_steps": 380,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.7881039164748595e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}