{ "best_metric": 1.0, "best_model_checkpoint": "videomae-base-finetuned-ucf101-subset/checkpoint-266", "epoch": 9.1, "eval_steps": 500, "global_step": 380, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02631578947368421, "grad_norm": 7.891677379608154, "learning_rate": 1.3157894736842106e-05, "loss": 1.3956, "step": 10 }, { "epoch": 0.05263157894736842, "grad_norm": 9.808924674987793, "learning_rate": 2.6315789473684212e-05, "loss": 1.3551, "step": 20 }, { "epoch": 0.07894736842105263, "grad_norm": 11.947072982788086, "learning_rate": 3.9473684210526316e-05, "loss": 1.3762, "step": 30 }, { "epoch": 0.1, "eval_accuracy": 0.2857142857142857, "eval_loss": 1.4489834308624268, "eval_runtime": 6.6934, "eval_samples_per_second": 2.092, "eval_steps_per_second": 0.299, "step": 38 }, { "epoch": 1.0052631578947369, "grad_norm": 9.198051452636719, "learning_rate": 4.970760233918128e-05, "loss": 1.3077, "step": 40 }, { "epoch": 1.0315789473684212, "grad_norm": 8.386839866638184, "learning_rate": 4.824561403508772e-05, "loss": 1.3181, "step": 50 }, { "epoch": 1.0578947368421052, "grad_norm": 7.219974040985107, "learning_rate": 4.678362573099415e-05, "loss": 1.3483, "step": 60 }, { "epoch": 1.0842105263157895, "grad_norm": 8.174283027648926, "learning_rate": 4.5321637426900585e-05, "loss": 1.2421, "step": 70 }, { "epoch": 1.1, "eval_accuracy": 0.42857142857142855, "eval_loss": 1.3189738988876343, "eval_runtime": 1.1969, "eval_samples_per_second": 11.696, "eval_steps_per_second": 1.671, "step": 76 }, { "epoch": 2.0105263157894737, "grad_norm": 8.107172012329102, "learning_rate": 4.3859649122807014e-05, "loss": 1.192, "step": 80 }, { "epoch": 2.036842105263158, "grad_norm": 4.386706352233887, "learning_rate": 4.239766081871345e-05, "loss": 1.1036, "step": 90 }, { "epoch": 2.0631578947368423, "grad_norm": 22.933664321899414, "learning_rate": 4.093567251461988e-05, "loss": 0.9115, "step": 100 }, { "epoch": 2.0894736842105264, "grad_norm": 8.911148071289062, "learning_rate": 3.9473684210526316e-05, "loss": 0.8753, "step": 110 }, { "epoch": 2.1, "eval_accuracy": 0.5714285714285714, "eval_loss": 0.9505947828292847, "eval_runtime": 1.3125, "eval_samples_per_second": 10.667, "eval_steps_per_second": 1.524, "step": 114 }, { "epoch": 3.0157894736842104, "grad_norm": 12.133541107177734, "learning_rate": 3.8011695906432746e-05, "loss": 0.6292, "step": 120 }, { "epoch": 3.042105263157895, "grad_norm": 3.894150495529175, "learning_rate": 3.654970760233918e-05, "loss": 0.4353, "step": 130 }, { "epoch": 3.068421052631579, "grad_norm": 16.60247039794922, "learning_rate": 3.508771929824561e-05, "loss": 0.6194, "step": 140 }, { "epoch": 3.094736842105263, "grad_norm": 9.527063369750977, "learning_rate": 3.362573099415205e-05, "loss": 0.4285, "step": 150 }, { "epoch": 3.1, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.5579931139945984, "eval_runtime": 1.2045, "eval_samples_per_second": 11.623, "eval_steps_per_second": 1.66, "step": 152 }, { "epoch": 4.021052631578947, "grad_norm": 10.711992263793945, "learning_rate": 3.216374269005848e-05, "loss": 0.2534, "step": 160 }, { "epoch": 4.0473684210526315, "grad_norm": 0.5280627012252808, "learning_rate": 3.0701754385964913e-05, "loss": 0.1613, "step": 170 }, { "epoch": 4.073684210526316, "grad_norm": 6.357590675354004, "learning_rate": 2.9239766081871346e-05, "loss": 0.2539, "step": 180 }, { "epoch": 4.1, "grad_norm": 10.521815299987793, "learning_rate": 2.777777777777778e-05, "loss": 0.3808, "step": 190 }, { "epoch": 4.1, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.49507784843444824, "eval_runtime": 1.2129, "eval_samples_per_second": 11.543, "eval_steps_per_second": 1.649, "step": 190 }, { "epoch": 5.026315789473684, "grad_norm": 0.37827879190444946, "learning_rate": 2.6315789473684212e-05, "loss": 0.0941, "step": 200 }, { "epoch": 5.052631578947368, "grad_norm": 15.41084098815918, "learning_rate": 2.485380116959064e-05, "loss": 0.1117, "step": 210 }, { "epoch": 5.078947368421052, "grad_norm": 16.81968879699707, "learning_rate": 2.3391812865497074e-05, "loss": 0.1368, "step": 220 }, { "epoch": 5.1, "eval_accuracy": 0.9285714285714286, "eval_loss": 0.1577732115983963, "eval_runtime": 1.1295, "eval_samples_per_second": 12.395, "eval_steps_per_second": 1.771, "step": 228 }, { "epoch": 6.005263157894737, "grad_norm": 1.9107282161712646, "learning_rate": 2.1929824561403507e-05, "loss": 0.1195, "step": 230 }, { "epoch": 6.031578947368421, "grad_norm": 0.2877643406391144, "learning_rate": 2.046783625730994e-05, "loss": 0.074, "step": 240 }, { "epoch": 6.057894736842106, "grad_norm": 15.423999786376953, "learning_rate": 1.9005847953216373e-05, "loss": 0.3348, "step": 250 }, { "epoch": 6.08421052631579, "grad_norm": 13.305224418640137, "learning_rate": 1.7543859649122806e-05, "loss": 0.043, "step": 260 }, { "epoch": 6.1, "eval_accuracy": 1.0, "eval_loss": 0.04753781110048294, "eval_runtime": 1.1871, "eval_samples_per_second": 11.794, "eval_steps_per_second": 1.685, "step": 266 }, { "epoch": 7.010526315789473, "grad_norm": 1.7253011465072632, "learning_rate": 1.608187134502924e-05, "loss": 0.2052, "step": 270 }, { "epoch": 7.036842105263158, "grad_norm": 0.0725923627614975, "learning_rate": 1.4619883040935673e-05, "loss": 0.0899, "step": 280 }, { "epoch": 7.063157894736842, "grad_norm": 0.664633572101593, "learning_rate": 1.3157894736842106e-05, "loss": 0.0753, "step": 290 }, { "epoch": 7.089473684210526, "grad_norm": 0.12937086820602417, "learning_rate": 1.1695906432748537e-05, "loss": 0.0842, "step": 300 }, { "epoch": 7.1, "eval_accuracy": 1.0, "eval_loss": 0.06243452429771423, "eval_runtime": 1.1818, "eval_samples_per_second": 11.847, "eval_steps_per_second": 1.692, "step": 304 }, { "epoch": 8.01578947368421, "grad_norm": 0.04959488287568092, "learning_rate": 1.023391812865497e-05, "loss": 0.0558, "step": 310 }, { "epoch": 8.042105263157895, "grad_norm": 0.07736323028802872, "learning_rate": 8.771929824561403e-06, "loss": 0.004, "step": 320 }, { "epoch": 8.06842105263158, "grad_norm": 0.037877339869737625, "learning_rate": 7.3099415204678366e-06, "loss": 0.0784, "step": 330 }, { "epoch": 8.094736842105263, "grad_norm": 0.04343694821000099, "learning_rate": 5.8479532163742686e-06, "loss": 0.003, "step": 340 }, { "epoch": 8.1, "eval_accuracy": 1.0, "eval_loss": 0.05573272332549095, "eval_runtime": 1.2047, "eval_samples_per_second": 11.621, "eval_steps_per_second": 1.66, "step": 342 }, { "epoch": 9.021052631578947, "grad_norm": 0.037801120430231094, "learning_rate": 4.3859649122807014e-06, "loss": 0.0041, "step": 350 }, { "epoch": 9.047368421052632, "grad_norm": 0.3717033863067627, "learning_rate": 2.9239766081871343e-06, "loss": 0.0051, "step": 360 }, { "epoch": 9.073684210526316, "grad_norm": 0.030566079542040825, "learning_rate": 1.4619883040935671e-06, "loss": 0.0034, "step": 370 }, { "epoch": 9.1, "grad_norm": 0.05392751097679138, "learning_rate": 0.0, "loss": 0.0828, "step": 380 }, { "epoch": 9.1, "eval_accuracy": 1.0, "eval_loss": 0.04455011337995529, "eval_runtime": 1.3077, "eval_samples_per_second": 10.706, "eval_steps_per_second": 1.529, "step": 380 }, { "epoch": 9.1, "step": 380, "total_flos": 3.7881039164748595e+18, "train_loss": 0.47875460998988467, "train_runtime": 960.3946, "train_samples_per_second": 3.165, "train_steps_per_second": 0.396 }, { "epoch": 9.1, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.785201907157898, "eval_runtime": 15.1964, "eval_samples_per_second": 1.843, "eval_steps_per_second": 0.263, "step": 380 }, { "epoch": 9.1, "eval_accuracy": 0.7857142857142857, "eval_loss": 0.7852017283439636, "eval_runtime": 2.5877, "eval_samples_per_second": 10.821, "eval_steps_per_second": 1.546, "step": 380 } ], "logging_steps": 10, "max_steps": 380, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.7881039164748595e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }